OSDN Git Service

Merge branch 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 22 Jun 2015 23:43:01 +0000 (16:43 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 22 Jun 2015 23:43:01 +0000 (16:43 -0700)
Pull x86 CPU features from Ingo Molnar:
 "Various CPU feature support related changes: in particular the
  /proc/cpuinfo model name sanitization change should be monitored, it
  has a chance to break stuff.  (but really shouldn't and there are no
  regression reports)"

* 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/cpu/amd: Give access to the number of nodes in a physical package
  x86/cpu: Trim model ID whitespace
  x86/cpu: Strip any /proc/cpuinfo model name field whitespace
  x86/cpu/amd: Set X86_FEATURE_EXTD_APICID for future processors
  x86/gart: Check for GART support before accessing GART registers

1554 files changed:
CREDITS
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/RCU/arrayRCU.txt
Documentation/RCU/lockdep.txt
Documentation/RCU/rcu_dereference.txt
Documentation/RCU/whatisRCU.txt
Documentation/acpi/enumeration.txt
Documentation/acpi/gpio-properties.txt
Documentation/cputopology.txt
Documentation/devicetree/bindings/arm/omap/l3-noc.txt
Documentation/devicetree/bindings/clock/at91-clock.txt
Documentation/devicetree/bindings/clock/silabs,si5351.txt
Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt
Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt [moved from Documentation/devicetree/bindings/mtd/m25p80.txt with 85% similarity]
Documentation/devicetree/bindings/net/cdns-emac.txt
Documentation/devicetree/bindings/usb/renesas_usbhs.txt
Documentation/filesystems/Locking
Documentation/filesystems/automount-support.txt
Documentation/filesystems/porting
Documentation/filesystems/vfs.txt
Documentation/hwmon/tmp401
Documentation/i2c/slave-interface
Documentation/kernel-parameters.txt
Documentation/memory-barriers.txt
Documentation/networking/udplite.txt
Documentation/scheduler/sched-deadline.txt
Documentation/serial/tty.txt
Documentation/target/tcmu-design.txt
Documentation/virtual/kvm/mmu.txt
MAINTAINERS
Makefile
arch/alpha/boot/Makefile
arch/alpha/boot/main.c
arch/alpha/boot/stdio.c [new file with mode: 0644]
arch/alpha/boot/tools/objstrip.c
arch/alpha/include/asm/cmpxchg.h
arch/alpha/include/asm/types.h
arch/alpha/include/asm/unistd.h
arch/alpha/include/uapi/asm/unistd.h
arch/alpha/kernel/err_ev6.c
arch/alpha/kernel/irq.c
arch/alpha/kernel/osf_sys.c
arch/alpha/kernel/process.c
arch/alpha/kernel/smp.c
arch/alpha/kernel/srmcons.c
arch/alpha/kernel/sys_marvel.c
arch/alpha/kernel/systbls.S
arch/alpha/kernel/traps.c
arch/alpha/mm/fault.c
arch/alpha/oprofile/op_model_ev4.c
arch/alpha/oprofile/op_model_ev5.c
arch/alpha/oprofile/op_model_ev6.c
arch/alpha/oprofile/op_model_ev67.c
arch/arc/Kconfig.debug
arch/arc/include/asm/atomic.h
arch/arc/include/asm/futex.h
arch/arc/mm/cache_arc700.c
arch/arc/mm/fault.c
arch/arm/boot/dts/Makefile
arch/arm/boot/dts/am335x-bone-common.dtsi
arch/arm/boot/dts/am335x-boneblack.dts
arch/arm/boot/dts/am335x-evmsk.dts
arch/arm/boot/dts/am35xx-clocks.dtsi
arch/arm/boot/dts/am437x-sk-evm.dts
arch/arm/boot/dts/am57xx-beagle-x15.dts
arch/arm/boot/dts/armada-375.dtsi
arch/arm/boot/dts/armada-38x.dtsi
arch/arm/boot/dts/armada-39x.dtsi
arch/arm/boot/dts/armada-xp-linksys-mamba.dts
arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
arch/arm/boot/dts/dm816x.dtsi
arch/arm/boot/dts/dove-cubox.dts
arch/arm/boot/dts/dra7.dtsi
arch/arm/boot/dts/exynos4412-odroid-common.dtsi
arch/arm/boot/dts/exynos4412-trats2.dts
arch/arm/boot/dts/exynos5250-snow.dts
arch/arm/boot/dts/exynos5420-peach-pit.dts
arch/arm/boot/dts/exynos5420-trip-points.dtsi
arch/arm/boot/dts/exynos5420.dtsi
arch/arm/boot/dts/exynos5440-trip-points.dtsi
arch/arm/boot/dts/exynos5800-peach-pi.dts
arch/arm/boot/dts/imx23-olinuxino.dts
arch/arm/boot/dts/imx25.dtsi
arch/arm/boot/dts/imx27.dtsi
arch/arm/boot/dts/imx28.dtsi
arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
arch/arm/boot/dts/omap3-devkit8000.dts
arch/arm/boot/dts/omap3-n900.dts
arch/arm/boot/dts/omap3.dtsi
arch/arm/boot/dts/omap5.dtsi
arch/arm/boot/dts/r8a7791-koelsch.dts
arch/arm/boot/dts/ste-dbx5x0.dtsi
arch/arm/boot/dts/ste-href.dtsi
arch/arm/boot/dts/ste-snowball.dts
arch/arm/boot/dts/tegra124.dtsi
arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
arch/arm/boot/dts/vexpress-v2p-ca9.dts
arch/arm/boot/dts/zynq-7000.dtsi
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/omap2plus_defconfig
arch/arm/include/asm/barrier.h
arch/arm/include/asm/dma-iommu.h
arch/arm/include/asm/futex.h
arch/arm/include/asm/topology.h
arch/arm/include/asm/xen/page.h
arch/arm/kernel/entry-common.S
arch/arm/kernel/perf_event_cpu.c
arch/arm/mach-exynos/common.h
arch/arm/mach-exynos/exynos.c
arch/arm/mach-exynos/platsmp.c
arch/arm/mach-exynos/pm_domains.c
arch/arm/mach-exynos/suspend.c
arch/arm/mach-gemini/common.h
arch/arm/mach-gemini/reset.c
arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
arch/arm/mach-imx/gpc.c
arch/arm/mach-omap2/omap_hwmod.c
arch/arm/mach-omap2/omap_hwmod_43xx_data.c
arch/arm/mach-omap2/prcm43xx.h
arch/arm/mach-omap2/prm-regbits-34xx.h
arch/arm/mach-omap2/prm-regbits-44xx.h
arch/arm/mach-omap2/prminst44xx.c
arch/arm/mach-omap2/sleep34xx.S
arch/arm/mach-omap2/timer.c
arch/arm/mach-omap2/vc.c
arch/arm/mach-omap2/vc.h
arch/arm/mach-omap2/vc3xxx_data.c
arch/arm/mach-omap2/vc44xx_data.c
arch/arm/mach-pxa/Kconfig
arch/arm/mach-pxa/Makefile
arch/arm/mach-pxa/include/mach/lubbock.h
arch/arm/mach-pxa/include/mach/mainstone.h
arch/arm/mach-pxa/lubbock.c
arch/arm/mach-pxa/mainstone.c
arch/arm/mach-pxa/pxa_cplds_irqs.c [new file with mode: 0644]
arch/arm/mach-rockchip/pm.c
arch/arm/mach-rockchip/pm.h
arch/arm/mach-rockchip/rockchip.c
arch/arm/mm/dma-mapping.c
arch/arm/mm/fault.c
arch/arm/mm/highmem.c
arch/arm/mm/mmu.c
arch/arm/mm/proc-arm1020.S
arch/arm/mm/proc-arm1020e.S
arch/arm/mm/proc-arm925.S
arch/arm/mm/proc-feroceon.S
arch/arm/net/bpf_jit_32.c
arch/arm/xen/enlighten.c
arch/arm/xen/mm.c
arch/arm64/boot/dts/arm/juno-motherboard.dtsi
arch/arm64/boot/dts/mediatek/mt8173-evb.dts
arch/arm64/crypto/crc32-arm64.c
arch/arm64/crypto/sha1-ce-glue.c
arch/arm64/crypto/sha2-ce-glue.c
arch/arm64/include/asm/barrier.h
arch/arm64/include/asm/futex.h
arch/arm64/include/asm/topology.h
arch/arm64/kernel/alternative.c
arch/arm64/kernel/perf_event.c
arch/arm64/mm/dump.c
arch/arm64/mm/fault.c
arch/arm64/net/bpf_jit_comp.c
arch/avr32/include/asm/cmpxchg.h
arch/avr32/include/asm/uaccess.h
arch/avr32/mm/fault.c
arch/blackfin/include/asm/io.h
arch/cris/mm/fault.c
arch/frv/mm/fault.c
arch/frv/mm/highmem.c
arch/hexagon/include/asm/cmpxchg.h
arch/hexagon/include/asm/uaccess.h
arch/ia64/include/asm/barrier.h
arch/ia64/include/asm/topology.h
arch/ia64/include/uapi/asm/cmpxchg.h
arch/ia64/kernel/smpboot.c
arch/ia64/mm/fault.c
arch/ia64/pci/pci.c
arch/m32r/include/asm/cmpxchg.h
arch/m32r/include/asm/uaccess.h
arch/m32r/kernel/smp.c
arch/m32r/mm/fault.c
arch/m68k/include/asm/cmpxchg.h
arch/m68k/include/asm/irqflags.h
arch/m68k/mm/fault.c
arch/metag/include/asm/barrier.h
arch/metag/include/asm/cmpxchg.h
arch/metag/mm/fault.c
arch/metag/mm/highmem.c
arch/microblaze/include/asm/uaccess.h
arch/microblaze/mm/fault.c
arch/microblaze/mm/highmem.c
arch/mips/Makefile
arch/mips/ath79/prom.c
arch/mips/ath79/setup.c
arch/mips/cobalt/Makefile
arch/mips/configs/fuloong2e_defconfig
arch/mips/include/asm/barrier.h
arch/mips/include/asm/cmpxchg.h
arch/mips/include/asm/elf.h
arch/mips/include/asm/pgtable-bits.h
arch/mips/include/asm/smp.h
arch/mips/include/asm/switch_to.h
arch/mips/include/asm/topology.h
arch/mips/include/asm/uaccess.h
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/elf.c
arch/mips/kernel/irq.c
arch/mips/kernel/ptrace.c
arch/mips/kernel/signal-common.h
arch/mips/kernel/smp-bmips.c
arch/mips/kernel/smp-cps.c
arch/mips/kernel/smp.c
arch/mips/kernel/traps.c
arch/mips/kvm/emulate.c
arch/mips/lib/strnlen_user.S
arch/mips/loongson/common/Makefile
arch/mips/loongson/loongson-3/smp.c
arch/mips/math-emu/cp1emu.c
arch/mips/mm/c-r4k.c
arch/mips/mm/fault.c
arch/mips/mm/highmem.c
arch/mips/mm/init.c
arch/mips/mm/tlb-r4k.c
arch/mips/net/bpf_jit.c
arch/mips/ralink/ill_acc.c
arch/mips/sgi-ip32/ip32-platform.c
arch/mn10300/include/asm/highmem.h
arch/mn10300/mm/fault.c
arch/nios2/mm/fault.c
arch/parisc/include/asm/cacheflush.h
arch/parisc/include/asm/cmpxchg.h
arch/parisc/include/asm/elf.h
arch/parisc/kernel/process.c
arch/parisc/kernel/sys_parisc.c
arch/parisc/kernel/traps.c
arch/parisc/mm/fault.c
arch/powerpc/include/asm/barrier.h
arch/powerpc/include/asm/cmpxchg.h
arch/powerpc/include/asm/topology.h
arch/powerpc/kernel/mce.c
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/lib/vmx-helper.c
arch/powerpc/mm/fault.c
arch/powerpc/mm/highmem.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/mm/pgtable_64.c
arch/powerpc/mm/tlb_nohash.c
arch/s390/crypto/ghash_s390.c
arch/s390/crypto/prng.c
arch/s390/include/asm/barrier.h
arch/s390/include/asm/cmpxchg.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/topology.h
arch/s390/include/asm/uaccess.h
arch/s390/mm/fault.c
arch/s390/net/bpf_jit.h
arch/s390/net/bpf_jit_comp.c
arch/score/include/asm/cmpxchg.h
arch/score/include/asm/uaccess.h
arch/score/lib/string.S
arch/score/mm/fault.c
arch/sh/include/asm/barrier.h
arch/sh/include/asm/cmpxchg.h
arch/sh/mm/fault.c
arch/sparc/include/asm/barrier_64.h
arch/sparc/include/asm/cmpxchg_32.h
arch/sparc/include/asm/cmpxchg_64.h
arch/sparc/include/asm/cpudata_64.h
arch/sparc/include/asm/pgtable_64.h
arch/sparc/include/asm/topology_64.h
arch/sparc/include/asm/trap_block.h
arch/sparc/kernel/entry.h
arch/sparc/kernel/leon_pci_grpci2.c
arch/sparc/kernel/mdesc.c
arch/sparc/kernel/pci.c
arch/sparc/kernel/setup_64.c
arch/sparc/kernel/smp_64.c
arch/sparc/kernel/vmlinux.lds.S
arch/sparc/mm/fault_32.c
arch/sparc/mm/fault_64.c
arch/sparc/mm/highmem.c
arch/sparc/mm/init_64.c
arch/tile/include/asm/atomic_64.h
arch/tile/include/asm/topology.h
arch/tile/include/asm/uaccess.h
arch/tile/mm/fault.c
arch/tile/mm/highmem.c
arch/um/kernel/trap.c
arch/unicore32/mm/fault.c
arch/x86/Kconfig
arch/x86/boot/compressed/eboot.c
arch/x86/boot/compressed/misc.h
arch/x86/include/asm/barrier.h
arch/x86/include/asm/cmpxchg.h
arch/x86/include/asm/dma-mapping.h
arch/x86/include/asm/hypervisor.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/preempt.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/qspinlock.h [new file with mode: 0644]
arch/x86/include/asm/qspinlock_paravirt.h [new file with mode: 0644]
arch/x86/include/asm/segment.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/spinlock.h
arch/x86/include/asm/spinlock_types.h
arch/x86/include/asm/topology.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/uaccess_32.h
arch/x86/include/asm/xen/page.h
arch/x86/include/uapi/asm/msr-index.h
arch/x86/kernel/asm-offsets.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/cpu/hypervisor.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event.h
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_bts.c
arch/x86/kernel/cpu/perf_event_intel_cqm.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
arch/x86/kernel/cpu/perf_event_intel_lbr.c
arch/x86/kernel/cpu/perf_event_intel_pt.c
arch/x86/kernel/cpu/perf_event_intel_rapl.c
arch/x86/kernel/cpu/perf_event_intel_uncore.c
arch/x86/kernel/cpu/perf_event_intel_uncore.h
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
arch/x86/kernel/cpu/proc.c
arch/x86/kernel/head64.c
arch/x86/kernel/head_32.S
arch/x86/kernel/head_64.S
arch/x86/kernel/i386_ksyms_32.c
arch/x86/kernel/i387.c
arch/x86/kernel/kvm.c
arch/x86/kernel/paravirt-spinlocks.c
arch/x86/kernel/paravirt_patch_32.c
arch/x86/kernel/paravirt_patch_64.c
arch/x86/kernel/pci-dma.c
arch/x86/kernel/process.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/tsc_sync.c
arch/x86/kernel/x8664_ksyms_64.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/paging_tmpl.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/thunk_32.S
arch/x86/lib/thunk_64.S
arch/x86/lib/usercopy_32.c
arch/x86/mm/fault.c
arch/x86/mm/highmem_32.c
arch/x86/mm/iomap_32.c
arch/x86/mm/ioremap.c
arch/x86/net/bpf_jit_comp.c
arch/x86/pci/acpi.c
arch/x86/um/asm/barrier.h
arch/x86/vdso/Makefile
arch/x86/xen/enlighten.c
arch/x86/xen/spinlock.c
arch/x86/xen/suspend.c
arch/xtensa/include/asm/dma-mapping.h
arch/xtensa/mm/fault.c
arch/xtensa/mm/highmem.c
block/blk-core.c
block/blk-mq-cpumap.c
block/blk-mq.c
block/blk-sysfs.c
block/bounce.c
block/elevator.c
block/genhd.c
crypto/Kconfig
crypto/algif_aead.c
drivers/acpi/acpi_pad.c
drivers/acpi/acpi_pnp.c
drivers/acpi/acpica/utglobal.c
drivers/acpi/osl.c
drivers/acpi/resource.c
drivers/acpi/sbshc.c
drivers/ata/Kconfig
drivers/ata/Makefile
drivers/ata/ahci.c
drivers/ata/ahci_mvebu.c
drivers/ata/ahci_st.c
drivers/ata/libahci.c
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/pata_octeon_cf.c
drivers/ata/pata_scc.c [deleted file]
drivers/base/cacheinfo.c
drivers/base/init.c
drivers/base/topology.c
drivers/block/Kconfig
drivers/block/loop.c
drivers/block/nvme-core.c
drivers/block/nvme-scsi.c
drivers/block/xen-blkback/blkback.c
drivers/block/zram/zram_drv.c
drivers/bluetooth/ath3k.c
drivers/bluetooth/bt3c_cs.c
drivers/bluetooth/btbcm.c
drivers/bluetooth/btbcm.h
drivers/bluetooth/btusb.c
drivers/bluetooth/hci_ath.c
drivers/bus/arm-cci.c
drivers/bus/mips_cdmm.c
drivers/bus/mvebu-mbus.c
drivers/bus/omap_l3_noc.c
drivers/bus/omap_l3_noc.h
drivers/clk/at91/clk-peripheral.c
drivers/clk/at91/clk-pll.c
drivers/clk/at91/pmc.h
drivers/clk/clk-si5351.c
drivers/clk/clk.c
drivers/clk/qcom/gcc-msm8916.c
drivers/clk/samsung/Makefile
drivers/clk/samsung/clk-exynos5420.c
drivers/clk/samsung/clk-exynos5433.c
drivers/cpufreq/acpi-cpufreq.c
drivers/cpufreq/p4-clockmod.c
drivers/cpufreq/powernow-k8.c
drivers/cpufreq/speedstep-ich.c
drivers/crypto/caam/caamhash.c
drivers/crypto/caam/caamrng.c
drivers/crypto/vmx/aes.c
drivers/crypto/vmx/aes_cbc.c
drivers/crypto/vmx/ghash.c
drivers/dma/at_xdmac.c
drivers/dma/dmaengine.c
drivers/dma/hsu/hsu.c
drivers/dma/pl330.c
drivers/extcon/extcon-usb-gpio.c
drivers/firmware/dmi_scan.c
drivers/firmware/efi/runtime-map.c
drivers/firmware/iscsi_ibft.c
drivers/gpio/gpio-kempld.c
drivers/gpio/gpio-omap.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib-sysfs.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
drivers/gpu/drm/drm_irq.c
drivers/gpu/drm/drm_plane_helper.c
drivers/gpu/drm/drm_sysfs.c
drivers/gpu/drm/exynos/exynos7_drm_decon.c
drivers/gpu/drm/exynos/exynos_dp_core.c
drivers/gpu/drm/exynos/exynos_drm_crtc.c
drivers/gpu/drm/exynos/exynos_drm_crtc.h
drivers/gpu/drm/exynos/exynos_drm_drv.h
drivers/gpu/drm/exynos/exynos_drm_fb.c
drivers/gpu/drm/exynos/exynos_drm_fimd.c
drivers/gpu/drm/exynos/exynos_drm_fimd.h [deleted file]
drivers/gpu/drm/exynos/exynos_drm_plane.c
drivers/gpu/drm/exynos/exynos_drm_vidi.c
drivers/gpu/drm/exynos/exynos_mixer.c
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_i2c.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_lvds.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_sdvo.c
drivers/gpu/drm/mgag200/mgag200_mode.c
drivers/gpu/drm/msm/adreno/adreno_gpu.c
drivers/gpu/drm/msm/dsi/dsi.c
drivers/gpu/drm/msm/dsi/dsi_host.c
drivers/gpu/drm/msm/dsi/dsi_manager.c
drivers/gpu/drm/msm/edp/edp_aux.c
drivers/gpu/drm/msm/edp/edp_connector.c
drivers/gpu/drm/msm/edp/edp_ctrl.c
drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h
drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
drivers/gpu/drm/msm/msm_drv.c
drivers/gpu/drm/msm/msm_fb.c
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/msm/msm_iommu.c
drivers/gpu/drm/msm/msm_ringbuffer.c
drivers/gpu/drm/nouveau/include/nvif/class.h
drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/atombios_dp.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/dce3_1_afmt.c
drivers/gpu/drm/radeon/evergreen_hdmi.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_asic.h
drivers/gpu/drm/radeon/radeon_audio.c
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_dp_auxch.c
drivers/gpu/drm/radeon/radeon_dp_mst.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/radeon/radeon_mn.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/radeon/radeon_uvd.c
drivers/gpu/drm/radeon/radeon_vce.c
drivers/gpu/drm/radeon/radeon_vm.c
drivers/gpu/drm/radeon/rv770d.h
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/radeon/uvd_v1_0.c
drivers/gpu/drm/radeon/uvd_v2_2.c
drivers/gpu/drm/tegra/drm.c
drivers/gpu/drm/vgem/Makefile
drivers/gpu/drm/vgem/vgem_dma_buf.c [deleted file]
drivers/gpu/drm/vgem/vgem_drv.c
drivers/gpu/drm/vgem/vgem_drv.h
drivers/hid/hid-ids.h
drivers/hid/hid-logitech-hidpp.c
drivers/hid/hid-sensor-hub.c
drivers/hid/i2c-hid/i2c-hid.c
drivers/hid/usbhid/hid-quirks.c
drivers/hid/wacom_wac.c
drivers/hwmon/coretemp.c
drivers/hwmon/nct6683.c
drivers/hwmon/nct6775.c
drivers/hwmon/ntc_thermistor.c
drivers/hwmon/tmp401.c
drivers/i2c/busses/i2c-hix5hd2.c
drivers/i2c/busses/i2c-s3c2410.c
drivers/ide/Kconfig
drivers/ide/Makefile
drivers/ide/scc_pata.c [deleted file]
drivers/iio/accel/mma9551_core.c
drivers/iio/accel/mma9553.c
drivers/iio/accel/st_accel_core.c
drivers/iio/adc/axp288_adc.c
drivers/iio/adc/cc10001_adc.c
drivers/iio/adc/mcp320x.c
drivers/iio/adc/qcom-spmi-vadc.c
drivers/iio/adc/twl6030-gpadc.c
drivers/iio/adc/xilinx-xadc-core.c
drivers/iio/adc/xilinx-xadc.h
drivers/iio/common/st_sensors/st_sensors_core.c
drivers/iio/gyro/st_gyro_core.c
drivers/iio/imu/adis16400.h
drivers/iio/imu/adis16400_buffer.c
drivers/iio/imu/adis16400_core.c
drivers/iio/kfifo_buf.c
drivers/iio/light/hid-sensor-prox.c
drivers/iio/magnetometer/st_magn_core.c
drivers/iio/pressure/bmp280.c
drivers/iio/pressure/hid-sensor-press.c
drivers/iio/pressure/st_pressure_core.c
drivers/infiniband/core/addr.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cm_msgs.h
drivers/infiniband/core/cma.c
drivers/infiniband/core/iwpm_msg.c
drivers/infiniband/core/iwpm_util.c
drivers/infiniband/core/iwpm_util.h
drivers/infiniband/core/umem_odp.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
drivers/infiniband/hw/ehca/ehca_mcast.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/nes/nes.c
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/infiniband/hw/qib/qib_iba6120.c
drivers/infiniband/hw/qib/qib_iba7220.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_init.c
drivers/infiniband/hw/qib/qib_wc_x86_64.c
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/input/joydev.c
drivers/input/mouse/Kconfig
drivers/input/mouse/alps.c
drivers/input/mouse/elantech.c
drivers/input/mouse/synaptics.c
drivers/input/touchscreen/stmpe-ts.c
drivers/input/touchscreen/sx8654.c
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_v2.c
drivers/iommu/arm-smmu.c
drivers/iommu/intel-iommu.c
drivers/iommu/rockchip-iommu.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic.c
drivers/irqchip/irq-mips-gic.c
drivers/irqchip/irq-sunxi-nmi.c
drivers/irqchip/irq-tegra.c
drivers/lguest/core.c
drivers/md/bitmap.c
drivers/md/dm-crypt.c
drivers/md/dm-mpath.c
drivers/md/dm-table.c
drivers/md/dm.c
drivers/md/md.c
drivers/md/raid0.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/media/Kconfig
drivers/mfd/da9052-core.c
drivers/mmc/card/block.c
drivers/mmc/card/queue.c
drivers/mmc/card/queue.h
drivers/mmc/core/core.c
drivers/mmc/host/atmel-mci.c
drivers/mmc/host/dw_mmc.c
drivers/mmc/host/sh_mmcif.c
drivers/mtd/devices/m25p80.c
drivers/mtd/tests/readtest.c
drivers/mtd/ubi/block.c
drivers/net/bonding/bond_options.c
drivers/net/can/xilinx_can.c
drivers/net/dsa/mv88e6xxx.c
drivers/net/ethernet/amd/Kconfig
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
drivers/net/ethernet/apm/xgene/Kconfig
drivers/net/ethernet/broadcom/b44.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/brocade/bna/bfa_ioc.c
drivers/net/ethernet/brocade/bna/bnad.c
drivers/net/ethernet/brocade/bna/cna_fwimg.c
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/cadence/macb.h
drivers/net/ethernet/cisco/enic/enic_ethtool.c
drivers/net/ethernet/cisco/enic/enic_main.c
drivers/net/ethernet/cisco/enic/vnic_rq.c
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_ethtool.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/ibm/emac/core.c
drivers/net/ethernet/ibm/emac/core.h
drivers/net/ethernet/intel/e1000e/e1000.h
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_port.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
drivers/net/ethernet/qualcomm/qca_spi.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/rocker/rocker.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/rx.c
drivers/net/ethernet/smsc/smc91x.c
drivers/net/ethernet/smsc/smsc911x.c
drivers/net/ethernet/stmicro/stmmac/stmmac.h
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/hyperv/netvsc.c
drivers/net/ieee802154/at86rf230.c
drivers/net/macvlan.c
drivers/net/phy/Kconfig
drivers/net/phy/amd-xgbe-phy.c
drivers/net/phy/bcm7xxx.c
drivers/net/phy/dp83640.c
drivers/net/phy/mdio-gpio.c
drivers/net/phy/micrel.c
drivers/net/phy/phy.c
drivers/net/ppp/pppoe.c
drivers/net/usb/cdc_ncm.c
drivers/net/usb/r8152.c
drivers/net/usb/usbnet.c
drivers/net/vxlan.c
drivers/net/wireless/ath/ath9k/xmit.c
drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
drivers/net/wireless/iwlwifi/Kconfig
drivers/net/wireless/iwlwifi/iwl-7000.c
drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c
drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h
drivers/net/wireless/iwlwifi/iwl-fw-file.h
drivers/net/wireless/iwlwifi/iwl-nvm-parse.c
drivers/net/wireless/iwlwifi/iwl-trans.h
drivers/net/wireless/iwlwifi/mvm/coex_legacy.c
drivers/net/wireless/iwlwifi/mvm/d3.c
drivers/net/wireless/iwlwifi/mvm/fw-api-power.h
drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h
drivers/net/wireless/iwlwifi/mvm/fw-api.h
drivers/net/wireless/iwlwifi/mvm/fw.c
drivers/net/wireless/iwlwifi/mvm/mac80211.c
drivers/net/wireless/iwlwifi/mvm/mvm.h
drivers/net/wireless/iwlwifi/mvm/ops.c
drivers/net/wireless/iwlwifi/mvm/rs.c
drivers/net/wireless/iwlwifi/mvm/rx.c
drivers/net/wireless/iwlwifi/mvm/scan.c
drivers/net/wireless/iwlwifi/pcie/internal.h
drivers/net/wireless/iwlwifi/pcie/trans.c
drivers/net/wireless/iwlwifi/pcie/tx.c
drivers/net/wireless/rtlwifi/usb.c
drivers/net/xen-netback/netback.c
drivers/net/xen-netback/xenbus.c
drivers/net/xen-netfront.c
drivers/ntb/ntb_hw.c
drivers/of/base.c
drivers/of/dynamic.c
drivers/parisc/superio.c
drivers/pci/setup-bus.c
drivers/phy/Kconfig
drivers/phy/phy-core.c
drivers/phy/phy-omap-usb2.c
drivers/phy/phy-rcar-gen2.c
drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c
drivers/pinctrl/core.c
drivers/pinctrl/core.h
drivers/pinctrl/devicetree.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/mediatek/pinctrl-mtk-common.c
drivers/pinctrl/meson/pinctrl-meson.c
drivers/pinctrl/meson/pinctrl-meson8b.c
drivers/pinctrl/mvebu/pinctrl-armada-370.c
drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
drivers/platform/x86/thinkpad_acpi.c
drivers/power/axp288_fuel_gauge.c
drivers/power/bq27x00_battery.c
drivers/power/collie_battery.c
drivers/power/reset/Kconfig
drivers/power/reset/at91-reset.c
drivers/power/reset/ltc2952-poweroff.c
drivers/pwm/pwm-img.c
drivers/regulator/da9052-regulator.c
drivers/rtc/rtc-armada38x.c
drivers/s390/crypto/ap_bus.c
drivers/scsi/be2iscsi/be.h
drivers/scsi/be2iscsi/be_cmds.c
drivers/scsi/be2iscsi/be_cmds.h
drivers/scsi/be2iscsi/be_iscsi.c
drivers/scsi/be2iscsi/be_iscsi.h
drivers/scsi/be2iscsi/be_main.c
drivers/scsi/be2iscsi/be_main.h
drivers/scsi/be2iscsi/be_mgmt.c
drivers/scsi/be2iscsi/be_mgmt.h
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/qla2xxx/tcm_qla2xxx.c
drivers/scsi/sd.c
drivers/scsi/storvsc_drv.c
drivers/soc/mediatek/Kconfig
drivers/soc/mediatek/mtk-pmic-wrap.c
drivers/spi/Kconfig
drivers/spi/spi-bcm2835.c
drivers/spi/spi-bitbang.c
drivers/spi/spi-fsl-cpm.c
drivers/spi/spi-fsl-espi.c
drivers/spi/spi-omap2-mcspi.c
drivers/spi/spi.c
drivers/ssb/driver_chipcommon_pmu.c
drivers/ssb/driver_pcicore.c
drivers/staging/gdm724x/gdm_mux.c
drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c
drivers/staging/lustre/lustre/llite/llite_internal.h
drivers/staging/lustre/lustre/llite/symlink.c
drivers/staging/lustre/lustre/ptlrpc/service.c
drivers/staging/ozwpan/ozhcd.c
drivers/staging/ozwpan/ozusbif.h
drivers/staging/ozwpan/ozusbsvc1.c
drivers/staging/rtl8712/rtl8712_led.c
drivers/staging/rtl8712/rtl871x_cmd.c
drivers/staging/rtl8712/rtl871x_ioctl_linux.c
drivers/staging/rtl8712/rtl871x_mlme.c
drivers/staging/rtl8712/rtl871x_pwrctrl.c
drivers/staging/rtl8712/rtl871x_sta_mgt.c
drivers/staging/sm750fb/sm750.c
drivers/staging/vt6655/card.c
drivers/staging/vt6655/card.h
drivers/staging/vt6655/device_main.c
drivers/staging/vt6656/rxtx.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_login.c
drivers/target/iscsi/iscsi_target_tpg.c
drivers/target/target_core_alua.c
drivers/target/target_core_configfs.c
drivers/target/target_core_device.c
drivers/target/target_core_file.c
drivers/target/target_core_iblock.c
drivers/target/target_core_internal.h
drivers/target/target_core_pr.c
drivers/target/target_core_pscsi.c
drivers/target/target_core_pscsi.h
drivers/target/target_core_rd.c
drivers/target/target_core_sbc.c
drivers/target/target_core_transport.c
drivers/target/target_core_user.c
drivers/target/target_core_xcopy.c
drivers/thermal/armada_thermal.c
drivers/thermal/intel_powerclamp.c
drivers/thermal/rockchip_thermal.c
drivers/thermal/thermal_core.h
drivers/thermal/ti-soc-thermal/dra752-thermal-data.c
drivers/thermal/ti-soc-thermal/omap5-thermal-data.c
drivers/thermal/ti-soc-thermal/ti-bandgap.c
drivers/thermal/ti-soc-thermal/ti-bandgap.h
drivers/tty/hvc/hvc_xen.c
drivers/tty/mips_ejtag_fdc.c
drivers/tty/n_gsm.c
drivers/tty/n_hdlc.c
drivers/tty/n_tty.c
drivers/tty/pty.c
drivers/tty/serial/8250/8250_omap.c
drivers/tty/serial/amba-pl011.c
drivers/tty/serial/earlycon.c
drivers/tty/serial/imx.c
drivers/tty/serial/omap-serial.c
drivers/tty/tty_buffer.c
drivers/usb/chipidea/debug.c
drivers/usb/core/quirks.c
drivers/usb/dwc3/core.h
drivers/usb/dwc3/dwc3-omap.c
drivers/usb/gadget/configfs.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/gadget/function/f_hid.c
drivers/usb/gadget/function/f_midi.c
drivers/usb/gadget/function/f_uac1.c
drivers/usb/gadget/function/u_serial.c
drivers/usb/gadget/legacy/acm_ms.c
drivers/usb/gadget/legacy/audio.c
drivers/usb/gadget/legacy/cdc2.c
drivers/usb/gadget/legacy/dbgp.c
drivers/usb/gadget/legacy/ether.c
drivers/usb/gadget/legacy/g_ffs.c
drivers/usb/gadget/legacy/gmidi.c
drivers/usb/gadget/legacy/hid.c
drivers/usb/gadget/legacy/mass_storage.c
drivers/usb/gadget/legacy/multi.c
drivers/usb/gadget/legacy/ncm.c
drivers/usb/gadget/legacy/nokia.c
drivers/usb/gadget/legacy/printer.c
drivers/usb/gadget/legacy/serial.c
drivers/usb/gadget/legacy/tcm_usb_gadget.c
drivers/usb/gadget/legacy/webcam.c
drivers/usb/gadget/legacy/zero.c
drivers/usb/gadget/udc/at91_udc.c
drivers/usb/gadget/udc/atmel_usba_udc.c
drivers/usb/gadget/udc/fsl_udc_core.c
drivers/usb/gadget/udc/fusb300_udc.c
drivers/usb/gadget/udc/m66592-udc.c
drivers/usb/gadget/udc/r8a66597-udc.c
drivers/usb/gadget/udc/s3c2410_udc.c
drivers/usb/gadget/udc/udc-xilinx.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/host/xhci.h
drivers/usb/musb/musb_core.c
drivers/usb/phy/phy-ab8500-usb.c
drivers/usb/phy/phy-isp1301-omap.c
drivers/usb/phy/phy-tahvo.c
drivers/usb/renesas_usbhs/fifo.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/pl2303.c
drivers/usb/serial/pl2303.h
drivers/usb/serial/visor.c
drivers/usb/storage/unusual_devs.h
drivers/vfio/pci/vfio_pci.c
drivers/vfio/vfio.c
drivers/vhost/scsi.c
drivers/video/backlight/pwm_bl.c
drivers/virtio/virtio_pci_common.c
drivers/xen/events/events_2l.c
drivers/xen/events/events_base.c
drivers/xen/gntdev.c
drivers/xen/grant-table.c
drivers/xen/manage.c
drivers/xen/swiotlb-xen.c
drivers/xen/xen-pciback/conf_space.c
drivers/xen/xen-pciback/conf_space.h
drivers/xen/xen-pciback/conf_space_header.c
drivers/xen/xenbus/xenbus_probe.c
fs/9p/v9fs.h
fs/9p/vfs_inode.c
fs/9p/vfs_inode_dotl.c
fs/autofs4/symlink.c
fs/befs/linuxvfs.c
fs/binfmt_elf.c
fs/btrfs/backref.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/free-space-cache.c
fs/btrfs/ordered-data.c
fs/btrfs/volumes.c
fs/ceph/inode.c
fs/cifs/cifs_dfs_ref.c
fs/cifs/cifs_unicode.c
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/cifsproto.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/link.c
fs/cifs/readdir.c
fs/cifs/smb1ops.c
fs/cifs/smb2pdu.c
fs/configfs/symlink.c
fs/dcache.c
fs/debugfs/file.c
fs/debugfs/inode.c
fs/ecryptfs/inode.c
fs/efivarfs/super.c
fs/exec.c
fs/exofs/Kbuild
fs/exofs/exofs.h
fs/exofs/inode.c
fs/exofs/namei.c
fs/exofs/symlink.c [deleted file]
fs/ext2/inode.c
fs/ext2/namei.c
fs/ext2/symlink.c
fs/ext3/inode.c
fs/ext3/namei.c
fs/ext3/symlink.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.c
fs/ext4/extents.c
fs/ext4/inode.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/symlink.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/namei.c
fs/f2fs/super.c
fs/fhandle.c
fs/freevxfs/vxfs_extern.h
fs/freevxfs/vxfs_immed.c
fs/freevxfs/vxfs_inode.c
fs/fuse/dir.c
fs/gfs2/inode.c
fs/hostfs/hostfs_kern.c
fs/hppfs/hppfs.c
fs/inode.c
fs/jbd2/recovery.c
fs/jbd2/revoke.c
fs/jbd2/transaction.c
fs/jffs2/dir.c
fs/jffs2/fs.c
fs/jffs2/symlink.c
fs/jfs/inode.c
fs/jfs/namei.c
fs/jfs/symlink.c
fs/kernfs/dir.c
fs/kernfs/symlink.c
fs/libfs.c
fs/logfs/dir.c
fs/mount.h
fs/namei.c
fs/namespace.c
fs/nfs/nfs4proc.c
fs/nfs/symlink.c
fs/nfs/write.c
fs/nfsd/blocklayout.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4state.c
fs/nfsd/state.h
fs/nfsd/xdr4.h
fs/ntfs/namei.c
fs/omfs/bitmap.c
fs/omfs/inode.c
fs/open.c
fs/overlayfs/copy_up.c
fs/overlayfs/dir.c
fs/overlayfs/inode.c
fs/overlayfs/super.c
fs/proc/base.c
fs/proc/inode.c
fs/proc/namespaces.c
fs/proc/self.c
fs/proc/thread_self.c
fs/select.c
fs/splice.c
fs/sysv/Makefile
fs/sysv/inode.c
fs/sysv/symlink.c [deleted file]
fs/sysv/sysv.h
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/super.c
fs/ufs/inode.c
fs/ufs/namei.c
fs/ufs/symlink.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_leaf.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/xfs_attr_inactive.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_mount.c
include/asm-generic/barrier.h
include/asm-generic/cmpxchg.h
include/asm-generic/futex.h
include/asm-generic/preempt.h
include/asm-generic/qspinlock.h [new file with mode: 0644]
include/asm-generic/qspinlock_types.h [new file with mode: 0644]
include/drm/drm_pciids.h
include/linux/backing-dev.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/bottom_half.h
include/linux/brcmphy.h
include/linux/compiler.h
include/linux/cpumask.h
include/linux/debugfs.h
include/linux/fs.h
include/linux/ftrace_event.h
include/linux/gfp.h
include/linux/hardirq.h
include/linux/hid-sensor-hub.h
include/linux/highmem.h
include/linux/init_task.h
include/linux/intel-iommu.h
include/linux/io-mapping.h
include/linux/irqchip/arm-gic.h
include/linux/kernel.h
include/linux/ktime.h
include/linux/lglock.h
include/linux/libata.h
include/linux/lockdep.h
include/linux/memcontrol.h
include/linux/namei.h
include/linux/netdevice.h
include/linux/of.h
include/linux/osq_lock.h
include/linux/pci_ids.h
include/linux/percpu_counter.h
include/linux/perf_event.h
include/linux/platform_data/si5351.h
include/linux/preempt.h
include/linux/preempt_mask.h [deleted file]
include/linux/rculist.h
include/linux/rcupdate.h
include/linux/rcutiny.h
include/linux/rcutree.h
include/linux/rhashtable.h
include/linux/sched.h
include/linux/sched/rt.h
include/linux/security.h
include/linux/skbuff.h
include/linux/spinlock.h
include/linux/tcp.h
include/linux/topology.h
include/linux/tty.h
include/linux/uaccess.h
include/linux/uidgid.h
include/linux/wait.h
include/net/cfg802154.h
include/net/codel.h
include/net/inet_connection_sock.h
include/net/mac80211.h
include/net/mac802154.h
include/net/sctp/sctp.h
include/net/tcp.h
include/rdma/ib_addr.h
include/rdma/ib_cm.h
include/rdma/iw_portmap.h
include/sound/hda_regmap.h
include/target/target_core_backend.h
include/target/target_core_configfs.h
include/target/target_core_fabric.h
include/trace/events/kmem.h
include/trace/events/sched.h
include/trace/events/writeback.h
include/uapi/drm/radeon_drm.h
include/uapi/linux/inet_diag.h
include/uapi/linux/mpls.h
include/uapi/linux/netfilter/nf_conntrack_tcp.h
include/uapi/linux/perf_event.h
include/uapi/linux/rtnetlink.h
include/uapi/linux/tcp.h
include/uapi/linux/virtio_balloon.h
include/uapi/rdma/rdma_netlink.h
include/xen/events.h
include/xen/grant_table.h
include/xen/xen-ops.h
init/Kconfig
init/do_mounts.c
ipc/mqueue.c
kernel/Kconfig.locks
kernel/compat.c
kernel/cpu.c
kernel/events/core.c
kernel/events/internal.h
kernel/events/ring_buffer.c
kernel/fork.c
kernel/futex.c
kernel/irq/dummychip.c
kernel/locking/Makefile
kernel/locking/lglock.c
kernel/locking/lockdep.c
kernel/locking/lockdep_proc.c
kernel/locking/locktorture.c
kernel/locking/mcs_spinlock.h
kernel/locking/qrwlock.c
kernel/locking/qspinlock.c [new file with mode: 0644]
kernel/locking/qspinlock_paravirt.h [new file with mode: 0644]
kernel/locking/rtmutex.c
kernel/locking/rwsem-xadd.c
kernel/module.c
kernel/rcu/rcutorture.c
kernel/rcu/srcu.c
kernel/rcu/tiny.c
kernel/rcu/tiny_plugin.h
kernel/rcu/tree.c
kernel/rcu/tree.h
kernel/rcu/tree_plugin.h
kernel/rcu/tree_trace.c
kernel/rcu/update.c
kernel/sched/Makefile
kernel/sched/auto_group.c
kernel/sched/auto_group.h
kernel/sched/core.c
kernel/sched/cputime.c
kernel/sched/deadline.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/loadavg.c [moved from kernel/sched/proc.c with 62% similarity]
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/stats.h
kernel/sched/wait.c
kernel/signal.c
kernel/stop_machine.c
kernel/time/clockevents.c
kernel/time/hrtimer.c
kernel/time/posix-cpu-timers.c
kernel/torture.c
kernel/trace/ring_buffer_benchmark.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_output.c
kernel/watchdog.c
lib/Kconfig.debug
lib/cpu_rmap.c
lib/cpumask.c
lib/mpi/longlong.h
lib/percpu_counter.c
lib/radix-tree.c
lib/rhashtable.c
lib/strnlen_user.c
lib/swiotlb.c
mm/backing-dev.c
mm/kmemleak.c
mm/memcontrol.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/page-writeback.c
mm/page_isolation.c
mm/shmem.c
mm/zsmalloc.c
net/8021q/vlan.c
net/bluetooth/hci_core.c
net/bridge/br_fdb.c
net/bridge/br_multicast.c
net/bridge/br_netfilter.c
net/bridge/br_stp_timer.c
net/caif/caif_socket.c
net/ceph/osd_client.c
net/core/dev.c
net/core/net_namespace.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock.c
net/dsa/dsa.c
net/ieee802154/Makefile
net/ieee802154/nl-phy.c
net/ieee802154/nl802154.c
net/ieee802154/rdev-ops.h
net/ieee802154/trace.c [new file with mode: 0644]
net/ieee802154/trace.h [new file with mode: 0644]
net/ipv4/esp4.c
net/ipv4/fib_trie.c
net/ipv4/inet_diag.c
net/ipv4/ip_vti.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_cong.c
net/ipv4/tcp_dctcp.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_illinois.c
net/ipv4/tcp_input.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_vegas.c
net/ipv4/tcp_vegas.h
net/ipv4/tcp_westwood.c
net/ipv4/udp.c
net/ipv6/addrconf_core.c
net/ipv6/esp6.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_output.c
net/ipv6/ip6_vti.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/mac80211/cfg.c
net/mac80211/ieee80211_i.h
net/mac80211/iface.c
net/mac80211/key.c
net/mac80211/key.h
net/mac80211/rx.c
net/mac80211/sta_info.c
net/mac80211/util.c
net/mac80211/wep.c
net/mac802154/cfg.c
net/mac802154/ieee802154_i.h
net/mac802154/iface.c
net/mac802154/llsec.c
net/mac802154/main.c
net/mpls/af_mpls.c
net/mpls/internal.h
net/netfilter/Kconfig
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue_core.c
net/netlink/af_netlink.c
net/openvswitch/vport-netdev.c
net/packet/af_packet.c
net/rds/connection.c
net/rds/ib_cm.c
net/rds/tcp_connect.c
net/rds/tcp_listen.c
net/sched/cls_api.c
net/sched/sch_api.c
net/sched/sch_codel.c
net/sched/sch_fq_codel.c
net/sched/sch_gred.c
net/sctp/auth.c
net/sunrpc/auth_gss/gss_rpc_xdr.c
net/switchdev/switchdev.c
net/tipc/socket.c
net/unix/af_unix.c
net/wireless/wext-compat.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_replay.c
net/xfrm/xfrm_state.c
scripts/checkpatch.pl
scripts/gdb/linux/modules.py
security/capability.c
security/security.c
security/selinux/avc.c
security/selinux/hooks.c
security/selinux/include/avc.h
sound/atmel/ac97c.c
sound/core/pcm_lib.c
sound/hda/hdac_regmap.c
sound/mips/Kconfig
sound/pci/hda/hda_codec.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_intel.c
sound/pci/hda/hda_local.h
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/patch_sigmatel.c
sound/pci/hda/patch_via.c
sound/pci/hda/thinkpad_helper.c
sound/soc/codecs/mc13783.c
sound/soc/codecs/uda1380.c
sound/soc/codecs/wm8960.c
sound/soc/codecs/wm8994.c
sound/soc/davinci/davinci-mcasp.c
sound/soc/soc-dapm.c
sound/usb/mixer.c
sound/usb/mixer_maps.c
sound/usb/quirks.c
tools/Makefile
tools/arch/alpha/include/asm/barrier.h [new file with mode: 0644]
tools/arch/arm/include/asm/barrier.h [new file with mode: 0644]
tools/arch/arm64/include/asm/barrier.h [new file with mode: 0644]
tools/arch/ia64/include/asm/barrier.h [new file with mode: 0644]
tools/arch/mips/include/asm/barrier.h [new file with mode: 0644]
tools/arch/powerpc/include/asm/barrier.h [new file with mode: 0644]
tools/arch/s390/include/asm/barrier.h [new file with mode: 0644]
tools/arch/sh/include/asm/barrier.h [new file with mode: 0644]
tools/arch/sparc/include/asm/barrier.h [new file with mode: 0644]
tools/arch/sparc/include/asm/barrier_32.h [new file with mode: 0644]
tools/arch/sparc/include/asm/barrier_64.h [new file with mode: 0644]
tools/arch/tile/include/asm/barrier.h [new file with mode: 0644]
tools/arch/x86/include/asm/atomic.h [new file with mode: 0644]
tools/arch/x86/include/asm/barrier.h [new file with mode: 0644]
tools/arch/x86/include/asm/rmwcc.h [new file with mode: 0644]
tools/arch/xtensa/include/asm/barrier.h [new file with mode: 0644]
tools/build/Makefile.build
tools/build/Makefile.feature
tools/build/tests/ex/Build
tools/build/tests/ex/empty2/README [new file with mode: 0644]
tools/include/asm-generic/atomic-gcc.h [new file with mode: 0644]
tools/include/asm-generic/barrier.h [new file with mode: 0644]
tools/include/asm/atomic.h [new file with mode: 0644]
tools/include/asm/barrier.h [new file with mode: 0644]
tools/include/linux/atomic.h [new file with mode: 0644]
tools/include/linux/compiler.h
tools/include/linux/kernel.h [moved from tools/perf/util/include/linux/kernel.h with 97% similarity]
tools/include/linux/list.h [moved from tools/perf/util/include/linux/list.h with 90% similarity]
tools/include/linux/poison.h [new file with mode: 0644]
tools/include/linux/types.h
tools/lib/api/Makefile
tools/lib/lockdep/Makefile
tools/lib/lockdep/uinclude/linux/kernel.h
tools/lib/traceevent/.gitignore
tools/lib/traceevent/Makefile
tools/lib/traceevent/event-parse.c
tools/lib/traceevent/event-parse.h
tools/lib/traceevent/plugin_cfg80211.c
tools/net/bpf_jit_disasm.c
tools/perf/.gitignore
tools/perf/Documentation/callchain-overhead-calculation.txt [new file with mode: 0644]
tools/perf/Documentation/perf-bench.txt
tools/perf/Documentation/perf-inject.txt
tools/perf/Documentation/perf-kmem.txt
tools/perf/Documentation/perf-kvm.txt
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perf-trace.txt
tools/perf/MANIFEST
tools/perf/Makefile
tools/perf/Makefile.perf
tools/perf/arch/arm64/Build
tools/perf/arch/arm64/include/perf_regs.h
tools/perf/arch/arm64/tests/Build [new file with mode: 0644]
tools/perf/arch/arm64/tests/dwarf-unwind.c [new file with mode: 0644]
tools/perf/arch/arm64/tests/regs_load.S [new file with mode: 0644]
tools/perf/arch/common.c
tools/perf/arch/powerpc/util/Build
tools/perf/arch/powerpc/util/sym-handling.c [new file with mode: 0644]
tools/perf/bench/Build
tools/perf/bench/bench.h
tools/perf/bench/futex-requeue.c
tools/perf/bench/futex-wake-parallel.c [new file with mode: 0644]
tools/perf/bench/futex-wake.c
tools/perf/bench/numa.c
tools/perf/builtin-annotate.c
tools/perf/builtin-bench.c
tools/perf/builtin-buildid-list.c
tools/perf/builtin-diff.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-kvm.c
tools/perf/builtin-lock.c
tools/perf/builtin-mem.c
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/config/Makefile
tools/perf/config/utilities.mak
tools/perf/perf-sys.h
tools/perf/perf.h
tools/perf/tests/Build
tools/perf/tests/builtin-test.c
tools/perf/tests/code-reading.c
tools/perf/tests/dso-data.c
tools/perf/tests/dwarf-unwind.c
tools/perf/tests/evsel-roundtrip-name.c
tools/perf/tests/hists_common.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/hists_filter.c
tools/perf/tests/hists_link.c
tools/perf/tests/hists_output.c
tools/perf/tests/keep-tracking.c
tools/perf/tests/kmod-path.c
tools/perf/tests/make
tools/perf/tests/mmap-basic.c
tools/perf/tests/mmap-thread-lookup.c
tools/perf/tests/openat-syscall-all-cpus.c [moved from tools/perf/tests/open-syscall-all-cpus.c with 90% similarity]
tools/perf/tests/openat-syscall-tp-fields.c [moved from tools/perf/tests/open-syscall-tp-fields.c with 94% similarity]
tools/perf/tests/openat-syscall.c [moved from tools/perf/tests/open-syscall.c with 79% similarity]
tools/perf/tests/parse-events.c
tools/perf/tests/perf-time-to-tsc.c
tools/perf/tests/pmu.c
tools/perf/tests/switch-tracking.c
tools/perf/tests/tests.h
tools/perf/tests/thread-mg-share.c
tools/perf/tests/vmlinux-kallsyms.c
tools/perf/ui/browsers/annotate.c
tools/perf/ui/browsers/hists.c
tools/perf/ui/tui/setup.c
tools/perf/util/Build
tools/perf/util/annotate.c
tools/perf/util/annotate.h
tools/perf/util/auxtrace.c [new file with mode: 0644]
tools/perf/util/auxtrace.h [new file with mode: 0644]
tools/perf/util/build-id.c
tools/perf/util/cache.h
tools/perf/util/callchain.h
tools/perf/util/cgroup.c
tools/perf/util/cgroup.h
tools/perf/util/comm.c
tools/perf/util/data-convert-bt.c
tools/perf/util/db-export.c
tools/perf/util/dso.c
tools/perf/util/dso.h
tools/perf/util/dwarf-aux.c
tools/perf/util/dwarf-aux.h
tools/perf/util/environment.c
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/include/linux/poison.h [deleted file]
tools/perf/util/include/linux/rbtree.h
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/map.c
tools/perf/util/map.h
tools/perf/util/pager.c
tools/perf/util/parse-branch-options.c [new file with mode: 0644]
tools/perf/util/parse-branch-options.h [new file with mode: 0644]
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-events.l
tools/perf/util/parse-events.y
tools/perf/util/parse-options.h
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/pstack.c
tools/perf/util/pstack.h
tools/perf/util/python-ext-sources
tools/perf/util/record.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/stat-shadow.c [new file with mode: 0644]
tools/perf/util/stat.c
tools/perf/util/stat.h
tools/perf/util/strfilter.c
tools/perf/util/strfilter.h
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/thread-stack.c
tools/perf/util/thread-stack.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/thread_map.c
tools/perf/util/tool.h
tools/perf/util/trace-event-parse.c
tools/perf/util/unwind-libunwind.c
tools/perf/util/util.c
tools/perf/util/util.h
tools/perf/util/vdso.c
tools/perf/util/vdso.h
tools/perf/util/xyarray.c
tools/perf/util/xyarray.h
tools/power/x86/turbostat/turbostat.c
tools/testing/selftests/rcutorture/bin/configinit.sh
tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
tools/testing/selftests/rcutorture/bin/kvm.sh
tools/testing/selftests/rcutorture/configs/rcu/CFcommon
tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
tools/testing/selftests/rcutorture/configs/rcu/TASKS01
tools/testing/selftests/rcutorture/configs/rcu/TASKS02
tools/testing/selftests/rcutorture/configs/rcu/TASKS03
tools/testing/selftests/rcutorture/configs/rcu/TINY02
tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
tools/testing/selftests/rcutorture/configs/rcu/TREE01
tools/testing/selftests/rcutorture/configs/rcu/TREE02
tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
tools/testing/selftests/rcutorture/configs/rcu/TREE03
tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot [new file with mode: 0644]
tools/testing/selftests/rcutorture/configs/rcu/TREE04
tools/testing/selftests/rcutorture/configs/rcu/TREE05
tools/testing/selftests/rcutorture/configs/rcu/TREE06
tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
tools/testing/selftests/rcutorture/configs/rcu/TREE07
tools/testing/selftests/rcutorture/configs/rcu/TREE08
tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot [new file with mode: 0644]
tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
tools/testing/selftests/rcutorture/configs/rcu/TREE09
tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/check_cc.sh [new file with mode: 0755]
tools/testing/selftests/x86/entry_from_vm86.c [new file with mode: 0644]
tools/testing/selftests/x86/run_x86_tests.sh [deleted file]
tools/testing/selftests/x86/trivial_32bit_program.c
tools/testing/selftests/x86/trivial_64bit_program.c [new file with mode: 0644]
tools/thermal/tmon/Makefile
tools/vm/Makefile

diff --git a/CREDITS b/CREDITS
index 40cc4bf..ec7e6c7 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -3709,6 +3709,13 @@ N: Dirk Verworner
 D: Co-author of German book ``Linux-Kernel-Programmierung''
 D: Co-founder of Berlin Linux User Group
 
+N: Andrew Victor
+E: linux@maxim.org.za
+W: http://maxim.org.za/at91_26.html
+D: First maintainer of Atmel ARM-based SoC, aka AT91
+D: Introduced support for at91rm9200, the first chip of AT91 family
+S: South Africa
+
 N: Riku Voipio
 E: riku.voipio@iki.fi
 D: Author of PCA9532 LED and Fintek f75375s hwmon driver
index 99983e6..da95513 100644 (file)
@@ -162,7 +162,7 @@ Description:        Discover CPUs in the same CPU frequency coordination domain
 What:          /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
 Date:          August 2008
 KernelVersion: 2.6.27
-Contact:       discuss@x86-64.org
+Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Disable L3 cache indices
 
                These files exist in every CPU's cache/index3 directory. Each
index 453ebe6..f05a9af 100644 (file)
@@ -10,7 +10,19 @@ also be used to protect arrays.  Three situations are as follows:
 
 3.  Resizeable Arrays
 
-Each of these situations are discussed below.
+Each of these three situations involves an RCU-protected pointer to an
+array that is separately indexed.  It might be tempting to consider use
+of RCU to instead protect the index into an array, however, this use
+case is -not- supported.  The problem with RCU-protected indexes into
+arrays is that compilers can play way too many optimization games with
+integers, which means that the rules governing handling of these indexes
+are far more trouble than they are worth.  If RCU-protected indexes into
+arrays prove to be particularly valuable (which they have not thus far),
+explicit cooperation from the compiler will be required to permit them
+to be safely used.
+
+That aside, each of the three RCU-protected pointer situations are
+described in the following sections.
 
 
 Situation 1: Hash Tables
@@ -36,9 +48,9 @@ Quick Quiz:  Why is it so important that updates be rare when
 Situation 3: Resizeable Arrays
 
 Use of RCU for resizeable arrays is demonstrated by the grow_ary()
-function used by the System V IPC code.  The array is used to map from
-semaphore, message-queue, and shared-memory IDs to the data structure
-that represents the corresponding IPC construct.  The grow_ary()
+function formerly used by the System V IPC code.  The array is used
+to map from semaphore, message-queue, and shared-memory IDs to the data
+structure that represents the corresponding IPC construct.  The grow_ary()
 function does not acquire any locks; instead its caller must hold the
 ids->sem semaphore.
 
index cd83d23..da51d30 100644 (file)
@@ -47,11 +47,6 @@ checking of rcu_dereference() primitives:
                Use explicit check expression "c" along with
                srcu_read_lock_held()().  This is useful in code that
                is invoked by both SRCU readers and updaters.
-       rcu_dereference_index_check(p, c):
-               Use explicit check expression "c", but the caller
-               must supply one of the rcu_read_lock_held() functions.
-               This is useful in code that uses RCU-protected arrays
-               that is invoked by both RCU readers and updaters.
        rcu_dereference_raw(p):
                Don't check.  (Use sparingly, if at all.)
        rcu_dereference_protected(p, c):
@@ -64,11 +59,6 @@ checking of rcu_dereference() primitives:
                but retain the compiler constraints that prevent duplicating
                or coalescsing.  This is useful when when testing the
                value of the pointer itself, for example, against NULL.
-       rcu_access_index(idx):
-               Return the value of the index and omit all barriers, but
-               retain the compiler constraints that prevent duplicating
-               or coalescsing.  This is useful when when testing the
-               value of the index itself, for example, against -1.
 
 The rcu_dereference_check() check expression can be any boolean
 expression, but would normally include a lockdep expression.  However,
index ceb05da..1e6c0da 100644 (file)
@@ -25,17 +25,6 @@ o    You must use one of the rcu_dereference() family of primitives
        for an example where the compiler can in fact deduce the exact
        value of the pointer, and thus cause misordering.
 
-o      Do not use single-element RCU-protected arrays.  The compiler
-       is within its right to assume that the value of an index into
-       such an array must necessarily evaluate to zero.  The compiler
-       could then substitute the constant zero for the computation, so
-       that the array index no longer depended on the value returned
-       by rcu_dereference().  If the array index no longer depends
-       on rcu_dereference(), then both the compiler and the CPU
-       are within their rights to order the array access before the
-       rcu_dereference(), which can cause the array access to return
-       garbage.
-
 o      Avoid cancellation when using the "+" and "-" infix arithmetic
        operators.  For example, for a given variable "x", avoid
        "(x-x)".  There are similar arithmetic pitfalls from other
@@ -76,14 +65,15 @@ o   Do not use the results from the boolean "&&" and "||" when
        dereferencing.  For example, the following (rather improbable)
        code is buggy:
 
-               int a[2];
-               int index;
-               int force_zero_index = 1;
+               int *p;
+               int *q;
 
                ...
 
-               r1 = rcu_dereference(i1)
-               r2 = a[r1 && force_zero_index];  /* BUGGY!!! */
+               p = rcu_dereference(gp)
+               q = &global_q;
+               q += p != &oom_p1 && p != &oom_p2;
+               r1 = *q;  /* BUGGY!!! */
 
        The reason this is buggy is that "&&" and "||" are often compiled
        using branches.  While weak-memory machines such as ARM or PowerPC
@@ -94,14 +84,15 @@ o   Do not use the results from relational operators ("==", "!=",
        ">", ">=", "<", or "<=") when dereferencing.  For example,
        the following (quite strange) code is buggy:
 
-               int a[2];
-               int index;
-               int flip_index = 0;
+               int *p;
+               int *q;
 
                ...
 
-               r1 = rcu_dereference(i1)
-               r2 = a[r1 != flip_index];  /* BUGGY!!! */
+               p = rcu_dereference(gp)
+               q = &global_q;
+               q += p > &oom_p;
+               r1 = *q;  /* BUGGY!!! */
 
        As before, the reason this is buggy is that relational operators
        are often compiled using branches.  And as before, although
@@ -193,6 +184,11 @@ o  Be very careful about comparing pointers obtained from
                pointer.  Note that the volatile cast in rcu_dereference()
                will normally prevent the compiler from knowing too much.
 
+               However, please note that if the compiler knows that the
+               pointer takes on only one of two values, a not-equal
+               comparison will provide exactly the information that the
+               compiler needs to deduce the value of the pointer.
+
 o      Disable any value-speculation optimizations that your compiler
        might provide, especially if you are making use of feedback-based
        optimizations that take data collected from prior runs.  Such
index 88dfce1..5746b0c 100644 (file)
@@ -256,7 +256,9 @@ rcu_dereference()
        If you are going to be fetching multiple fields from the
        RCU-protected structure, using the local variable is of
        course preferred.  Repeated rcu_dereference() calls look
-       ugly and incur unnecessary overhead on Alpha CPUs.
+       ugly, do not guarantee that the same pointer will be returned
+       if an update happened while in the critical section, and incur
+       unnecessary overhead on Alpha CPUs.
 
        Note that the value returned by rcu_dereference() is valid
        only within the enclosing RCU read-side critical section.
@@ -879,9 +881,7 @@ SRCU:       Initialization/cleanup
 
 All:  lockdep-checked RCU-protected pointer access
 
-       rcu_access_index
        rcu_access_pointer
-       rcu_dereference_index_check
        rcu_dereference_raw
        rcu_lockdep_assert
        rcu_sleep_check
index 750401f..15dfce7 100644 (file)
@@ -253,7 +253,7 @@ input driver:
 GPIO support
 ~~~~~~~~~~~~
 ACPI 5 introduced two new resources to describe GPIO connections: GpioIo
-and GpioInt. These resources are used be used to pass GPIO numbers used by
+and GpioInt. These resources can be used to pass GPIO numbers used by
 the device to the driver. ACPI 5.1 extended this with _DSD (Device
 Specific Data) which made it possible to name the GPIOs among other things.
 
index ae36fcf..f35dad1 100644 (file)
@@ -1,9 +1,9 @@
 _DSD Device Properties Related to GPIO
 --------------------------------------
 
-With the release of ACPI 5.1 and the _DSD configuration objecte names
-can finally be given to GPIOs (and other things as well) returned by
-_CRS.  Previously, we were only able to use an integer index to find
+With the release of ACPI 5.1, the _DSD configuration object finally
+allows names to be given to GPIOs (and other things as well) returned
+by _CRS.  Previously, we were only able to use an integer index to find
 the corresponding GPIO, which is pretty error prone (it depends on
 the _CRS output ordering, for example).
 
index 0aad6de..12b1b25 100644 (file)
@@ -1,6 +1,6 @@
 
 Export CPU topology info via sysfs. Items (attributes) are similar
-to /proc/cpuinfo.
+to /proc/cpuinfo output of some architectures:
 
 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id:
 
@@ -23,20 +23,35 @@ to /proc/cpuinfo.
 4) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
 
        internal kernel map of cpuX's hardware threads within the same
-       core as cpuX
+       core as cpuX.
 
-5) /sys/devices/system/cpu/cpuX/topology/core_siblings:
+5) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
+
+       human-readable list of cpuX's hardware threads within the same
+       core as cpuX.
+
+6) /sys/devices/system/cpu/cpuX/topology/core_siblings:
 
        internal kernel map of cpuX's hardware threads within the same
        physical_package_id.
 
-6) /sys/devices/system/cpu/cpuX/topology/book_siblings:
+7) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
+
+       human-readable list of cpuX's hardware threads within the same
+       physical_package_id.
+
+8) /sys/devices/system/cpu/cpuX/topology/book_siblings:
 
        internal kernel map of cpuX's hardware threads within the same
        book_id.
 
+9) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
+
+       human-readable list of cpuX's hardware threads within the same
+       book_id.
+
 To implement it in an architecture-neutral way, a new source file,
-drivers/base/topology.c, is to export the 4 or 6 attributes. The two book
+drivers/base/topology.c, is to export the 6 or 9 attributes. The three book
 related sysfs files will only be created if CONFIG_SCHED_BOOK is selected.
 
 For an architecture to support this feature, it must define some of
@@ -44,20 +59,22 @@ these macros in include/asm-XXX/topology.h:
 #define topology_physical_package_id(cpu)
 #define topology_core_id(cpu)
 #define topology_book_id(cpu)
-#define topology_thread_cpumask(cpu)
+#define topology_sibling_cpumask(cpu)
 #define topology_core_cpumask(cpu)
 #define topology_book_cpumask(cpu)
 
-The type of **_id is int.
-The type of siblings is (const) struct cpumask *.
+The type of **_id macros is int.
+The type of **_cpumask macros is (const) struct cpumask *. The latter
+correspond with appropriate **_siblings sysfs attributes (except for
+topology_sibling_cpumask() which corresponds with thread_siblings).
 
 To be consistent on all architectures, include/linux/topology.h
 provides default definitions for any of the above macros that are
 not defined by include/asm-XXX/topology.h:
 1) physical_package_id: -1
 2) core_id: 0
-3) thread_siblings: just the given CPU
-4) core_siblings: just the given CPU
+3) sibling_cpumask: just the given CPU
+4) core_cpumask: just the given CPU
 
 For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
 default definitions for topology_book_id() and topology_book_cpumask().
index 974624e..161448d 100644 (file)
@@ -6,6 +6,7 @@ provided by Arteris.
 Required properties:
 - compatible : Should be "ti,omap3-l3-smx" for OMAP3 family
                Should be "ti,omap4-l3-noc" for OMAP4 family
+               Should be "ti,omap5-l3-noc" for OMAP5 family
               Should be "ti,dra7-l3-noc" for DRA7 family
                Should be "ti,am4372-l3-noc" for AM43 family
 - reg: Contains L3 register address range for each noc domain.
index 7a4d492..5ba6450 100644 (file)
@@ -248,7 +248,7 @@ Required properties for peripheral clocks:
 - #address-cells : shall be 1 (reg is used to encode clk id).
 - clocks : shall be the master clock phandle.
        e.g. clocks = <&mck>;
-- name: device tree node describing a specific system clock.
+- name: device tree node describing a specific peripheral clock.
        * #clock-cells : from common clock binding; shall be set to 0.
        * reg: peripheral id. See Atmel's datasheets to get a full
          list of peripheral ids.
index c40711e..28b2830 100644 (file)
@@ -17,7 +17,8 @@ Required properties:
 - #clock-cells: from common clock binding; shall be set to 1.
 - clocks: from common clock binding; list of parent clock
   handles, shall be xtal reference clock or xtal and clkin for
-  si5351c only.
+  si5351c only. Corresponding clock input names are "xtal" and
+  "clkin" respectively.
 - #address-cells: shall be set to 1.
 - #size-cells: shall be set to 0.
 
@@ -71,6 +72,7 @@ i2c-master-node {
 
                /* connect xtal input to 25MHz reference */
                clocks = <&ref25>;
+               clock-names = "xtal";
 
                /* connect xtal input as source of pll0 and pll1 */
                silabs,pll-source = <0 0>, <1 0>;
index a4873e5..e30e184 100644 (file)
@@ -38,7 +38,7 @@ dma_apbx: dma-apbx@80024000 {
                      80 81 68 69
                      70 71 72 73
                      74 75 76 77>;
-       interrupt-names = "auart4-rx", "aurat4-tx", "spdif-tx", "empty",
+       interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty",
                          "saif0", "saif1", "i2c0", "i2c1",
                          "auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx",
                          "auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx";
index 4b641c7..09089a6 100644 (file)
@@ -32,8 +32,8 @@ Example:
                touchscreen-fuzz-x = <4>;
                touchscreen-fuzz-y = <7>;
                touchscreen-fuzz-pressure = <2>;
-               touchscreen-max-x = <4096>;
-               touchscreen-max-y = <4096>;
+               touchscreen-size-x = <4096>;
+               touchscreen-size-y = <4096>;
                touchscreen-max-pressure = <2048>;
 
                ti,x-plate-ohms = <280>;
@@ -8,8 +8,8 @@ Required properties:
                is not Linux-only, but in case of Linux, see the "m25p_ids"
                table in drivers/mtd/devices/m25p80.c for the list of supported
                chips.
-               Must also include "nor-jedec" for any SPI NOR flash that can be
-               identified by the JEDEC READ ID opcode (0x9F).
+               Must also include "jedec,spi-nor" for any SPI NOR flash that can
+               be identified by the JEDEC READ ID opcode (0x9F).
 - reg : Chip-Select number
 - spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
 
@@ -25,7 +25,7 @@ Example:
        flash: m25p80@0 {
                #address-cells = <1>;
                #size-cells = <1>;
-               compatible = "spansion,m25p80", "nor-jedec";
+               compatible = "spansion,m25p80", "jedec,spi-nor";
                reg = <0>;
                spi-max-frequency = <40000000>;
                m25p,fast-read;
index abd67c1..4451ee9 100644 (file)
@@ -3,7 +3,8 @@
 Required properties:
 - compatible: Should be "cdns,[<chip>-]{emac}"
   Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC.
-  or the generic form: "cdns,emac".
+  Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC.
+  Or the generic form: "cdns,emac".
 - reg: Address and length of the register set for the device
 - interrupts: Should contain macb interrupt
 - phy-mode: see ethernet.txt file in the same directory.
index dc2a18f..ddbe304 100644 (file)
@@ -15,10 +15,8 @@ Optional properties:
   - phys: phandle + phy specifier pair
   - phy-names: must be "usb"
   - dmas: Must contain a list of references to DMA specifiers.
-  - dma-names : Must contain a list of DMA names:
-   - tx0 ... tx<n>
-   - rx0 ... rx<n>
-    - This <n> means DnFIFO in USBHS module.
+  - dma-names : named "ch%d", where %d is the channel number ranging from zero
+                to the number of channels (DnFIFOs) minus one.
 
 Example:
        usbhs: usb@e6590000 {
index 0a926e2..6a34a0f 100644 (file)
@@ -50,8 +50,8 @@ prototypes:
        int (*rename2) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *, unsigned int);
        int (*readlink) (struct dentry *, char __user *,int);
-       void * (*follow_link) (struct dentry *, struct nameidata *);
-       void (*put_link) (struct dentry *, struct nameidata *, void *);
+       const char *(*follow_link) (struct dentry *, void **);
+       void (*put_link) (struct inode *, void *);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int, unsigned int);
        int (*get_acl)(struct inode *, int);
index 7cac200..7eb762e 100644 (file)
@@ -1,41 +1,15 @@
-Support is available for filesystems that wish to do automounting support (such
-as kAFS which can be found in fs/afs/). This facility includes allowing
-in-kernel mounts to be performed and mountpoint degradation to be
-requested. The latter can also be requested by userspace.
+Support is available for filesystems that wish to do automounting
+support (such as kAFS which can be found in fs/afs/ and NFS in
+fs/nfs/). This facility includes allowing in-kernel mounts to be
+performed and mountpoint degradation to be requested. The latter can
+also be requested by userspace.
 
 
 ======================
 IN-KERNEL AUTOMOUNTING
 ======================
 
-A filesystem can now mount another filesystem on one of its directories by the
-following procedure:
-
- (1) Give the directory a follow_link() operation.
-
-     When the directory is accessed, the follow_link op will be called, and
-     it will be provided with the location of the mountpoint in the nameidata
-     structure (vfsmount and dentry).
-
- (2) Have the follow_link() op do the following steps:
-
-     (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a
-         superblock and gain a vfsmount structure representing it.
-
-     (b) Copy the nameidata provided as an argument and substitute the dentry
-        argument into it the copy.
-
-     (c) Call do_add_mount() to install the new vfsmount into the namespace's
-        mountpoint tree, thus making it accessible to userspace. Use the
-        nameidata set up in (b) as the destination.
-
-        If the mountpoint will be automatically expired, then do_add_mount()
-        should also be given the location of an expiration list (see further
-        down).
-
-     (d) Release the path in the nameidata argument and substitute in the new
-        vfsmount and its root dentry. The ref counts on these will need
-        incrementing.
+See section "Mount Traps" of  Documentation/filesystems/autofs4.txt
 
 Then from userspace, you can just do something like:
 
@@ -61,17 +35,18 @@ AUTOMATIC MOUNTPOINT EXPIRY
 ===========================
 
 Automatic expiration of mountpoints is easy, provided you've mounted the
-mountpoint to be expired in the automounting procedure outlined above.
+mountpoint to be expired in the automounting procedure outlined separately.
 
 To do expiration, you need to follow these steps:
 
- (3) Create at least one list off which the vfsmounts to be expired can be
-     hung. Access to this list will be governed by the vfsmount_lock.
+ (1) Create at least one list off which the vfsmounts to be expired can be
+     hung.
 
- (4) In step (2c) above, the call to do_add_mount() should be provided with a
-     pointer to this list. It will hang the vfsmount off of it if it succeeds.
+ (2) When a new mountpoint is created in the ->d_automount method, add
+     the mnt to the list using mnt_set_expiry()
+             mnt_set_expiry(newmnt, &afs_vfsmounts);
 
- (5) When you want mountpoints to be expired, call mark_mounts_for_expiry()
+ (3) When you want mountpoints to be expired, call mark_mounts_for_expiry()
      with a pointer to this list. This will process the list, marking every
      vfsmount thereon for potential expiry on the next call.
 
index e69274d..3eae250 100644 (file)
@@ -483,3 +483,20 @@ in your dentry operations instead.
 --
 [mandatory]
        ->aio_read/->aio_write are gone.  Use ->read_iter/->write_iter.
+---
+[recommended]
+       for embedded ("fast") symlinks just set inode->i_link to wherever the
+       symlink body is and use simple_follow_link() as ->follow_link().
+--
+[mandatory]
+       calling conventions for ->follow_link() have changed.  Instead of returning
+       cookie and using nd_set_link() to store the body to traverse, we return
+       the body to traverse and store the cookie using explicit void ** argument.
+       nameidata isn't passed at all - nd_jump_link() doesn't need it and
+       nd_[gs]et_link() is gone.
+--
+[mandatory]
+       calling conventions for ->put_link() have changed.  It gets inode instead of
+       dentry,  it does not get nameidata at all and it gets called only when cookie
+       is non-NULL.  Note that link body isn't available anymore, so if you need it,
+       store it as cookie.
index 5d833b3..b403b29 100644 (file)
@@ -350,8 +350,8 @@ struct inode_operations {
        int (*rename2) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *, unsigned int);
        int (*readlink) (struct dentry *, char __user *,int);
-        void * (*follow_link) (struct dentry *, struct nameidata *);
-        void (*put_link) (struct dentry *, struct nameidata *, void *);
+       const char *(*follow_link) (struct dentry *, void **);
+       void (*put_link) (struct inode *, void *);
        int (*permission) (struct inode *, int);
        int (*get_acl)(struct inode *, int);
        int (*setattr) (struct dentry *, struct iattr *);
@@ -436,16 +436,18 @@ otherwise noted.
 
   follow_link: called by the VFS to follow a symbolic link to the
        inode it points to.  Only required if you want to support
-       symbolic links.  This method returns a void pointer cookie
-       that is passed to put_link().
+       symbolic links.  This method returns the symlink body
+       to traverse (and possibly resets the current position with
+       nd_jump_link()).  If the body won't go away until the inode
+       is gone, nothing else is needed; if it needs to be otherwise
+       pinned, the data needed to release whatever we'd grabbed
+       is to be stored in void * variable passed by address to
+       follow_link() instance.
 
   put_link: called by the VFS to release resources allocated by
-       follow_link().  The cookie returned by follow_link() is passed
-       to this method as the last parameter.  It is used by
-       filesystems such as NFS where page cache is not stable
-       (i.e. page that was installed when the symbolic link walk
-       started might not be in the page cache at the end of the
-       walk).
+       follow_link().  The cookie stored by follow_link() is passed
+       to this method as the last parameter; only called when
+       cookie isn't NULL.
 
   permission: called by the VFS to check for access rights on a POSIX-like
        filesystem.
index 8eb88e9..711f75e 100644 (file)
@@ -20,7 +20,7 @@ Supported chips:
     Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp432.html
   * Texas Instruments TMP435
     Prefix: 'tmp435'
-    Addresses scanned: I2C 0x37, 0x48 - 0x4f
+    Addresses scanned: I2C 0x48 - 0x4f
     Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp435.html
 
 Authors:
index 389bb5d..b228ca5 100644 (file)
@@ -31,10 +31,10 @@ User manual
 ===========
 
 I2C slave backends behave like standard I2C clients. So, you can instantiate
-them like described in the document 'instantiating-devices'. A quick example
-for instantiating the slave-eeprom driver from userspace:
+them as described in the document 'instantiating-devices'. A quick example for
+instantiating the slave-eeprom driver from userspace at address 0x64 on bus 1:
 
-  # echo 0-0064 > /sys/bus/i2c/drivers/i2c-slave-eeprom/bind
+  # echo slave-24c02 0x64 > /sys/bus/i2c/devices/i2c-1/new_device
 
 Each backend should come with separate documentation to describe its specific
 behaviour and setup.
index 61ab162..60c9d6d 100644 (file)
@@ -1481,6 +1481,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        By default, super page will be supported if Intel IOMMU
                        has the capability. With this option, super page will
                        not be supported.
+               ecs_off [Default Off]
+                       By default, extended context tables will be supported if
+                       the hardware advertises that it has support both for the
+                       extended tables themselves, and also PASID support. With
+                       this option set, extended tables will not be used even
+                       on hardware which claims to support them.
 
        intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
                        0       disables intel_idle and fall back on acpi_idle.
@@ -2992,11 +2998,34 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        Set maximum number of finished RCU callbacks to
                        process in one batch.
 
+       rcutree.dump_tree=      [KNL]
+                       Dump the structure of the rcu_node combining tree
+                       out at early boot.  This is used for diagnostic
+                       purposes, to verify correct tree setup.
+
+       rcutree.gp_cleanup_delay=       [KNL]
+                       Set the number of jiffies to delay each step of
+                       RCU grace-period cleanup.  This only has effect
+                       when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set.
+
        rcutree.gp_init_delay=  [KNL]
                        Set the number of jiffies to delay each step of
                        RCU grace-period initialization.  This only has
-                       effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT is
-                       set.
+                       effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT
+                       is set.
+
+       rcutree.gp_preinit_delay=       [KNL]
+                       Set the number of jiffies to delay each step of
+                       RCU grace-period pre-initialization, that is,
+                       the propagation of recent CPU-hotplug changes up
+                       the rcu_node combining tree.  This only has effect
+                       when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set.
+
+       rcutree.rcu_fanout_exact= [KNL]
+                       Disable autobalancing of the rcu_node combining
+                       tree.  This is used by rcutorture, and might
+                       possibly be useful for architectures having high
+                       cache-to-cache transfer latencies.
 
        rcutree.rcu_fanout_leaf= [KNL]
                        Increase the number of CPUs assigned to each
@@ -3101,7 +3130,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        test, hence the "fake".
 
        rcutorture.nreaders= [KNL]
-                       Set number of RCU readers.
+                       Set number of RCU readers.  The value -1 selects
+                       N-1, where N is the number of CPUs.  A value
+                       "n" less than -1 selects N-n-2, where N is again
+                       the number of CPUs.  For example, -2 selects N
+                       (the number of CPUs), -3 selects N+1, and so on.
 
        rcutorture.object_debug= [KNL]
                        Enable debug-object double-call_rcu() testing.
index f957461..13feb69 100644 (file)
@@ -617,16 +617,16 @@ case what's actually required is:
 However, stores are not speculated.  This means that ordering -is- provided
 for load-store control dependencies, as in the following example:
 
-       q = ACCESS_ONCE(a);
+       q = READ_ONCE_CTRL(a);
        if (q) {
                ACCESS_ONCE(b) = p;
        }
 
-Control dependencies pair normally with other types of barriers.
-That said, please note that ACCESS_ONCE() is not optional!  Without the
-ACCESS_ONCE(), might combine the load from 'a' with other loads from
-'a', and the store to 'b' with other stores to 'b', with possible highly
-counterintuitive effects on ordering.
+Control dependencies pair normally with other types of barriers.  That
+said, please note that READ_ONCE_CTRL() is not optional!  Without the
+READ_ONCE_CTRL(), the compiler might combine the load from 'a' with
+other loads from 'a', and the store to 'b' with other stores to 'b',
+with possible highly counterintuitive effects on ordering.
 
 Worse yet, if the compiler is able to prove (say) that the value of
 variable 'a' is always non-zero, it would be well within its rights
@@ -636,12 +636,15 @@ as follows:
        q = a;
        b = p;  /* BUG: Compiler and CPU can both reorder!!! */
 
-So don't leave out the ACCESS_ONCE().
+Finally, the READ_ONCE_CTRL() includes an smp_read_barrier_depends()
+that DEC Alpha needs in order to respect control depedencies.
+
+So don't leave out the READ_ONCE_CTRL().
 
 It is tempting to try to enforce ordering on identical stores on both
 branches of the "if" statement as follows:
 
-       q = ACCESS_ONCE(a);
+       q = READ_ONCE_CTRL(a);
        if (q) {
                barrier();
                ACCESS_ONCE(b) = p;
@@ -655,7 +658,7 @@ branches of the "if" statement as follows:
 Unfortunately, current compilers will transform this as follows at high
 optimization levels:
 
-       q = ACCESS_ONCE(a);
+       q = READ_ONCE_CTRL(a);
        barrier();
        ACCESS_ONCE(b) = p;  /* BUG: No ordering vs. load from a!!! */
        if (q) {
@@ -685,7 +688,7 @@ memory barriers, for example, smp_store_release():
 In contrast, without explicit memory barriers, two-legged-if control
 ordering is guaranteed only when the stores differ, for example:
 
-       q = ACCESS_ONCE(a);
+       q = READ_ONCE_CTRL(a);
        if (q) {
                ACCESS_ONCE(b) = p;
                do_something();
@@ -694,14 +697,14 @@ ordering is guaranteed only when the stores differ, for example:
                do_something_else();
        }
 
-The initial ACCESS_ONCE() is still required to prevent the compiler from
-proving the value of 'a'.
+The initial READ_ONCE_CTRL() is still required to prevent the compiler
+from proving the value of 'a'.
 
 In addition, you need to be careful what you do with the local variable 'q',
 otherwise the compiler might be able to guess the value and again remove
 the needed conditional.  For example:
 
-       q = ACCESS_ONCE(a);
+       q = READ_ONCE_CTRL(a);
        if (q % MAX) {
                ACCESS_ONCE(b) = p;
                do_something();
@@ -714,7 +717,7 @@ If MAX is defined to be 1, then the compiler knows that (q % MAX) is
 equal to zero, in which case the compiler is within its rights to
 transform the above code into the following:
 
-       q = ACCESS_ONCE(a);
+       q = READ_ONCE_CTRL(a);
        ACCESS_ONCE(b) = p;
        do_something_else();
 
@@ -725,7 +728,7 @@ is gone, and the barrier won't bring it back.  Therefore, if you are
 relying on this ordering, you should make sure that MAX is greater than
 one, perhaps as follows:
 
-       q = ACCESS_ONCE(a);
+       q = READ_ONCE_CTRL(a);
        BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
        if (q % MAX) {
                ACCESS_ONCE(b) = p;
@@ -742,14 +745,15 @@ of the 'if' statement.
 You must also be careful not to rely too much on boolean short-circuit
 evaluation.  Consider this example:
 
-       q = ACCESS_ONCE(a);
+       q = READ_ONCE_CTRL(a);
        if (a || 1 > 0)
                ACCESS_ONCE(b) = 1;
 
-Because the second condition is always true, the compiler can transform
-this example as following, defeating control dependency:
+Because the first condition cannot fault and the second condition is
+always true, the compiler can transform this example as following,
+defeating control dependency:
 
-       q = ACCESS_ONCE(a);
+       q = READ_ONCE_CTRL(a);
        ACCESS_ONCE(b) = 1;
 
 This example underscores the need to ensure that the compiler cannot
@@ -762,8 +766,8 @@ demonstrated by two related examples, with the initial values of
 x and y both being zero:
 
        CPU 0                     CPU 1
-       =====================     =====================
-       r1 = ACCESS_ONCE(x);      r2 = ACCESS_ONCE(y);
+       =======================   =======================
+       r1 = READ_ONCE_CTRL(x);   r2 = READ_ONCE_CTRL(y);
        if (r1 > 0)               if (r2 > 0)
          ACCESS_ONCE(y) = 1;       ACCESS_ONCE(x) = 1;
 
@@ -783,7 +787,8 @@ But because control dependencies do -not- provide transitivity, the above
 assertion can fail after the combined three-CPU example completes.  If you
 need the three-CPU example to provide ordering, you will need smp_mb()
 between the loads and stores in the CPU 0 and CPU 1 code fragments,
-that is, just before or just after the "if" statements.
+that is, just before or just after the "if" statements.  Furthermore,
+the original two-CPU example is very fragile and should be avoided.
 
 These two examples are the LB and WWC litmus tests from this paper:
 http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf and this
@@ -791,6 +796,12 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html.
 
 In summary:
 
+  (*) Control dependencies must be headed by READ_ONCE_CTRL().
+      Or, as a much less preferable alternative, interpose
+      be headed by READ_ONCE() or an ACCESS_ONCE() read and must
+      have smp_read_barrier_depends() between this read and the
+      control-dependent write.
+
   (*) Control dependencies can order prior loads against later stores.
       However, they do -not- guarantee any other sort of ordering:
       Not prior loads against later loads, nor prior stores against
@@ -1662,7 +1673,7 @@ CPU from reordering them.
 
 There are some more advanced barrier functions:
 
- (*) set_mb(var, value)
+ (*) smp_store_mb(var, value)
 
      This assigns the value to the variable and then inserts a full memory
      barrier after it, depending on the function.  It isn't guaranteed to
@@ -1784,10 +1795,9 @@ for each construct.  These operations all imply certain barriers:
 
      Memory operations issued before the ACQUIRE may be completed after
      the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
-     combined with a following ACQUIRE, orders prior loads against
-     subsequent loads and stores and also orders prior stores against
-     subsequent stores.  Note that this is weaker than smp_mb()!  The
-     smp_mb__before_spinlock() primitive is free on many architectures.
+     combined with a following ACQUIRE, orders prior stores against
+     subsequent loads and stores. Note that this is weaker than smp_mb()!
+     The smp_mb__before_spinlock() primitive is free on many architectures.
 
  (2) RELEASE operation implication:
 
@@ -1975,7 +1985,7 @@ after it has altered the task state:
        CPU 1
        ===============================
        set_current_state();
-         set_mb();
+         smp_store_mb();
            STORE current->state
            <general barrier>
        LOAD event_indicated
@@ -2016,7 +2026,7 @@ between the STORE to indicate the event and the STORE to set TASK_RUNNING:
        CPU 1                           CPU 2
        =============================== ===============================
        set_current_state();            STORE event_indicated
-         set_mb();                     wake_up();
+         smp_store_mb();               wake_up();
            STORE current->state          <write barrier>
            <general barrier>             STORE current->state
        LOAD event_indicated
index d727a38..53a7268 100644 (file)
@@ -20,7 +20,7 @@
        files/UDP-Lite-HOWTO.txt
 
    o The Wireshark UDP-Lite WiKi (with capture files):
-       http://wiki.wireshark.org/Lightweight_User_Datagram_Protocol
+       https://wiki.wireshark.org/Lightweight_User_Datagram_Protocol
 
    o The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt
 
index 21461a0..e114513 100644 (file)
@@ -8,6 +8,10 @@ CONTENTS
  1. Overview
  2. Scheduling algorithm
  3. Scheduling Real-Time Tasks
+   3.1 Definitions
+   3.2 Schedulability Analysis for Uniprocessor Systems
+   3.3 Schedulability Analysis for Multiprocessor Systems
+   3.4 Relationship with SCHED_DEADLINE Parameters
  4. Bandwidth management
    4.1 System-wide settings
    4.2 Task interface
@@ -43,7 +47,7 @@ CONTENTS
  "deadline", to schedule tasks. A SCHED_DEADLINE task should receive
  "runtime" microseconds of execution time every "period" microseconds, and
  these "runtime" microseconds are available within "deadline" microseconds
- from the beginning of the period.  In order to implement this behaviour,
+ from the beginning of the period.  In order to implement this behavior,
  every time the task wakes up, the scheduler computes a "scheduling deadline"
  consistent with the guarantee (using the CBS[2,3] algorithm). Tasks are then
  scheduled using EDF[1] on these scheduling deadlines (the task with the
@@ -52,7 +56,7 @@ CONTENTS
  "admission control" strategy (see Section "4. Bandwidth management") is used
  (clearly, if the system is overloaded this guarantee cannot be respected).
 
- Summing up, the CBS[2,3] algorithms assigns scheduling deadlines to tasks so
+ Summing up, the CBS[2,3] algorithm assigns scheduling deadlines to tasks so
  that each task runs for at most its runtime every period, avoiding any
  interference between different tasks (bandwidth isolation), while the EDF[1]
  algorithm selects the task with the earliest scheduling deadline as the one
@@ -63,7 +67,7 @@ CONTENTS
  In more details, the CBS algorithm assigns scheduling deadlines to
  tasks in the following way:
 
-  - Each SCHED_DEADLINE task is characterised by the "runtime",
+  - Each SCHED_DEADLINE task is characterized by the "runtime",
     "deadline", and "period" parameters;
 
   - The state of the task is described by a "scheduling deadline", and
@@ -78,7 +82,7 @@ CONTENTS
 
     then, if the scheduling deadline is smaller than the current time, or
     this condition is verified, the scheduling deadline and the
-    remaining runtime are re-initialised as
+    remaining runtime are re-initialized as
 
          scheduling deadline = current time + deadline
          remaining runtime = runtime
@@ -126,31 +130,37 @@ CONTENTS
  suited for periodic or sporadic real-time tasks that need guarantees on their
  timing behavior, e.g., multimedia, streaming, control applications, etc.
 
+3.1 Definitions
+------------------------
+
  A typical real-time task is composed of a repetition of computation phases
  (task instances, or jobs) which are activated on a periodic or sporadic
  fashion.
- Each job J_j (where J_j is the j^th job of the task) is characterised by an
+ Each job J_j (where J_j is the j^th job of the task) is characterized by an
  arrival time r_j (the time when the job starts), an amount of computation
  time c_j needed to finish the job, and a job absolute deadline d_j, which
  is the time within which the job should be finished. The maximum execution
- time max_j{c_j} is called "Worst Case Execution Time" (WCET) for the task.
+ time max{c_j} is called "Worst Case Execution Time" (WCET) for the task.
  A real-time task can be periodic with period P if r_{j+1} = r_j + P, or
  sporadic with minimum inter-arrival time P is r_{j+1} >= r_j + P. Finally,
  d_j = r_j + D, where D is the task's relative deadline.
- The utilisation of a real-time task is defined as the ratio between its
+ Summing up, a real-time task can be described as
+       Task = (WCET, D, P)
+
+ The utilization of a real-time task is defined as the ratio between its
  WCET and its period (or minimum inter-arrival time), and represents
  the fraction of CPU time needed to execute the task.
 
- If the total utilisation sum_i(WCET_i/P_i) is larger than M (with M equal
+ If the total utilization U=sum(WCET_i/P_i) is larger than M (with M equal
  to the number of CPUs), then the scheduler is unable to respect all the
  deadlines.
- Note that total utilisation is defined as the sum of the utilisations
+ Note that total utilization is defined as the sum of the utilizations
  WCET_i/P_i over all the real-time tasks in the system. When considering
  multiple real-time tasks, the parameters of the i-th task are indicated
  with the "_i" suffix.
- Moreover, if the total utilisation is larger than M, then we risk starving
+ Moreover, if the total utilization is larger than M, then we risk starving
  non- real-time tasks by real-time tasks.
- If, instead, the total utilisation is smaller than M, then non real-time
+ If, instead, the total utilization is smaller than M, then non real-time
  tasks will not be starved and the system might be able to respect all the
  deadlines.
  As a matter of fact, in this case it is possible to provide an upper bound
@@ -159,38 +169,119 @@ CONTENTS
  More precisely, it can be proven that using a global EDF scheduler the
  maximum tardiness of each task is smaller or equal than
        ((M âˆ’ 1) Â· WCET_max âˆ’ WCET_min)/(M âˆ’ (M âˆ’ 2) Â· U_max) + WCET_max
- where WCET_max = max_i{WCET_i} is the maximum WCET, WCET_min=min_i{WCET_i}
- is the minimum WCET, and U_max = max_i{WCET_i/P_i} is the maximum utilisation.
+ where WCET_max = max{WCET_i} is the maximum WCET, WCET_min=min{WCET_i}
+ is the minimum WCET, and U_max = max{WCET_i/P_i} is the maximum
+ utilization[12].
+
+3.2 Schedulability Analysis for Uniprocessor Systems
+------------------------
 
  If M=1 (uniprocessor system), or in case of partitioned scheduling (each
  real-time task is statically assigned to one and only one CPU), it is
  possible to formally check if all the deadlines are respected.
  If D_i = P_i for all tasks, then EDF is able to respect all the deadlines
- of all the tasks executing on a CPU if and only if the total utilisation
+ of all the tasks executing on a CPU if and only if the total utilization
  of the tasks running on such a CPU is smaller or equal than 1.
  If D_i != P_i for some task, then it is possible to define the density of
- a task as C_i/min{D_i,T_i}, and EDF is able to respect all the deadlines
- of all the tasks running on a CPU if the sum sum_i C_i/min{D_i,T_i} of the
- densities of the tasks running on such a CPU is smaller or equal than 1
- (notice that this condition is only sufficient, and not necessary).
+ a task as WCET_i/min{D_i,P_i}, and EDF is able to respect all the deadlines
+ of all the tasks running on a CPU if the sum of the densities of the tasks
+ running on such a CPU is smaller or equal than 1:
+       sum(WCET_i / min{D_i, P_i}) <= 1
+ It is important to notice that this condition is only sufficient, and not
+ necessary: there are task sets that are schedulable, but do not respect the
+ condition. For example, consider the task set {Task_1,Task_2} composed by
+ Task_1=(50ms,50ms,100ms) and Task_2=(10ms,100ms,100ms).
+ EDF is clearly able to schedule the two tasks without missing any deadline
+ (Task_1 is scheduled as soon as it is released, and finishes just in time
+ to respect its deadline; Task_2 is scheduled immediately after Task_1, hence
+ its response time cannot be larger than 50ms + 10ms = 60ms) even if
+       50 / min{50,100} + 10 / min{100, 100} = 50 / 50 + 10 / 100 = 1.1
+ Of course it is possible to test the exact schedulability of tasks with
+ D_i != P_i (checking a condition that is both sufficient and necessary),
+ but this cannot be done by comparing the total utilization or density with
+ a constant. Instead, the so called "processor demand" approach can be used,
+ computing the total amount of CPU time h(t) needed by all the tasks to
+ respect all of their deadlines in a time interval of size t, and comparing
+ such a time with the interval size t. If h(t) is smaller than t (that is,
+ the amount of time needed by the tasks in a time interval of size t is
+ smaller than the size of the interval) for all the possible values of t, then
+ EDF is able to schedule the tasks respecting all of their deadlines. Since
+ performing this check for all possible values of t is impossible, it has been
+ proven[4,5,6] that it is sufficient to perform the test for values of t
+ between 0 and a maximum value L. The cited papers contain all of the
+ mathematical details and explain how to compute h(t) and L.
+ In any case, this kind of analysis is too complex as well as too
+ time-consuming to be performed on-line. Hence, as explained in Section
+ 4 Linux uses an admission test based on the tasks' utilizations.
+
+3.3 Schedulability Analysis for Multiprocessor Systems
+------------------------
 
  On multiprocessor systems with global EDF scheduling (non partitioned
  systems), a sufficient test for schedulability can not be based on the
- utilisations (it can be shown that task sets with utilisations slightly
- larger than 1 can miss deadlines regardless of the number of CPUs M).
- However, as previously stated, enforcing that the total utilisation is smaller
- than M is enough to guarantee that non real-time tasks are not starved and
- that the tardiness of real-time tasks has an upper bound.
+ utilizations or densities: it can be shown that even if D_i = P_i task
+ sets with utilizations slightly larger than 1 can miss deadlines regardless
+ of the number of CPUs.
+
+ Consider a set {Task_1,...Task_{M+1}} of M+1 tasks on a system with M
+ CPUs, with the first task Task_1=(P,P,P) having period, relative deadline
+ and WCET equal to P. The remaining M tasks Task_i=(e,P-1,P-1) have an
+ arbitrarily small worst case execution time (indicated as "e" here) and a
+ period smaller than the one of the first task. Hence, if all the tasks
+ activate at the same time t, global EDF schedules these M tasks first
+ (because their absolute deadlines are equal to t + P - 1, hence they are
+ smaller than the absolute deadline of Task_1, which is t + P). As a
+ result, Task_1 can be scheduled only at time t + e, and will finish at
+ time t + e + P, after its absolute deadline. The total utilization of the
+ task set is U = M Â· e / (P - 1) + P / P = M Â· e / (P - 1) + 1, and for small
+ values of e this can become very close to 1. This is known as "Dhall's
+ effect"[7]. Note: the example in the original paper by Dhall has been
+ slightly simplified here (for example, Dhall more correctly computed
+ lim_{e->0}U).
+
+ More complex schedulability tests for global EDF have been developed in
+ real-time literature[8,9], but they are not based on a simple comparison
+ between total utilization (or density) and a fixed constant. If all tasks
+ have D_i = P_i, a sufficient schedulability condition can be expressed in
+ a simple way:
+       sum(WCET_i / P_i) <= M - (M - 1) Â· U_max
+ where U_max = max{WCET_i / P_i}[10]. Notice that for U_max = 1,
+ M - (M - 1) Â· U_max becomes M - M + 1 = 1 and this schedulability condition
+ just confirms the Dhall's effect. A more complete survey of the literature
+ about schedulability tests for multi-processor real-time scheduling can be
+ found in [11].
+
+ As seen, enforcing that the total utilization is smaller than M does not
+ guarantee that global EDF schedules the tasks without missing any deadline
+ (in other words, global EDF is not an optimal scheduling algorithm). However,
+ a total utilization smaller than M is enough to guarantee that non real-time
+ tasks are not starved and that the tardiness of real-time tasks has an upper
+ bound[12] (as previously noted). Different bounds on the maximum tardiness
+ experienced by real-time tasks have been developed in various papers[13,14],
+ but the theoretical result that is important for SCHED_DEADLINE is that if
+ the total utilization is smaller or equal than M then the response times of
+ the tasks are limited.
+
+3.4 Relationship with SCHED_DEADLINE Parameters
+------------------------
 
- SCHED_DEADLINE can be used to schedule real-time tasks guaranteeing that
- the jobs' deadlines of a task are respected. In order to do this, a task
- must be scheduled by setting:
+ Finally, it is important to understand the relationship between the
+ SCHED_DEADLINE scheduling parameters described in Section 2 (runtime,
+ deadline and period) and the real-time task parameters (WCET, D, P)
+ described in this section. Note that the tasks' temporal constraints are
+ represented by its absolute deadlines d_j = r_j + D described above, while
+ SCHED_DEADLINE schedules the tasks according to scheduling deadlines (see
+ Section 2).
+ If an admission test is used to guarantee that the scheduling deadlines
+ are respected, then SCHED_DEADLINE can be used to schedule real-time tasks
+ guaranteeing that all the jobs' deadlines of a task are respected.
+ In order to do this, a task must be scheduled by setting:
 
   - runtime >= WCET
   - deadline = D
   - period <= P
 
- IOW, if runtime >= WCET and if period is >= P, then the scheduling deadlines
+ IOW, if runtime >= WCET and if period is <= P, then the scheduling deadlines
  and the absolute deadlines (d_j) coincide, so a proper admission control
  allows to respect the jobs' absolute deadlines for this task (this is what is
  called "hard schedulability property" and is an extension of Lemma 1 of [2]).
@@ -206,6 +297,39 @@ CONTENTS
       Symposium, 1998. http://retis.sssup.it/~giorgio/paps/1998/rtss98-cbs.pdf
   3 - L. Abeni. Server Mechanisms for Multimedia Applications. ReTiS Lab
       Technical Report. http://disi.unitn.it/~abeni/tr-98-01.pdf
+  4 - J. Y. Leung and M.L. Merril. A Note on Preemptive Scheduling of
+      Periodic, Real-Time Tasks. Information Processing Letters, vol. 11,
+      no. 3, pp. 115-118, 1980.
+  5 - S. K. Baruah, A. K. Mok and L. E. Rosier. Preemptively Scheduling
+      Hard-Real-Time Sporadic Tasks on One Processor. Proceedings of the
+      11th IEEE Real-time Systems Symposium, 1990.
+  6 - S. K. Baruah, L. E. Rosier and R. R. Howell. Algorithms and Complexity
+      Concerning the Preemptive Scheduling of Periodic Real-Time tasks on
+      One Processor. Real-Time Systems Journal, vol. 4, no. 2, pp 301-324,
+      1990.
+  7 - S. J. Dhall and C. L. Liu. On a real-time scheduling problem. Operations
+      research, vol. 26, no. 1, pp 127-140, 1978.
+  8 - T. Baker. Multiprocessor EDF and Deadline Monotonic Schedulability
+      Analysis. Proceedings of the 24th IEEE Real-Time Systems Symposium, 2003.
+  9 - T. Baker. An Analysis of EDF Schedulability on a Multiprocessor.
+      IEEE Transactions on Parallel and Distributed Systems, vol. 16, no. 8,
+      pp 760-768, 2005.
+  10 - J. Goossens, S. Funk and S. Baruah, Priority-Driven Scheduling of
+       Periodic Task Systems on Multiprocessors. Real-Time Systems Journal,
+       vol. 25, no. 2–3, pp. 187–205, 2003.
+  11 - R. Davis and A. Burns. A Survey of Hard Real-Time Scheduling for
+       Multiprocessor Systems. ACM Computing Surveys, vol. 43, no. 4, 2011.
+       http://www-users.cs.york.ac.uk/~robdavis/papers/MPSurveyv5.0.pdf
+  12 - U. C. Devi and J. H. Anderson. Tardiness Bounds under Global EDF
+       Scheduling on a Multiprocessor. Real-Time Systems Journal, vol. 32,
+       no. 2, pp 133-189, 2008.
+  13 - P. Valente and G. Lipari. An Upper Bound to the Lateness of Soft
+       Real-Time Tasks Scheduled by EDF on Multiprocessors. Proceedings of
+       the 26th IEEE Real-Time Systems Symposium, 2005.
+  14 - J. Erickson, U. Devi and S. Baruah. Improved tardiness bounds for
+       Global EDF. Proceedings of the 22nd Euromicro Conference on
+       Real-Time Systems, 2010.
+
 
 4. Bandwidth management
 =======================
@@ -218,10 +342,10 @@ CONTENTS
  no guarantee can be given on the actual scheduling of the -deadline tasks.
 
  As already stated in Section 3, a necessary condition to be respected to
- correctly schedule a set of real-time tasks is that the total utilisation
+ correctly schedule a set of real-time tasks is that the total utilization
  is smaller than M. When talking about -deadline tasks, this requires that
  the sum of the ratio between runtime and period for all tasks is smaller
- than M. Notice that the ratio runtime/period is equivalent to the utilisation
+ than M. Notice that the ratio runtime/period is equivalent to the utilization
  of a "traditional" real-time task, and is also often referred to as
  "bandwidth".
  The interface used to control the CPU bandwidth that can be allocated
@@ -251,7 +375,7 @@ CONTENTS
  The system wide settings are configured under the /proc virtual file system.
 
  For now the -rt knobs are used for -deadline admission control and the
- -deadline runtime is accounted against the -rt runtime. We realise that this
+ -deadline runtime is accounted against the -rt runtime. We realize that this
  isn't entirely desirable; however, it is better to have a small interface for
  now, and be able to change it easily later. The ideal situation (see 5.) is to
  run -rt tasks from a -deadline server; in which case the -rt bandwidth is a
index 1e52d67..dbe6623 100644 (file)
@@ -198,6 +198,9 @@ TTY_IO_ERROR                If set, causes all subsequent userspace read/write
 
 TTY_OTHER_CLOSED       Device is a pty and the other side has closed.
 
+TTY_OTHER_DONE         Device is a pty and the other side has closed and
+                       all pending input processing has been completed.
+
 TTY_NO_WRITE_SPLIT     Prevent driver from splitting up writes into
                        smaller chunks.
 
index 43e94ea..263b907 100644 (file)
@@ -15,8 +15,7 @@ Contents:
   a) Discovering and configuring TCMU uio devices
   b) Waiting for events on the device(s)
   c) Managing the command ring
-3) Command filtering and pass_level
-4) A final note
+3) A final note
 
 
 TCM Userspace Design
@@ -324,7 +323,7 @@ int handle_device_events(int fd, void *map)
   /* Process events from cmd ring until we catch up with cmd_head */
   while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) {
 
-    if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) {
+    if (tcmu_hdr_get_op(ent->hdr.len_op) == TCMU_OP_CMD) {
       uint8_t *cdb = (void *)mb + ent->req.cdb_off;
       bool success = true;
 
@@ -339,8 +338,12 @@ int handle_device_events(int fd, void *map)
         ent->rsp.scsi_status = SCSI_CHECK_CONDITION;
       }
     }
+    else if (tcmu_hdr_get_op(ent->hdr.len_op) != TCMU_OP_PAD) {
+      /* Tell the kernel we didn't handle unknown opcodes */
+      ent->hdr.uflags |= TCMU_UFLAG_UNKNOWN_OP;
+    }
     else {
-      /* Do nothing for PAD entries */
+      /* Do nothing for PAD entries except update cmd_tail */
     }
 
     /* update cmd_tail */
@@ -360,28 +363,6 @@ int handle_device_events(int fd, void *map)
 }
 
 
-Command filtering and pass_level
---------------------------------
-
-TCMU supports a "pass_level" option with valid values of 0 or 1.  When
-the value is 0 (the default), nearly all SCSI commands received for
-the device are passed through to the handler. This allows maximum
-flexibility but increases the amount of code required by the handler,
-to support all mandatory SCSI commands. If pass_level is set to 1,
-then only IO-related commands are presented, and the rest are handled
-by LIO's in-kernel command emulation. The commands presented at level
-1 include all versions of:
-
-READ
-WRITE
-WRITE_VERIFY
-XDWRITEREAD
-WRITE_SAME
-COMPARE_AND_WRITE
-SYNCHRONIZE_CACHE
-UNMAP
-
-
 A final note
 ------------
 
index 53838d9..c59bd9b 100644 (file)
@@ -169,6 +169,10 @@ Shadow pages contain the following information:
     Contains the value of cr4.smep && !cr0.wp for which the page is valid
     (pages for which this is true are different from other pages; see the
     treatment of cr0.wp=0 below).
+  role.smap_andnot_wp:
+    Contains the value of cr4.smap && !cr0.wp for which the page is valid
+    (pages for which this is true are different from other pages; see the
+    treatment of cr0.wp=0 below).
   gfn:
     Either the guest page table containing the translations shadowed by this
     page, or the base page frame for linear translations.  See role.direct.
@@ -344,10 +348,16 @@ on fault type:
 
 (user write faults generate a #PF)
 
-In the first case there is an additional complication if CR4.SMEP is
-enabled: since we've turned the page into a kernel page, the kernel may now
-execute it.  We handle this by also setting spte.nx.  If we get a user
-fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back.
+In the first case there are two additional complications:
+- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
+  the kernel may now execute it.  We handle this by also setting spte.nx.
+  If we get a user fetch or read fault, we'll change spte.u=1 and
+  spte.nx=gpte.nx back.
+- if CR4.SMAP is disabled: since the page has been changed to a kernel
+  page, it can not be reused when CR4.SMAP is enabled. We set
+  CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
+  here we do not care the case that CR4.SMAP is enabled since KVM will
+  directly inject #PF to guest due to failed permission check.
 
 To prevent an spte that was converted into a kernel page with cr0.wp=0
 from being written by the kernel after cr0.wp has changed to 1, we make
index b399b34..4303a64 100644 (file)
@@ -51,9 +51,9 @@ trivial patch so apply some common sense.
        or does something very odd once a month document it.
 
        PLEASE remember that submissions must be made under the terms
-       of the OSDL certificate of contribution and should include a
-       Signed-off-by: line.  The current version of this "Developer's
-       Certificate of Origin" (DCO) is listed in the file
+       of the Linux Foundation certificate of contribution and should
+       include a Signed-off-by: line.  The current version of this
+       "Developer's Certificate of Origin" (DCO) is listed in the file
        Documentation/SubmittingPatches.
 
 6.     Make sure you have the right to send any changes you make. If you
@@ -892,11 +892,10 @@ S:        Maintained
 F:     arch/arm/mach-alpine/
 
 ARM/ATMEL AT91RM9200 AND AT91SAM ARM ARCHITECTURES
-M:     Andrew Victor <linux@maxim.org.za>
 M:     Nicolas Ferre <nicolas.ferre@atmel.com>
+M:     Alexandre Belloni <alexandre.belloni@free-electrons.com>
 M:     Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-W:     http://maxim.org.za/at91_26.html
 W:     http://www.linux4sam.org
 S:     Supported
 F:     arch/arm/mach-at91/
@@ -975,7 +974,7 @@ S:  Maintained
 ARM/CORTINA SYSTEMS GEMINI ARM ARCHITECTURE
 M:     Hans Ulli Kroll <ulli.kroll@googlemail.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-T:     git git://git.berlios.de/gemini-board
+T:     git git://github.com/ulli-kroll/linux.git
 S:     Maintained
 F:     arch/arm/mach-gemini/
 
@@ -990,6 +989,12 @@ F: drivers/clocksource/timer-prima2.c
 F:     drivers/clocksource/timer-atlas7.c
 N:     [^a-z]sirf
 
+ARM/CONEXANT DIGICOLOR MACHINE SUPPORT
+M:     Baruch Siach <baruch@tkos.co.il>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:     Maintained
+N:     digicolor
+
 ARM/EBSA110 MACHINE SUPPORT
 M:     Russell King <linux@arm.linux.org.uk>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1188,7 +1193,7 @@ ARM/MAGICIAN MACHINE SUPPORT
 M:     Philipp Zabel <philipp.zabel@gmail.com>
 S:     Maintained
 
-ARM/Marvell Armada 370 and Armada XP SOC support
+ARM/Marvell Kirkwood and Armada 370, 375, 38x, XP SOC support
 M:     Jason Cooper <jason@lakedaemon.net>
 M:     Andrew Lunn <andrew@lunn.ch>
 M:     Gregory Clement <gregory.clement@free-electrons.com>
@@ -1197,12 +1202,17 @@ L:      linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-mvebu/
 F:     drivers/rtc/rtc-armada38x.c
+F:     arch/arm/boot/dts/armada*
+F:     arch/arm/boot/dts/kirkwood*
+
 
 ARM/Marvell Berlin SoC support
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-berlin/
+F:     arch/arm/boot/dts/berlin*
+
 
 ARM/Marvell Dove/MV78xx0/Orion SOC support
 M:     Jason Cooper <jason@lakedaemon.net>
@@ -1215,6 +1225,9 @@ F:        arch/arm/mach-dove/
 F:     arch/arm/mach-mv78xx0/
 F:     arch/arm/mach-orion5x/
 F:     arch/arm/plat-orion/
+F:     arch/arm/boot/dts/dove*
+F:     arch/arm/boot/dts/orion5x*
+
 
 ARM/Orion SoC/Technologic Systems TS-78xx platform support
 M:     Alexander Clouter <alex@digriz.org.uk>
@@ -1366,6 +1379,7 @@ N:        rockchip
 
 ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
 M:     Kukjin Kim <kgene@kernel.org>
+M:     Krzysztof Kozlowski <k.kozlowski@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:     Maintained
@@ -1439,9 +1453,10 @@ ARM/SOCFPGA ARCHITECTURE
 M:     Dinh Nguyen <dinguyen@opensource.altera.com>
 S:     Maintained
 F:     arch/arm/mach-socfpga/
+F:     arch/arm/boot/dts/socfpga*
+F:     arch/arm/configs/socfpga_defconfig
 W:     http://www.rocketboards.org
-T:     git://git.rocketboards.org/linux-socfpga.git
-T:     git://git.rocketboards.org/linux-socfpga-next.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
 
 ARM/SOCFPGA CLOCK FRAMEWORK SUPPORT
 M:     Dinh Nguyen <dinguyen@opensource.altera.com>
@@ -1929,7 +1944,7 @@ S:        Maintained
 F:     drivers/net/wireless/b43legacy/
 
 BACKLIGHT CLASS/SUBSYSTEM
-M:     Jingoo Han <jg1.han@samsung.com>
+M:     Jingoo Han <jingoohan1@gmail.com>
 M:     Lee Jones <lee.jones@linaro.org>
 S:     Maintained
 F:     drivers/video/backlight/
@@ -2116,8 +2131,9 @@ S:        Supported
 F:     drivers/net/ethernet/broadcom/bnx2x/
 
 BROADCOM BCM281XX/BCM11XXX/BCM216XX ARM ARCHITECTURE
-M:     Christian Daudt <bcm@fixthebug.org>
 M:     Florian Fainelli <f.fainelli@gmail.com>
+M:     Ray Jui <rjui@broadcom.com>
+M:     Scott Branden <sbranden@broadcom.com>
 L:     bcm-kernel-feedback-list@broadcom.com
 T:     git git://github.com/broadcom/mach-bcm
 S:     Maintained
@@ -2168,7 +2184,6 @@ S:        Maintained
 F:     drivers/usb/gadget/udc/bcm63xx_udc.*
 
 BROADCOM BCM7XXX ARM ARCHITECTURE
-M:     Marc Carino <marc.ceeeee@gmail.com>
 M:     Brian Norris <computersforpeace@gmail.com>
 M:     Gregory Fong <gregory.0xf0@gmail.com>
 M:     Florian Fainelli <f.fainelli@gmail.com>
@@ -2412,7 +2427,6 @@ L:        linux-security-module@vger.kernel.org
 S:     Supported
 F:     include/linux/capability.h
 F:     include/uapi/linux/capability.h
-F:     security/capability.c
 F:     security/commoncap.c
 F:     kernel/capability.c
 
@@ -3810,10 +3824,11 @@ M:      David Woodhouse <dwmw2@infradead.org>
 L:     linux-embedded@vger.kernel.org
 S:     Maintained
 
-EMULEX LPFC FC SCSI DRIVER
-M:     James Smart <james.smart@emulex.com>
+EMULEX/AVAGO LPFC FC/FCOE SCSI DRIVER
+M:     James Smart <james.smart@avagotech.com>
+M:     Dick Kennedy <dick.kennedy@avagotech.com>
 L:     linux-scsi@vger.kernel.org
-W:     http://sourceforge.net/projects/lpfcxxxx
+W:     http://www.avagotech.com
 S:     Supported
 F:     drivers/scsi/lpfc/
 
@@ -3912,7 +3927,7 @@ F:        drivers/extcon/
 F:     Documentation/extcon/
 
 EXYNOS DP DRIVER
-M:     Jingoo Han <jg1.han@samsung.com>
+M:     Jingoo Han <jingoohan1@gmail.com>
 L:     dri-devel@lists.freedesktop.org
 S:     Maintained
 F:     drivers/gpu/drm/exynos/exynos_dp*
@@ -4371,11 +4386,10 @@ F:      fs/gfs2/
 F:     include/uapi/linux/gfs2_ondisk.h
 
 GIGASET ISDN DRIVERS
-M:     Hansjoerg Lipp <hjlipp@web.de>
-M:     Tilman Schmidt <tilman@imap.cc>
+M:     Paul Bolle <pebolle@tiscali.nl>
 L:     gigaset307x-common@lists.sourceforge.net
 W:     http://gigaset307x.sourceforge.net/
-S:     Maintained
+S:     Odd Fixes
 F:     Documentation/isdn/README.gigaset
 F:     drivers/isdn/gigaset/
 F:     include/uapi/linux/gigaset_dev.h
@@ -4522,7 +4536,7 @@ M:        Jean Delvare <jdelvare@suse.de>
 M:     Guenter Roeck <linux@roeck-us.net>
 L:     lm-sensors@lm-sensors.org
 W:     http://www.lm-sensors.org/
-T:     quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/
+T:     quilt http://jdelvare.nerim.net/devel/linux/jdelvare-hwmon/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
 S:     Maintained
 F:     Documentation/hwmon/
@@ -5042,17 +5056,19 @@ S:      Orphan
 F:     drivers/video/fbdev/imsttfb.c
 
 INFINIBAND SUBSYSTEM
-M:     Roland Dreier <roland@kernel.org>
+M:     Doug Ledford <dledford@redhat.com>
 M:     Sean Hefty <sean.hefty@intel.com>
 M:     Hal Rosenstock <hal.rosenstock@gmail.com>
 L:     linux-rdma@vger.kernel.org
 W:     http://www.openfabrics.org/
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma.git
 S:     Supported
 F:     Documentation/infiniband/
 F:     drivers/infiniband/
 F:     include/uapi/linux/if_infiniband.h
+F:     include/uapi/rdma/
+F:     include/rdma/
 
 INOTIFY
 M:     John McCutchan <john@johnmccutchan.com>
@@ -6951,6 +6967,17 @@ T:       git git://git.rocketboards.org/linux-socfpga-next.git
 S:     Maintained
 F:     arch/nios2/
 
+NOKIA N900 POWER SUPPLY DRIVERS
+M:     Pali Rohár <pali.rohar@gmail.com>
+S:     Maintained
+F:     include/linux/power/bq2415x_charger.h
+F:     include/linux/power/bq27x00_battery.h
+F:     include/linux/power/isp1704_charger.h
+F:     drivers/power/bq2415x_charger.c
+F:     drivers/power/bq27x00_battery.c
+F:     drivers/power/isp1704_charger.c
+F:     drivers/power/rx51_battery.c
+
 NTB DRIVER
 M:     Jon Mason <jdmason@kudzu.us>
 M:     Dave Jiang <dave.jiang@intel.com>
@@ -7539,7 +7566,7 @@ S:        Maintained
 F:     drivers/pci/host/*rcar*
 
 PCI DRIVER FOR SAMSUNG EXYNOS
-M:     Jingoo Han <jg1.han@samsung.com>
+M:     Jingoo Han <jingoohan1@gmail.com>
 L:     linux-pci@vger.kernel.org
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
@@ -7547,7 +7574,8 @@ S:        Maintained
 F:     drivers/pci/host/pci-exynos.c
 
 PCI DRIVER FOR SYNOPSIS DESIGNWARE
-M:     Jingoo Han <jg1.han@samsung.com>
+M:     Jingoo Han <jingoohan1@gmail.com>
+M:     Pratyush Anand <pratyush.anand@gmail.com>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
 F:     drivers/pci/host/*designware*
@@ -7561,8 +7589,9 @@ F:        Documentation/devicetree/bindings/pci/host-generic-pci.txt
 F:     drivers/pci/host/pci-host-generic.c
 
 PCIE DRIVER FOR ST SPEAR13XX
+M:     Pratyush Anand <pratyush.anand@gmail.com>
 L:     linux-pci@vger.kernel.org
-S:     Orphan
+S:     Maintained
 F:     drivers/pci/host/*spear*
 
 PCMCIA SUBSYSTEM
@@ -7605,7 +7634,6 @@ F:        kernel/delayacct.c
 
 PERFORMANCE EVENTS SUBSYSTEM
 M:     Peter Zijlstra <a.p.zijlstra@chello.nl>
-M:     Paul Mackerras <paulus@samba.org>
 M:     Ingo Molnar <mingo@redhat.com>
 M:     Arnaldo Carvalho de Melo <acme@kernel.org>
 L:     linux-kernel@vger.kernel.org
@@ -8503,7 +8531,7 @@ S:        Supported
 F:     sound/soc/samsung/
 
 SAMSUNG FRAMEBUFFER DRIVER
-M:     Jingoo Han <jg1.han@samsung.com>
+M:     Jingoo Han <jingoohan1@gmail.com>
 L:     linux-fbdev@vger.kernel.org
 S:     Maintained
 F:     drivers/video/fbdev/s3c-fb.c
@@ -8802,16 +8830,19 @@ F:      drivers/misc/phantom.c
 F:     include/uapi/linux/phantom.h
 
 SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER
-M:     Jayamohan Kallickal <jayamohan.kallickal@emulex.com>
+M:     Jayamohan Kallickal <jayamohan.kallickal@avagotech.com>
+M:     Minh Tran <minh.tran@avagotech.com>
+M:     John Soni Jose <sony.john-n@avagotech.com>
 L:     linux-scsi@vger.kernel.org
-W:     http://www.emulex.com
+W:     http://www.avagotech.com
 S:     Supported
 F:     drivers/scsi/be2iscsi/
 
-SERVER ENGINES 10Gbps NIC - BladeEngine 2 DRIVER
-M:     Sathya Perla <sathya.perla@emulex.com>
-M:     Subbu Seetharaman <subbu.seetharaman@emulex.com>
-M:     Ajit Khaparde <ajit.khaparde@emulex.com>
+Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER
+M:     Sathya Perla <sathya.perla@avagotech.com>
+M:     Ajit Khaparde <ajit.khaparde@avagotech.com>
+M:     Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com>
+M:     Sriharsha Basavapatna <sriharsha.basavapatna@avagotech.com>
 L:     netdev@vger.kernel.org
 W:     http://www.emulex.com
 S:     Supported
@@ -10557,8 +10588,7 @@ F:      drivers/virtio/virtio_input.c
 F:     include/uapi/linux/virtio_input.h
 
 VIA RHINE NETWORK DRIVER
-M:     Roger Luethi <rl@hellgate.ch>
-S:     Maintained
+S:     Orphan
 F:     drivers/net/ethernet/via/via-rhine.c
 
 VIA SD/MMC CARD CONTROLLER DRIVER
index 2da553f..6c6f146 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION =
 NAME = Hurr durr I'ma sheep
 
 # *DOCUMENTATION*
@@ -215,7 +215,6 @@ VPATH               := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD))
 
 export srctree objtree VPATH
 
-
 # SUBARCH tells the usermode build what the underlying arch is.  That is set
 # first, and if a usermode build is happening, the "ARCH=um" on the command
 # line overrides the setting of ARCH below.  If a native build is happening,
@@ -1497,11 +1496,11 @@ image_name:
 # Clear a bunch of variables before executing the submake
 tools/: FORCE
        $(Q)mkdir -p $(objtree)/tools
-       $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/
+       $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/
 
 tools/%: FORCE
        $(Q)mkdir -p $(objtree)/tools
-       $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/ $*
+       $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/ $*
 
 # Single targets
 # ---------------------------------------------------------------------------
index cd14388..8399bd0 100644 (file)
@@ -14,6 +14,9 @@ targets               := vmlinux.gz vmlinux \
                   tools/bootpzh bootloader bootpheader bootpzheader 
 OBJSTRIP       := $(obj)/tools/objstrip
 
+HOSTCFLAGS     := -Wall -I$(objtree)/usr/include
+BOOTCFLAGS     += -I$(obj) -I$(srctree)/$(obj)
+
 # SRM bootable image.  Copy to offset 512 of a partition.
 $(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh
        ( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@ 
@@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE
 $(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE
        $(call if_changed,objstrip)
 
-LDFLAGS_bootloader   := -static -uvsprintf -T  #-N -relax
-LDFLAGS_bootpheader  := -static -uvsprintf -T  #-N -relax
-LDFLAGS_bootpzheader := -static -uvsprintf -T  #-N -relax
+LDFLAGS_bootloader   := -static -T # -N -relax
+LDFLAGS_bootloader   := -static -T # -N -relax
+LDFLAGS_bootpheader  := -static -T # -N -relax
+LDFLAGS_bootpzheader := -static -T # -N -relax
 
-OBJ_bootlx   := $(obj)/head.o $(obj)/main.o
-OBJ_bootph   := $(obj)/head.o $(obj)/bootp.o
-OBJ_bootpzh  := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o
+OBJ_bootlx   := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o
+OBJ_bootph   := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o
+OBJ_bootpzh  := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o
 
 $(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE
        $(call if_changed,ld)
index 3baf2d1..dd6eb4a 100644 (file)
@@ -19,7 +19,6 @@
 
 #include "ksize.h"
 
-extern int vsprintf(char *, const char *, va_list);
 extern unsigned long switch_to_osf_pal(unsigned long nr,
        struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
        unsigned long *vptb);
diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c
new file mode 100644 (file)
index 0000000..f844dae
--- /dev/null
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+
+size_t strnlen(const char * s, size_t count)
+{
+       const char *sc;
+
+       for (sc = s; count-- && *sc != '\0'; ++sc)
+               /* nothing */;
+       return sc - s;
+}
+
+# define do_div(n, base) ({                                            \
+       unsigned int __base = (base);                                   \
+       unsigned int __rem;                                             \
+       __rem = ((unsigned long long)(n)) % __base;                     \
+       (n) = ((unsigned long long)(n)) / __base;                       \
+       __rem;                                                          \
+})
+
+
+static int skip_atoi(const char **s)
+{
+       int i, c;
+
+       for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s)
+               i = i*10 + c - '0';
+       return i;
+}
+
+#define ZEROPAD        1               /* pad with zero */
+#define SIGN   2               /* unsigned/signed long */
+#define PLUS   4               /* show plus */
+#define SPACE  8               /* space if plus */
+#define LEFT   16              /* left justified */
+#define SPECIAL        32              /* 0x */
+#define LARGE  64              /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * str, unsigned long long num, int base, int size, int precision, int type)
+{
+       char c,sign,tmp[66];
+       const char *digits="0123456789abcdefghijklmnopqrstuvwxyz";
+       int i;
+
+       if (type & LARGE)
+               digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+       if (type & LEFT)
+               type &= ~ZEROPAD;
+       if (base < 2 || base > 36)
+               return 0;
+       c = (type & ZEROPAD) ? '0' : ' ';
+       sign = 0;
+       if (type & SIGN) {
+               if ((signed long long)num < 0) {
+                       sign = '-';
+                       num = - (signed long long)num;
+                       size--;
+               } else if (type & PLUS) {
+                       sign = '+';
+                       size--;
+               } else if (type & SPACE) {
+                       sign = ' ';
+                       size--;
+               }
+       }
+       if (type & SPECIAL) {
+               if (base == 16)
+                       size -= 2;
+               else if (base == 8)
+                       size--;
+       }
+       i = 0;
+       if (num == 0)
+               tmp[i++]='0';
+       else while (num != 0) {
+               tmp[i++] = digits[do_div(num, base)];
+       }
+       if (i > precision)
+               precision = i;
+       size -= precision;
+       if (!(type&(ZEROPAD+LEFT)))
+               while(size-->0)
+                       *str++ = ' ';
+       if (sign)
+               *str++ = sign;
+       if (type & SPECIAL) {
+               if (base==8)
+                       *str++ = '0';
+               else if (base==16) {
+                       *str++ = '0';
+                       *str++ = digits[33];
+               }
+       }
+       if (!(type & LEFT))
+               while (size-- > 0)
+                       *str++ = c;
+       while (i < precision--)
+               *str++ = '0';
+       while (i-- > 0)
+               *str++ = tmp[i];
+       while (size-- > 0)
+               *str++ = ' ';
+       return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+       int len;
+       unsigned long long num;
+       int i, base;
+       char * str;
+       const char *s;
+
+       int flags;              /* flags to number() */
+
+       int field_width;        /* width of output field */
+       int precision;          /* min. # of digits for integers; max
+                                  number of chars for from string */
+       int qualifier;          /* 'h', 'l', or 'L' for integer fields */
+                               /* 'z' support added 23/7/1999 S.H.    */
+                               /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+
+       for (str=buf ; *fmt ; ++fmt) {
+               if (*fmt != '%') {
+                       *str++ = *fmt;
+                       continue;
+               }
+
+               /* process flags */
+               flags = 0;
+               repeat:
+                       ++fmt;          /* this also skips first '%' */
+                       switch (*fmt) {
+                               case '-': flags |= LEFT; goto repeat;
+                               case '+': flags |= PLUS; goto repeat;
+                               case ' ': flags |= SPACE; goto repeat;
+                               case '#': flags |= SPECIAL; goto repeat;
+                               case '0': flags |= ZEROPAD; goto repeat;
+                               }
+
+               /* get field width */
+               field_width = -1;
+               if ('0' <= *fmt && *fmt <= '9')
+                       field_width = skip_atoi(&fmt);
+               else if (*fmt == '*') {
+                       ++fmt;
+                       /* it's the next argument */
+                       field_width = va_arg(args, int);
+                       if (field_width < 0) {
+                               field_width = -field_width;
+                               flags |= LEFT;
+                       }
+               }
+
+               /* get the precision */
+               precision = -1;
+               if (*fmt == '.') {
+                       ++fmt;
+                       if ('0' <= *fmt && *fmt <= '9')
+                               precision = skip_atoi(&fmt);
+                       else if (*fmt == '*') {
+                               ++fmt;
+                               /* it's the next argument */
+                               precision = va_arg(args, int);
+                       }
+                       if (precision < 0)
+                               precision = 0;
+               }
+
+               /* get the conversion qualifier */
+               qualifier = -1;
+               if (*fmt == 'l' && *(fmt + 1) == 'l') {
+                       qualifier = 'q';
+                       fmt += 2;
+               } else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L'
+                       || *fmt == 'Z') {
+                       qualifier = *fmt;
+                       ++fmt;
+               }
+
+               /* default base */
+               base = 10;
+
+               switch (*fmt) {
+               case 'c':
+                       if (!(flags & LEFT))
+                               while (--field_width > 0)
+                                       *str++ = ' ';
+                       *str++ = (unsigned char) va_arg(args, int);
+                       while (--field_width > 0)
+                               *str++ = ' ';
+                       continue;
+
+               case 's':
+                       s = va_arg(args, char *);
+                       if (!s)
+                               s = "<NULL>";
+
+                       len = strnlen(s, precision);
+
+                       if (!(flags & LEFT))
+                               while (len < field_width--)
+                                       *str++ = ' ';
+                       for (i = 0; i < len; ++i)
+                               *str++ = *s++;
+                       while (len < field_width--)
+                               *str++ = ' ';
+                       continue;
+
+               case 'p':
+                       if (field_width == -1) {
+                               field_width = 2*sizeof(void *);
+                               flags |= ZEROPAD;
+                       }
+                       str = number(str,
+                               (unsigned long) va_arg(args, void *), 16,
+                               field_width, precision, flags);
+                       continue;
+
+
+               case 'n':
+                       if (qualifier == 'l') {
+                               long * ip = va_arg(args, long *);
+                               *ip = (str - buf);
+                       } else if (qualifier == 'Z') {
+                               size_t * ip = va_arg(args, size_t *);
+                               *ip = (str - buf);
+                       } else {
+                               int * ip = va_arg(args, int *);
+                               *ip = (str - buf);
+                       }
+                       continue;
+
+               case '%':
+                       *str++ = '%';
+                       continue;
+
+               /* integer number formats - set up the flags and "break" */
+               case 'o':
+                       base = 8;
+                       break;
+
+               case 'X':
+                       flags |= LARGE;
+               case 'x':
+                       base = 16;
+                       break;
+
+               case 'd':
+               case 'i':
+                       flags |= SIGN;
+               case 'u':
+                       break;
+
+               default:
+                       *str++ = '%';
+                       if (*fmt)
+                               *str++ = *fmt;
+                       else
+                               --fmt;
+                       continue;
+               }
+               if (qualifier == 'l') {
+                       num = va_arg(args, unsigned long);
+                       if (flags & SIGN)
+                               num = (signed long) num;
+               } else if (qualifier == 'q') {
+                       num = va_arg(args, unsigned long long);
+                       if (flags & SIGN)
+                               num = (signed long long) num;
+               } else if (qualifier == 'Z') {
+                       num = va_arg(args, size_t);
+               } else if (qualifier == 'h') {
+                       num = (unsigned short) va_arg(args, int);
+                       if (flags & SIGN)
+                               num = (signed short) num;
+               } else {
+                       num = va_arg(args, unsigned int);
+                       if (flags & SIGN)
+                               num = (signed int) num;
+               }
+               str = number(str, num, base, field_width, precision, flags);
+       }
+       *str = '\0';
+       return str-buf;
+}
+
+int sprintf(char * buf, const char *fmt, ...)
+{
+       va_list args;
+       int i;
+
+       va_start(args, fmt);
+       i=vsprintf(buf,fmt,args);
+       va_end(args);
+       return i;
+}
index 367d53d..dee8269 100644 (file)
@@ -27,6 +27,9 @@
 #include <linux/param.h>
 #ifdef __ELF__
 # include <linux/elf.h>
+# define elfhdr elf64_hdr
+# define elf_phdr elf64_phdr
+# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA)
 #endif
 
 /* bootfile size must be multiple of BLOCK_SIZE: */
index 429e8cd..e511776 100644 (file)
@@ -66,6 +66,4 @@
 #undef __ASM__MB
 #undef ____cmpxchg
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 #endif /* _ALPHA_CMPXCHG_H */
index f61e1a5..4cb4b6d 100644 (file)
@@ -2,6 +2,5 @@
 #define _ALPHA_TYPES_H
 
 #include <asm-generic/int-ll64.h>
-#include <uapi/asm/types.h>
 
 #endif /* _ALPHA_TYPES_H */
index c509d30..a56e608 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define NR_SYSCALLS                    511
+#define NR_SYSCALLS                    514
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
index d214a03..aa33bf5 100644 (file)
 #define __NR_sched_setattr             508
 #define __NR_sched_getattr             509
 #define __NR_renameat2                 510
+#define __NR_getrandom                 511
+#define __NR_memfd_create              512
+#define __NR_execveat                  513
 
 #endif /* _UAPI_ALPHA_UNISTD_H */
index 253cf1a..51267ac 100644 (file)
@@ -6,7 +6,6 @@
  *     Error handling code supporting Alpha systems
  */
 
-#include <linux/init.h>
 #include <linux/sched.h>
 
 #include <asm/io.h>
index 7b2be25..51f2c86 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/ptrace.h>
 #include <linux/interrupt.h>
 #include <linux/random.h>
-#include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
index e51f578..36dc91a 100644 (file)
@@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv,
        if (tv) {
                if (get_tv32((struct timeval *)&kts, tv))
                        return -EFAULT;
+               kts.tv_nsec *= 1000;
        }
        if (tz) {
                if (copy_from_user(&ktz, tz, sizeof(*tz)))
                        return -EFAULT;
        }
 
-       kts.tv_nsec *= 1000;
-
        return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
 }
 
index 1941a07..84d1326 100644 (file)
@@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task)
 }
 
 /*
- * Copy an alpha thread..
+ * Copy architecture-specific thread state
  */
-
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-           unsigned long arg,
+           unsigned long kthread_arg,
            struct task_struct *p)
 {
        extern void ret_from_fork(void);
@@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
                        sizeof(struct switch_stack) + sizeof(struct pt_regs));
                childstack->r26 = (unsigned long) ret_from_kernel_thread;
                childstack->r9 = usp;   /* function */
-               childstack->r10 = arg;
+               childstack->r10 = kthread_arg;
                childregs->hae = alpha_mv.hae_cache,
                childti->pcb.usp = 0;
                return 0;
index 99ac36d..2f24447 100644 (file)
@@ -63,7 +63,6 @@ static struct {
 enum ipi_message_type {
        IPI_RESCHEDULE,
        IPI_CALL_FUNC,
-       IPI_CALL_FUNC_SINGLE,
        IPI_CPU_STOP,
 };
 
@@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier)
        return -EINVAL;
 }
 
-\f
 static void
 send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
 {
@@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs)
                        generic_smp_call_function_interrupt();
                        break;
 
-               case IPI_CALL_FUNC_SINGLE:
-                       generic_smp_call_function_single_interrupt();
-                       break;
-
                case IPI_CPU_STOP:
                        halt();
 
@@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-       send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+       send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
 static void
index 6f01d9a..72b5951 100644 (file)
@@ -237,8 +237,7 @@ srmcons_init(void)
 
        return -ENODEV;
 }
-
-module_init(srmcons_init);
+device_initcall(srmcons_init);
 
 \f
 /*
index f21d61f..24e41bd 100644 (file)
@@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
        pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline);
        irq = intline;
 
-       msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI);
+       msi_loc = dev->msi_cap;
        msg_ctl = 0;
        if (msi_loc) 
                pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl);
index 2478971..9b62e3f 100644 (file)
@@ -529,6 +529,9 @@ sys_call_table:
        .quad sys_sched_setattr
        .quad sys_sched_getattr
        .quad sys_renameat2                     /* 510 */
+       .quad sys_getrandom
+       .quad sys_memfd_create
+       .quad sys_execveat
 
        .size sys_call_table, . - sys_call_table
        .type sys_call_table, @object
index 9c4c189..74aceea 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/tty.h>
 #include <linux/delay.h>
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/ratelimit.h>
 
index 9d0ac09..4a905bd 100644 (file)
@@ -23,8 +23,7 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
 
@@ -107,7 +106,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 
        /* If we're in an interrupt context, or have no user context,
           we must not take the fault.  */
-       if (!mm || in_atomic())
+       if (!mm || faulthandler_disabled())
                goto no_context;
 
 #ifdef CONFIG_ALPHA_LARGE_VMALLOC
index 18aa9b4..086a0d5 100644 (file)
@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
index c32f8a0..c300f5e 100644 (file)
@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
index 1c84cc2..02edf59 100644 (file)
@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
index 34a57a1..adb1744 100644 (file)
@@ -9,7 +9,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
index a7fc0da..ff6a4b5 100644 (file)
@@ -2,19 +2,6 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-config EARLY_PRINTK
-       bool "Early printk" if EMBEDDED
-       default y
-       help
-         Write kernel log output directly into the VGA buffer or to a serial
-         port.
-
-         This is useful for kernel debugging when your machine crashes very
-         early before the console code is initialized. For normal operation
-         it is not recommended because it looks ugly and doesn't cooperate
-         with klogd/syslogd or the X server. You should normally N here,
-         unless you want to debug such a crash.
-
 config 16KSTACKS
        bool "Use 16Kb for kernel stacks instead of 8Kb"
        help
index 067551b..9917a45 100644 (file)
@@ -99,7 +99,7 @@ static inline void atomic_##op(int i, atomic_t *v)                    \
        atomic_ops_unlock(flags);                                       \
 }
 
-#define ATOMIC_OP_RETURN(op, c_op)                                     \
+#define ATOMIC_OP_RETURN(op, c_op, asm_op)                             \
 static inline int atomic_##op##_return(int i, atomic_t *v)             \
 {                                                                      \
        unsigned long flags;                                            \
index 4dc64dd..05b5aaf 100644 (file)
@@ -53,7 +53,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
        if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
                return -EFAULT;
 
-       pagefault_disable();    /* implies preempt_disable() */
+       pagefault_disable();
 
        switch (op) {
        case FUTEX_OP_SET:
@@ -75,7 +75,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
                ret = -ENOSYS;
        }
 
-       pagefault_enable();     /* subsumes preempt_enable() */
+       pagefault_enable();
 
        if (!ret) {
                switch (cmp) {
@@ -104,7 +104,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
        return ret;
 }
 
-/* Compare-xchg with preemption disabled.
+/* Compare-xchg with pagefaults disabled.
  *  Notes:
  *      -Best-Effort: Exchg happens only if compare succeeds.
  *          If compare fails, returns; leaving retry/looping to upper layers
@@ -121,7 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
        if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
                return -EFAULT;
 
-       pagefault_disable();    /* implies preempt_disable() */
+       pagefault_disable();
 
        /* TBD : can use llock/scond */
        __asm__ __volatile__(
@@ -142,7 +142,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
        : "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
        : "cc", "memory");
 
-       pagefault_enable();     /* subsumes preempt_enable() */
+       pagefault_enable();
 
        *uval = val;
        return val;
index 8c3a3e0..12b2100 100644 (file)
@@ -266,7 +266,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
  * Machine specific helpers for Entire D-Cache or Per Line ops
  */
 
-static unsigned int __before_dc_op(const int op)
+static inline unsigned int __before_dc_op(const int op)
 {
        unsigned int reg = reg;
 
@@ -284,7 +284,7 @@ static unsigned int __before_dc_op(const int op)
        return reg;
 }
 
-static void __after_dc_op(const int op, unsigned int reg)
+static inline void __after_dc_op(const int op, unsigned int reg)
 {
        if (op & OP_FLUSH)      /* flush / flush-n-inv both wait */
                while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
index 6a2e006..d948e4e 100644 (file)
@@ -86,7 +86,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
        if (user_mode(regs))
index 86217db..992736b 100644 (file)
@@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \
        imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \
        imx25-karo-tx25.dtb \
        imx25-pdk.dtb
-dtb-$(CONFIG_SOC_IMX31) += \
+dtb-$(CONFIG_SOC_IMX27) += \
        imx27-apf27.dtb \
        imx27-apf27dev.dtb \
        imx27-eukrea-mbimxsd27-baseboard.dtb \
index c3255e0..dbb3f4d 100644 (file)
 /include/ "tps65217.dtsi"
 
 &tps {
+       /*
+        * Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only
+        * mode") at poweroff.  Most BeagleBone versions do not support RTC-only
+        * mode and risk hardware damage if this mode is entered.
+        *
+        * For details, see linux-omap mailing list May 2015 thread
+        *      [PATCH] ARM: dts: am335x-bone* enable pmic-shutdown-controller
+        * In particular, messages:
+        *      http://www.spinics.net/lists/linux-omap/msg118585.html
+        *      http://www.spinics.net/lists/linux-omap/msg118615.html
+        *
+        * You can override this later with
+        *      &tps {  /delete-property/ ti,pmic-shutdown-controller;  }
+        * if you want to use RTC-only mode and made sure you are not affected
+        * by the hardware problems. (Tip: double-check by performing a current
+        * measurement after shutdown: it should be less than 1 mA.)
+        */
+       ti,pmic-shutdown-controller;
+
        regulators {
                dcdc1_reg: regulator@0 {
                        regulator-name = "vdds_dpr";
index 5c42d25..901739f 100644 (file)
@@ -80,7 +80,3 @@
                status = "okay";
        };
 };
-
-&rtc {
-       system-power-controller;
-};
index 87fc7a3..156d05e 100644 (file)
        wlcore: wlcore@2 {
                compatible = "ti,wl1271";
                reg = <2>;
-               interrupt-parent = <&gpio1>;
+               interrupt-parent = <&gpio0>;
                interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */
                ref-clock-frequency = <38400000>;
        };
index 518b8fd..18cc826 100644 (file)
@@ -12,7 +12,7 @@
                #clock-cells = <0>;
                compatible = "ti,am35xx-gate-clock";
                clocks = <&ipss_ick>;
-               reg = <0x059c>;
+               reg = <0x032c>;
                ti,bit-shift = <1>;
        };
 
@@ -20,7 +20,7 @@
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
                clocks = <&rmii_ck>;
-               reg = <0x059c>;
+               reg = <0x032c>;
                ti,bit-shift = <9>;
        };
 
@@ -28,7 +28,7 @@
                #clock-cells = <0>;
                compatible = "ti,am35xx-gate-clock";
                clocks = <&ipss_ick>;
-               reg = <0x059c>;
+               reg = <0x032c>;
                ti,bit-shift = <2>;
        };
 
@@ -36,7 +36,7 @@
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
                clocks = <&pclk_ck>;
-               reg = <0x059c>;
+               reg = <0x032c>;
                ti,bit-shift = <10>;
        };
 
@@ -44,7 +44,7 @@
                #clock-cells = <0>;
                compatible = "ti,am35xx-gate-clock";
                clocks = <&ipss_ick>;
-               reg = <0x059c>;
+               reg = <0x032c>;
                ti,bit-shift = <0>;
        };
 
@@ -52,7 +52,7 @@
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
                clocks = <&sys_ck>;
-               reg = <0x059c>;
+               reg = <0x032c>;
                ti,bit-shift = <8>;
        };
 
@@ -60,7 +60,7 @@
                #clock-cells = <0>;
                compatible = "ti,am35xx-gate-clock";
                clocks = <&sys_ck>;
-               reg = <0x059c>;
+               reg = <0x032c>;
                ti,bit-shift = <3>;
        };
 };
index 8ae29c9..c17097d 100644 (file)
@@ -49,7 +49,7 @@
                pinctrl-0 = <&matrix_keypad_pins>;
 
                debounce-delay-ms = <5>;
-               col-scan-delay-us = <1500>;
+               col-scan-delay-us = <5>;
 
                row-gpios = <&gpio5 5 GPIO_ACTIVE_HIGH          /* Bank5, pin5 */
                                &gpio5 6 GPIO_ACTIVE_HIGH>;     /* Bank5, pin6 */
                interrupt-parent = <&gpio0>;
                interrupts = <31 0>;
 
-               wake-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+               reset-gpios = <&gpio1 28 GPIO_ACTIVE_LOW>;
 
                touchscreen-size-x = <480>;
                touchscreen-size-y = <272>;
index 15f198e..7128fad 100644 (file)
@@ -18,6 +18,7 @@
        aliases {
                rtc0 = &mcp_rtc;
                rtc1 = &tps659038_rtc;
+               rtc2 = &rtc;
        };
 
        memory {
@@ -83,7 +84,7 @@
        gpio_fan: gpio_fan {
                /* Based on 5v 500mA AFB02505HHB */
                compatible = "gpio-fan";
-               gpios =  <&tps659038_gpio 1 GPIO_ACTIVE_HIGH>;
+               gpios =  <&tps659038_gpio 2 GPIO_ACTIVE_HIGH>;
                gpio-fan,speed-map = <0     0>,
                                     <13000 1>;
                #cooling-cells = <2>;
 
        uart3_pins_default: uart3_pins_default {
                pinctrl-single,pins = <
-                       0x248 (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_rxd.rxd */
-                       0x24c (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_txd.txd */
+                       0x3f8 (PIN_INPUT_SLEW | MUX_MODE2) /* uart2_ctsn.uart3_rxd */
+                       0x3fc (PIN_INPUT_SLEW | MUX_MODE1) /* uart2_rtsn.uart3_txd */
                >;
        };
 
        mcp_rtc: rtc@6f {
                compatible = "microchip,mcp7941x";
                reg = <0x6f>;
-               interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_LOW>;  /* IRQ_SYS_1N */
+               interrupts = <GIC_SPI 2 IRQ_TYPE_EDGE_RISING>;  /* IRQ_SYS_1N */
 
                pinctrl-names = "default";
                pinctrl-0 = <&mcp79410_pins_default>;
 &uart3 {
        status = "okay";
        interrupts-extended = <&crossbar_mpu GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
-                             <&dra7_pmx_core 0x248>;
+                             <&dra7_pmx_core 0x3f8>;
 
        pinctrl-names = "default";
        pinctrl-0 = <&uart3_pins_default>;
index c675257..f076ff8 100644 (file)
@@ -69,7 +69,7 @@
                mainpll: mainpll {
                        compatible = "fixed-clock";
                        #clock-cells = <0>;
-                       clock-frequency = <2000000000>;
+                       clock-frequency = <1000000000>;
                };
                /* 25 MHz reference crystal */
                refclk: oscillator {
index ed2dd8b..218a2ac 100644 (file)
                mainpll: mainpll {
                        compatible = "fixed-clock";
                        #clock-cells = <0>;
-                       clock-frequency = <2000000000>;
+                       clock-frequency = <1000000000>;
                };
 
                /* 25 MHz reference crystal */
index 0e85fc1..ecd1318 100644 (file)
                mainpll: mainpll {
                        compatible = "fixed-clock";
                        #clock-cells = <0>;
-                       clock-frequency = <2000000000>;
+                       clock-frequency = <1000000000>;
                };
        };
 };
index a2cf215..fdd187c 100644 (file)
 
                internal-regs {
 
+                       rtc@10300 {
+                               /* No crystal connected to the internal RTC */
+                               status = "disabled";
+                       };
+
                        /* J10: VCC, NC, RX, NC, TX, GND  */
                        serial@12000 {
                                status = "okay";
index e3b08fb..990e8a2 100644 (file)
                };
 
                internal-regs {
+                       rtc@10300 {
+                               /* No crystal connected to the internal RTC */
+                               status = "disabled";
+                       };
                        serial@12000 {
                                status = "okay";
                        };
index de8427b..289806a 100644 (file)
                        ti,hwmods = "usb_otg_hs";
 
                        usb0: usb@47401000 {
-                               compatible = "ti,musb-am33xx";
+                               compatible = "ti,musb-dm816";
                                reg = <0x47401400 0x400
                                       0x47401000 0x200>;
                                reg-names = "mc", "control";
                        };
 
                        usb1: usb@47401800 {
-                               compatible = "ti,musb-am33xx";
+                               compatible = "ti,musb-dm816";
                                reg = <0x47401c00 0x400
                                       0x47401800 0x200>;
                                reg-names = "mc", "control";
index aae7efc..e6fa251 100644 (file)
@@ -87,6 +87,7 @@
 
                /* connect xtal input to 25MHz reference */
                clocks = <&ref25>;
+               clock-names = "xtal";
 
                /* connect xtal input as source of pll0 and pll1 */
                silabs,pll-source = <0 0>, <1 0>;
index 5332b57..f03a091 100644 (file)
                        ti,clock-cycles = <16>;
 
                        reg = <0x4ae07ddc 0x4>, <0x4ae07de0 0x4>,
-                             <0x4ae06014 0x4>, <0x4a003b20 0x8>,
+                             <0x4ae06014 0x4>, <0x4a003b20 0xc>,
                              <0x4ae0c158 0x4>;
                        reg-names = "setup-address", "control-address",
                                    "int-address", "efuse-address",
                        ti,clock-cycles = <16>;
 
                        reg = <0x4ae07e34 0x4>, <0x4ae07e24 0x4>,
-                             <0x4ae06010 0x4>, <0x4a0025cc 0x8>,
+                             <0x4ae06010 0x4>, <0x4a0025cc 0xc>,
                              <0x4a002470 0x4>;
                        reg-names = "setup-address", "control-address",
                                    "int-address", "efuse-address",
                        ti,clock-cycles = <16>;
 
                        reg = <0x4ae07e30 0x4>, <0x4ae07e20 0x4>,
-                             <0x4ae06010 0x4>, <0x4a0025e0 0x8>,
+                             <0x4ae06010 0x4>, <0x4a0025e0 0xc>,
                              <0x4a00246c 0x4>;
                        reg-names = "setup-address", "control-address",
                                    "int-address", "efuse-address",
                        ti,clock-cycles = <16>;
 
                        reg = <0x4ae07de4 0x4>, <0x4ae07de8 0x4>,
-                             <0x4ae06010 0x4>, <0x4a003b08 0x8>,
+                             <0x4ae06010 0x4>, <0x4a003b08 0xc>,
                              <0x4ae0c154 0x4>;
                        reg-names = "setup-address", "control-address",
                                    "int-address", "efuse-address",
                        status = "disabled";
                };
 
-               rtc@48838000 {
+               rtc: rtc@48838000 {
                        compatible = "ti,am3352-rtc";
                        reg = <0x48838000 0x100>;
                        interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>,
index 8de12af..d6b49e5 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <dt-bindings/sound/samsung-i2s.h>
 #include <dt-bindings/input/input.h>
+#include <dt-bindings/clock/maxim,max77686.h>
 #include "exynos4412.dtsi"
 
 / {
 
        rtc@10070000 {
                status = "okay";
+               clocks = <&clock CLK_RTC>, <&max77686 MAX77686_CLK_AP>;
+               clock-names = "rtc", "rtc_src";
        };
 
        g2d@10800000 {
index 173ffa4..792394d 100644 (file)
 
                        display-timings {
                                timing-0 {
-                                       clock-frequency = <0>;
+                                       clock-frequency = <57153600>;
                                        hactive = <720>;
                                        vactive = <1280>;
                                        hfront-porch = <5>;
index 2657e84..1eca97e 100644 (file)
        num-slots = <1>;
        broken-cd;
        cap-sdio-irq;
+       keep-power-in-suspend;
        card-detect-delay = <200>;
        samsung,dw-mshc-ciu-div = <3>;
        samsung,dw-mshc-sdr-timing = <2 3>;
index 0788d08..146e711 100644 (file)
        num-slots = <1>;
        broken-cd;
        cap-sdio-irq;
+       keep-power-in-suspend;
        card-detect-delay = <200>;
        clock-frequency = <400000000>;
        samsung,dw-mshc-ciu-div = <1>;
index 5d31fc1..2180a01 100644 (file)
@@ -28,7 +28,7 @@ trips {
                type = "active";
        };
        cpu-crit-0 {
-               temperature = <1200000>; /* millicelsius */
+               temperature = <120000>; /* millicelsius */
                hysteresis = <0>; /* millicelsius */
                type = "critical";
        };
index f67b23f..4531753 100644 (file)
                clock-names = "dp";
                phys = <&dp_phy>;
                phy-names = "dp";
+               power-domains = <&disp_pd>;
        };
 
        mipi_phy: video-phy@10040714 {
index 48adfa8..356e963 100644 (file)
@@ -18,7 +18,7 @@ trips {
                type = "active";
        };
        cpu-crit-0 {
-               temperature = <1050000>; /* millicelsius */
+               temperature = <105000>; /* millicelsius */
                hysteresis = <0>; /* millicelsius */
                type = "critical";
        };
index 412f41d..02eb8b1 100644 (file)
        num-slots = <1>;
        broken-cd;
        cap-sdio-irq;
+       keep-power-in-suspend;
        card-detect-delay = <200>;
        clock-frequency = <400000000>;
        samsung,dw-mshc-ciu-div = <1>;
index 7e6eef2..8204539 100644 (file)
@@ -12,6 +12,7 @@
  */
 
 /dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
 #include "imx23.dtsi"
 
 / {
@@ -93,6 +94,7 @@
 
        ahb@80080000 {
                usb0: usb@80080000 {
+                       dr_mode = "host";
                        vbus-supply = <&reg_usb0_vbus>;
                        status = "okay";
                };
 
                user {
                        label = "green";
-                       gpios = <&gpio2 1 1>;
+                       gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
                };
        };
 };
index e4d3aec..677f81d 100644 (file)
 
                        pwm4: pwm@53fc8000 {
                                compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
+                               #pwm-cells = <2>;
                                reg = <0x53fc8000 0x4000>;
                                clocks = <&clks 108>, <&clks 52>;
                                clock-names = "ipg", "per";
index 6951b66..bc215e4 100644 (file)
 
                        fec: ethernet@1002b000 {
                                compatible = "fsl,imx27-fec";
-                               reg = <0x1002b000 0x4000>;
+                               reg = <0x1002b000 0x1000>;
                                interrupts = <50>;
                                clocks = <&clks IMX27_CLK_FEC_IPG_GATE>,
                                         <&clks IMX27_CLK_FEC_AHB_GATE>;
index 25e25f8..4e073e8 100644 (file)
                                              80 81 68 69
                                              70 71 72 73
                                              74 75 76 77>;
-                               interrupt-names = "auart4-rx", "aurat4-tx", "spdif-tx", "empty",
+                               interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty",
                                                  "saif0", "saif1", "i2c0", "i2c1",
                                                  "auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx",
                                                  "auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx";
index 19cc269..1ce6133 100644 (file)
@@ -31,6 +31,7 @@
                        regulator-min-microvolt = <5000000>;
                        regulator-max-microvolt = <5000000>;
                        gpio = <&gpio4 15 0>;
+                       enable-active-high;
                };
 
                reg_usb_h1_vbus: regulator@1 {
@@ -40,6 +41,7 @@
                        regulator-min-microvolt = <5000000>;
                        regulator-max-microvolt = <5000000>;
                        gpio = <&gpio1 0 0>;
+                       enable-active-high;
                };
        };
 
index 46b2fed..3b24b12 100644 (file)
 &i2c3 {
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_i2c3>;
-       pinctrl-assert-gpios = <&gpio5 4 GPIO_ACTIVE_HIGH>;
        status = "okay";
 
        max7310_a: gpio@30 {
index 134d3f2..921de66 100644 (file)
        nand@0,0 {
                reg = <0 0 4>; /* CS0, offset 0, IO size 4 */
                nand-bus-width = <16>;
+               gpmc,device-width = <2>;
+               ti,nand-ecc-opt = "sw";
 
                gpmc,sync-clk-ps = <0>;
                gpmc,cs-on-ns = <0>;
index a293158..5f5e0f3 100644 (file)
                DRVDD-supply = <&vmmc2>;
                IOVDD-supply = <&vio>;
                DVDD-supply = <&vio>;
+
+               ai3x-micbias-vg = <1>;
        };
 
        tlv320aic3x_aux: tlv320aic3x@19 {
                DRVDD-supply = <&vmmc2>;
                IOVDD-supply = <&vio>;
                DVDD-supply = <&vio>;
+
+               ai3x-micbias-vg = <2>;
        };
 
        tsl2563: tsl2563@29 {
                touchscreen-fuzz-x = <4>;
                touchscreen-fuzz-y = <7>;
                touchscreen-fuzz-pressure = <2>;
-               touchscreen-max-x = <4096>;
-               touchscreen-max-y = <4096>;
+               touchscreen-size-x = <4096>;
+               touchscreen-size-y = <4096>;
                touchscreen-max-pressure = <2048>;
 
                ti,x-plate-ohms = <280>;
index d18a90f..69a40cf 100644 (file)
                };
 
                mmu_isp: mmu@480bd400 {
+                       #iommu-cells = <0>;
                        compatible = "ti,omap2-iommu";
                        reg = <0x480bd400 0x80>;
                        interrupts = <24>;
                };
 
                mmu_iva: mmu@5d000000 {
+                       #iommu-cells = <0>;
                        compatible = "ti,omap2-iommu";
                        reg = <0x5d000000 0x80>;
                        interrupts = <28>;
index efe5f73..7d24ae0 100644 (file)
         * hierarchy.
         */
        ocp {
-               compatible = "ti,omap4-l3-noc", "simple-bus";
+               compatible = "ti,omap5-l3-noc", "simple-bus";
                #address-cells = <1>;
                #size-cells = <1>;
                ranges;
index 74c3212..824ddab 100644 (file)
                compatible = "adi,adv7511w";
                reg = <0x39>;
                interrupt-parent = <&gpio3>;
-               interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+               interrupts = <29 IRQ_TYPE_LEVEL_LOW>;
 
                adi,input-depth = <8>;
                adi,input-colorspace = "rgb";
index bfd3f1c..2201cd5 100644 (file)
                        status = "disabled";
                };
 
-               vmmci: regulator-gpio {
-                       compatible = "regulator-gpio";
-
-                       regulator-min-microvolt = <1800000>;
-                       regulator-max-microvolt = <2900000>;
-                       regulator-name = "mmci-reg";
-                       regulator-type = "voltage";
-
-                       startup-delay-us = <100>;
-                       enable-active-high;
-
-                       states = <1800000 0x1
-                                 2900000 0x0>;
-
-                       status = "disabled";
-               };
-
                mcde@a0350000 {
                        compatible = "stericsson,mcde";
                        reg = <0xa0350000 0x1000>, /* MCDE */
index bf8f0ed..744c1e3 100644 (file)
                        pinctrl-1 = <&i2c3_sleep_mode>;
                };
 
+               vmmci: regulator-gpio {
+                       compatible = "regulator-gpio";
+
+                       regulator-min-microvolt = <1800000>;
+                       regulator-max-microvolt = <2900000>;
+                       regulator-name = "mmci-reg";
+                       regulator-type = "voltage";
+
+                       startup-delay-us = <100>;
+                       enable-active-high;
+
+                       states = <1800000 0x1
+                                 2900000 0x0>;
+               };
+
                // External Micro SD slot
                sdi0_per1@80126000 {
                        arm,primecell-periphid = <0x10480180>;
index 206826a..1bc84eb 100644 (file)
                };
 
                vmmci: regulator-gpio {
+                       compatible = "regulator-gpio";
+
                        gpios = <&gpio7 4 0x4>;
                        enable-gpio = <&gpio6 25 0x4>;
+
+                       regulator-min-microvolt = <1800000>;
+                       regulator-max-microvolt = <2900000>;
+                       regulator-name = "mmci-reg";
+                       regulator-type = "voltage";
+
+                       startup-delay-us = <100>;
+                       enable-active-high;
+
+                       states = <1800000 0x1
+                                 2900000 0x0>;
                };
 
                // External Micro SD slot
index cf01c81..13cc7ca 100644 (file)
                         <&tegra_car TEGRA124_CLK_PLL_U>,
                         <&tegra_car TEGRA124_CLK_USBD>;
                clock-names = "reg", "pll_u", "utmi-pads";
-               resets = <&tegra_car 59>, <&tegra_car 22>;
+               resets = <&tegra_car 22>, <&tegra_car 22>;
                reset-names = "usb", "utmi-pads";
                nvidia,hssync-start-delay = <0>;
                nvidia,idle-wait-delay = <17>;
                nvidia,hssquelch-level = <2>;
                nvidia,hsdiscon-level = <5>;
                nvidia,xcvr-hsslew = <12>;
+               nvidia,has-utmi-pad-registers;
                status = "disabled";
        };
 
                         <&tegra_car TEGRA124_CLK_PLL_U>,
                         <&tegra_car TEGRA124_CLK_USBD>;
                clock-names = "reg", "pll_u", "utmi-pads";
-               resets = <&tegra_car 22>, <&tegra_car 22>;
+               resets = <&tegra_car 58>, <&tegra_car 22>;
                reset-names = "usb", "utmi-pads";
                nvidia,hssync-start-delay = <0>;
                nvidia,idle-wait-delay = <17>;
                nvidia,hssquelch-level = <2>;
                nvidia,hsdiscon-level = <5>;
                nvidia,xcvr-hsslew = <12>;
-               nvidia,has-utmi-pad-registers;
                status = "disabled";
        };
 
                         <&tegra_car TEGRA124_CLK_PLL_U>,
                         <&tegra_car TEGRA124_CLK_USBD>;
                clock-names = "reg", "pll_u", "utmi-pads";
-               resets = <&tegra_car 58>, <&tegra_car 22>;
+               resets = <&tegra_car 59>, <&tegra_car 22>;
                reset-names = "usb", "utmi-pads";
                nvidia,hssync-start-delay = <0>;
                nvidia,idle-wait-delay = <17>;
index 7a2aeac..107395c 100644 (file)
                compatible = "arm,cortex-a15-pmu";
                interrupts = <0 68 4>,
                             <0 69 4>;
+               interrupt-affinity = <&cpu0>, <&cpu1>;
        };
 
        oscclk6a: oscclk6a {
index 23662b5..d949fac 100644 (file)
                #address-cells = <1>;
                #size-cells = <0>;
 
-               cpu@0 {
+               A9_0: cpu@0 {
                        device_type = "cpu";
                        compatible = "arm,cortex-a9";
                        reg = <0>;
                        next-level-cache = <&L2>;
                };
 
-               cpu@1 {
+               A9_1: cpu@1 {
                        device_type = "cpu";
                        compatible = "arm,cortex-a9";
                        reg = <1>;
                        next-level-cache = <&L2>;
                };
 
-               cpu@2 {
+               A9_2: cpu@2 {
                        device_type = "cpu";
                        compatible = "arm,cortex-a9";
                        reg = <2>;
                        next-level-cache = <&L2>;
                };
 
-               cpu@3 {
+               A9_3: cpu@3 {
                        device_type = "cpu";
                        compatible = "arm,cortex-a9";
                        reg = <3>;
                compatible = "arm,pl310-cache";
                reg = <0x1e00a000 0x1000>;
                interrupts = <0 43 4>;
+               cache-unified;
                cache-level = <2>;
                arm,data-latency = <1 1 1>;
                arm,tag-latency = <1 1 1>;
                             <0 61 4>,
                             <0 62 4>,
                             <0 63 4>;
+               interrupt-affinity = <&A9_0>, <&A9_1>, <&A9_2>, <&A9_3>;
+
        };
 
        dcc {
index a5cd2ed..9ea54b3 100644 (file)
                };
 
                gem0: ethernet@e000b000 {
-                       compatible = "cdns,gem";
+                       compatible = "cdns,zynq-gem";
                        reg = <0xe000b000 0x1000>;
                        status = "disabled";
                        interrupts = <0 22 4>;
                };
 
                gem1: ethernet@e000c000 {
-                       compatible = "cdns,gem";
+                       compatible = "cdns,zynq-gem";
                        reg = <0xe000c000 0x1000>;
                        status = "disabled";
                        interrupts = <0 45 4>;
index ab86655..fbbb191 100644 (file)
@@ -39,11 +39,14 @@ CONFIG_ARCH_HIP04=y
 CONFIG_ARCH_KEYSTONE=y
 CONFIG_ARCH_MESON=y
 CONFIG_ARCH_MXC=y
+CONFIG_SOC_IMX50=y
 CONFIG_SOC_IMX51=y
 CONFIG_SOC_IMX53=y
 CONFIG_SOC_IMX6Q=y
 CONFIG_SOC_IMX6SL=y
+CONFIG_SOC_IMX6SX=y
 CONFIG_SOC_VF610=y
+CONFIG_SOC_LS1021A=y
 CONFIG_ARCH_OMAP3=y
 CONFIG_ARCH_OMAP4=y
 CONFIG_SOC_OMAP5=y
@@ -426,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y
 CONFIG_USB_EHCI_TEGRA=y
 CONFIG_USB_EHCI_HCD_STI=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_ISP1760=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_STI=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
index 9ff7b54..3743ca2 100644 (file)
@@ -393,7 +393,7 @@ CONFIG_TI_EDMA=y
 CONFIG_DMA_OMAP=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXTCON=m
-CONFIG_EXTCON_GPIO=m
+CONFIG_EXTCON_USB_GPIO=m
 CONFIG_EXTCON_PALMAS=m
 CONFIG_TI_EMIF=m
 CONFIG_PWM=y
index d2f81e6..6c2327e 100644 (file)
@@ -81,7 +81,7 @@ do {                                                                  \
 #define read_barrier_depends()         do { } while(0)
 #define smp_read_barrier_depends()     do { } while(0)
 
-#define set_mb(var, value)     do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value)       do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 
 #define smp_mb__before_atomic()        smp_mb()
 #define smp_mb__after_atomic() smp_mb()
index 8e3fcb9..2ef282f 100644 (file)
@@ -25,7 +25,7 @@ struct dma_iommu_mapping {
 };
 
 struct dma_iommu_mapping *
-arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size);
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size);
 
 void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
 
index 4e78065..5eed828 100644 (file)
@@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
        if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
                return -EFAULT;
 
+       preempt_disable();
        __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
        "1:     " TUSER(ldr) "  %1, [%4]\n"
        "       teq     %1, %2\n"
@@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
        : "cc", "memory");
 
        *uval = val;
+       preempt_enable();
+
        return ret;
 }
 
@@ -124,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
        if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
                return -EFAULT;
 
-       pagefault_disable();    /* implies preempt_disable() */
+#ifndef CONFIG_SMP
+       preempt_disable();
+#endif
+       pagefault_disable();
 
        switch (op) {
        case FUTEX_OP_SET:
@@ -146,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
                ret = -ENOSYS;
        }
 
-       pagefault_enable();     /* subsumes preempt_enable() */
+       pagefault_enable();
+#ifndef CONFIG_SMP
+       preempt_enable();
+#endif
 
        if (!ret) {
                switch (cmp) {
index 2fe85ff..370f7a7 100644 (file)
@@ -18,7 +18,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
 #define topology_physical_package_id(cpu)      (cpu_topology[cpu].socket_id)
 #define topology_core_id(cpu)          (cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu)     (&cpu_topology[cpu].core_sibling)
-#define topology_thread_cpumask(cpu)   (&cpu_topology[cpu].thread_sibling)
+#define topology_sibling_cpumask(cpu)  (&cpu_topology[cpu].thread_sibling)
 
 void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
index 2f7e6ff..0b579b2 100644 (file)
@@ -110,5 +110,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 bool xen_arch_need_swiotlb(struct device *dev,
                           unsigned long pfn,
                           unsigned long mfn);
+unsigned long xen_get_swiotlb_free_pages(unsigned int order);
 
 #endif /* _ASM_ARM_XEN_PAGE_H */
index f8ccc21..4e7f40c 100644 (file)
@@ -33,7 +33,9 @@ ret_fast_syscall:
  UNWIND(.fnstart       )
  UNWIND(.cantunwind    )
        disable_irq                             @ disable interrupts
-       ldr     r1, [tsk, #TI_FLAGS]
+       ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
+       tst     r1, #_TIF_SYSCALL_WORK
+       bne     __sys_trace_return
        tst     r1, #_TIF_WORK_MASK
        bne     fast_work_pending
        asm_trace_hardirqs_on
index 91c7ba1..3b8c283 100644 (file)
@@ -303,9 +303,15 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 
 static int of_pmu_irq_cfg(struct platform_device *pdev)
 {
-       int i;
-       int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+       int i, irq;
+       int *irqs;
 
+       /* Don't bother with PPIs; they're already affine */
+       irq = platform_get_irq(pdev, 0);
+       if (irq >= 0 && irq_is_percpu(irq))
+               return 0;
+
+       irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
        if (!irqs)
                return -ENOMEM;
 
@@ -317,7 +323,7 @@ static int of_pmu_irq_cfg(struct platform_device *pdev)
                                      i);
                if (!dn) {
                        pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
-                               of_node_full_name(dn), i);
+                               of_node_full_name(pdev->dev.of_node), i);
                        break;
                }
 
index acd5b56..5f5cd56 100644 (file)
@@ -159,6 +159,8 @@ extern void exynos_enter_aftr(void);
 
 extern struct cpuidle_exynos_data cpuidle_coupled_exynos_data;
 
+extern void exynos_set_delayed_reset_assertion(bool enable);
+
 extern void s5p_init_cpu(void __iomem *cpuid_addr);
 extern unsigned int samsung_rev(void);
 extern void __iomem *cpu_boot_reg_base(void);
index bcde0dd..5917a30 100644 (file)
@@ -167,6 +167,33 @@ static void __init exynos_init_io(void)
 }
 
 /*
+ * Set or clear the USE_DELAYED_RESET_ASSERTION option. Used by smp code
+ * and suspend.
+ *
+ * This is necessary only on Exynos4 SoCs. When system is running
+ * USE_DELAYED_RESET_ASSERTION should be set so the ARM CLK clock down
+ * feature could properly detect global idle state when secondary CPU is
+ * powered down.
+ *
+ * However this should not be set when such system is going into suspend.
+ */
+void exynos_set_delayed_reset_assertion(bool enable)
+{
+       if (of_machine_is_compatible("samsung,exynos4")) {
+               unsigned int tmp, core_id;
+
+               for (core_id = 0; core_id < num_possible_cpus(); core_id++) {
+                       tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id));
+                       if (enable)
+                               tmp |= S5P_USE_DELAYED_RESET_ASSERTION;
+                       else
+                               tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION);
+                       pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id));
+               }
+       }
+}
+
+/*
  * Apparently, these SoCs are not able to wake-up from suspend using
  * the PMU. Too bad. Should they suddenly become capable of such a
  * feat, the matches below should be moved to suspend.c.
index ebd135b..a825bca 100644 (file)
 
 extern void exynos4_secondary_startup(void);
 
-/*
- * Set or clear the USE_DELAYED_RESET_ASSERTION option, set on Exynos4 SoCs
- * during hot-(un)plugging CPUx.
- *
- * The feature can be cleared safely during first boot of secondary CPU.
- *
- * Exynos4 SoCs require setting USE_DELAYED_RESET_ASSERTION during powering
- * down a CPU so the CPU idle clock down feature could properly detect global
- * idle state when CPUx is off.
- */
-static void exynos_set_delayed_reset_assertion(u32 core_id, bool enable)
-{
-       if (soc_is_exynos4()) {
-               unsigned int tmp;
-
-               tmp = pmu_raw_readl(EXYNOS_ARM_CORE_OPTION(core_id));
-               if (enable)
-                       tmp |= S5P_USE_DELAYED_RESET_ASSERTION;
-               else
-                       tmp &= ~(S5P_USE_DELAYED_RESET_ASSERTION);
-               pmu_raw_writel(tmp, EXYNOS_ARM_CORE_OPTION(core_id));
-       }
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline void cpu_leave_lowpower(u32 core_id)
 {
@@ -73,8 +49,6 @@ static inline void cpu_leave_lowpower(u32 core_id)
          : "=&r" (v)
          : "Ir" (CR_C), "Ir" (0x40)
          : "cc");
-
-        exynos_set_delayed_reset_assertion(core_id, false);
 }
 
 static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
@@ -87,14 +61,6 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
                /* Turn the CPU off on next WFI instruction. */
                exynos_cpu_power_down(core_id);
 
-               /*
-                * Exynos4 SoCs require setting
-                * USE_DELAYED_RESET_ASSERTION so the CPU idle
-                * clock down feature could properly detect
-                * global idle state when CPUx is off.
-                */
-               exynos_set_delayed_reset_assertion(core_id, true);
-
                wfi();
 
                if (pen_release == core_id) {
@@ -371,9 +337,6 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
                udelay(10);
        }
 
-       /* No harm if this is called during first boot of secondary CPU */
-       exynos_set_delayed_reset_assertion(core_id, false);
-
        /*
         * now the secondary core is starting up let it run its
         * calibrations, then wait for it to finish
@@ -420,6 +383,8 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus)
 
        exynos_sysram_init();
 
+       exynos_set_delayed_reset_assertion(true);
+
        if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
                scu_enable(scu_base_addr());
 
index cbe56b3..a968653 100644 (file)
@@ -188,7 +188,7 @@ no_clk:
                args.np = np;
                args.args_count = 0;
                child_domain = of_genpd_get_from_provider(&args);
-               if (!child_domain)
+               if (IS_ERR(child_domain))
                        continue;
 
                if (of_parse_phandle_with_args(np, "power-domains",
@@ -196,7 +196,7 @@ no_clk:
                        continue;
 
                parent_domain = of_genpd_get_from_provider(&args);
-               if (!parent_domain)
+               if (IS_ERR(parent_domain))
                        continue;
 
                if (pm_genpd_add_subdomain(parent_domain, child_domain))
index 3e6aea7..7d23ce0 100644 (file)
@@ -87,8 +87,8 @@ static unsigned int exynos_pmu_spare3;
 static u32 exynos_irqwake_intmask = 0xffffffff;
 
 static const struct exynos_wkup_irq exynos3250_wkup_irq[] = {
-       { 105, BIT(1) }, /* RTC alarm */
-       { 106, BIT(2) }, /* RTC tick */
+       { 73, BIT(1) }, /* RTC alarm */
+       { 74, BIT(2) }, /* RTC tick */
        { /* sentinel */ },
 };
 
@@ -342,6 +342,8 @@ static void exynos_pm_enter_sleep_mode(void)
 
 static void exynos_pm_prepare(void)
 {
+       exynos_set_delayed_reset_assertion(false);
+
        /* Set wake-up mask registers */
        exynos_pm_set_wakeup_mask();
 
@@ -482,6 +484,7 @@ early_wakeup:
 
        /* Clear SLEEP mode set in INFORM1 */
        pmu_raw_writel(0x0, S5P_INFORM1);
+       exynos_set_delayed_reset_assertion(true);
 }
 
 static void exynos3250_pm_resume(void)
@@ -723,8 +726,10 @@ void __init exynos_pm_init(void)
                return;
        }
 
-       if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL)))
+       if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) {
                pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+               return;
+       }
 
        pm_data = (const struct exynos_pm_data *) match->data;
 
index 38a4526..dd88369 100644 (file)
@@ -12,6 +12,8 @@
 #ifndef __GEMINI_COMMON_H__
 #define __GEMINI_COMMON_H__
 
+#include <linux/reboot.h>
+
 struct mtd_partition;
 
 extern void gemini_map_io(void);
@@ -26,6 +28,6 @@ extern int platform_register_pflash(unsigned int size,
                                    struct mtd_partition *parts,
                                    unsigned int nr_parts);
 
-extern void gemini_restart(char mode, const char *cmd);
+extern void gemini_restart(enum reboot_mode mode, const char *cmd);
 
 #endif /* __GEMINI_COMMON_H__ */
index b266597..21a6d6d 100644 (file)
@@ -14,7 +14,9 @@
 #include <mach/hardware.h>
 #include <mach/global_reg.h>
 
-void gemini_restart(char mode, const char *cmd)
+#include "common.h"
+
+void gemini_restart(enum reboot_mode mode, const char *cmd)
 {
        __raw_writel(RESET_GLOBAL | RESET_CPU1,
                     IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET);
index fb8d4a2..a5edd7d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 Pengutronix, Wolfram Sang <w.sang@pengutronix.de>
+ * Copyright (C) 2010 Pengutronix, Wolfram Sang <kernel@pengutronix.de>
  *
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License version 2 as published by the
index 4d60005..6d0893a 100644 (file)
@@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void)
        struct device_node *np;
 
        np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc");
-       if (WARN_ON(!np ||
-                   !of_find_property(np, "interrupt-controller", NULL)))
-               pr_warn("Outdated DT detected, system is about to crash!!!\n");
+       if (WARN_ON(!np))
+               return;
+
+       if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) {
+               pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+
+               /* map GPC, so that at least CPUidle and WARs keep working */
+               gpc_base = of_iomap(np, 0);
+       }
 }
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS
@@ -443,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev)
        struct regulator *pu_reg;
        int ret;
 
+       /* bail out if DT too old and doesn't provide the necessary info */
+       if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells"))
+               return 0;
+
        pu_reg = devm_regulator_get_optional(&pdev->dev, "pu");
        if (PTR_ERR(pu_reg) == -ENODEV)
                pu_reg = NULL;
index 355b089..752969f 100644 (file)
  */
 #define LINKS_PER_OCP_IF               2
 
+/*
+ * Address offset (in bytes) between the reset control and the reset
+ * status registers: 4 bytes on OMAP4
+ */
+#define OMAP4_RST_CTRL_ST_OFFSET       4
+
 /**
  * struct omap_hwmod_soc_ops - fn ptrs for some SoC-specific operations
  * @enable_module: function to enable a module (via MODULEMODE)
@@ -3016,10 +3022,12 @@ static int _omap4_deassert_hardreset(struct omap_hwmod *oh,
        if (ohri->st_shift)
                pr_err("omap_hwmod: %s: %s: hwmod data error: OMAP4 does not support st_shift\n",
                       oh->name, ohri->name);
-       return omap_prm_deassert_hardreset(ohri->rst_shift, 0,
+       return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->rst_shift,
                                           oh->clkdm->pwrdm.ptr->prcm_partition,
                                           oh->clkdm->pwrdm.ptr->prcm_offs,
-                                          oh->prcm.omap4.rstctrl_offs, 0);
+                                          oh->prcm.omap4.rstctrl_offs,
+                                          oh->prcm.omap4.rstctrl_offs +
+                                          OMAP4_RST_CTRL_ST_OFFSET);
 }
 
 /**
@@ -3048,27 +3056,6 @@ static int _omap4_is_hardreset_asserted(struct omap_hwmod *oh,
 }
 
 /**
- * _am33xx_assert_hardreset - call AM33XX PRM hardreset fn with hwmod args
- * @oh: struct omap_hwmod * to assert hardreset
- * @ohri: hardreset line data
- *
- * Call am33xx_prminst_assert_hardreset() with parameters extracted
- * from the hwmod @oh and the hardreset line data @ohri.  Only
- * intended for use as an soc_ops function pointer.  Passes along the
- * return value from am33xx_prminst_assert_hardreset().  XXX This
- * function is scheduled for removal when the PRM code is moved into
- * drivers/.
- */
-static int _am33xx_assert_hardreset(struct omap_hwmod *oh,
-                                  struct omap_hwmod_rst_info *ohri)
-
-{
-       return omap_prm_assert_hardreset(ohri->rst_shift, 0,
-                                        oh->clkdm->pwrdm.ptr->prcm_offs,
-                                        oh->prcm.omap4.rstctrl_offs);
-}
-
-/**
  * _am33xx_deassert_hardreset - call AM33XX PRM hardreset fn with hwmod args
  * @oh: struct omap_hwmod * to deassert hardreset
  * @ohri: hardreset line data
@@ -3083,32 +3070,13 @@ static int _am33xx_assert_hardreset(struct omap_hwmod *oh,
 static int _am33xx_deassert_hardreset(struct omap_hwmod *oh,
                                     struct omap_hwmod_rst_info *ohri)
 {
-       return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift, 0,
+       return omap_prm_deassert_hardreset(ohri->rst_shift, ohri->st_shift,
+                                          oh->clkdm->pwrdm.ptr->prcm_partition,
                                           oh->clkdm->pwrdm.ptr->prcm_offs,
                                           oh->prcm.omap4.rstctrl_offs,
                                           oh->prcm.omap4.rstst_offs);
 }
 
-/**
- * _am33xx_is_hardreset_asserted - call AM33XX PRM hardreset fn with hwmod args
- * @oh: struct omap_hwmod * to test hardreset
- * @ohri: hardreset line data
- *
- * Call am33xx_prminst_is_hardreset_asserted() with parameters
- * extracted from the hwmod @oh and the hardreset line data @ohri.
- * Only intended for use as an soc_ops function pointer.  Passes along
- * the return value from am33xx_prminst_is_hardreset_asserted().  XXX
- * This function is scheduled for removal when the PRM code is moved
- * into drivers/.
- */
-static int _am33xx_is_hardreset_asserted(struct omap_hwmod *oh,
-                                       struct omap_hwmod_rst_info *ohri)
-{
-       return omap_prm_is_hardreset_asserted(ohri->rst_shift, 0,
-                                             oh->clkdm->pwrdm.ptr->prcm_offs,
-                                             oh->prcm.omap4.rstctrl_offs);
-}
-
 /* Public functions */
 
 u32 omap_hwmod_read(struct omap_hwmod *oh, u16 reg_offs)
@@ -3908,21 +3876,13 @@ void __init omap_hwmod_init(void)
                soc_ops.init_clkdm = _init_clkdm;
                soc_ops.update_context_lost = _omap4_update_context_lost;
                soc_ops.get_context_lost = _omap4_get_context_lost;
-       } else if (soc_is_am43xx()) {
+       } else if (cpu_is_ti816x() || soc_is_am33xx() || soc_is_am43xx()) {
                soc_ops.enable_module = _omap4_enable_module;
                soc_ops.disable_module = _omap4_disable_module;
                soc_ops.wait_target_ready = _omap4_wait_target_ready;
                soc_ops.assert_hardreset = _omap4_assert_hardreset;
-               soc_ops.deassert_hardreset = _omap4_deassert_hardreset;
-               soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted;
-               soc_ops.init_clkdm = _init_clkdm;
-       } else if (cpu_is_ti816x() || soc_is_am33xx()) {
-               soc_ops.enable_module = _omap4_enable_module;
-               soc_ops.disable_module = _omap4_disable_module;
-               soc_ops.wait_target_ready = _omap4_wait_target_ready;
-               soc_ops.assert_hardreset = _am33xx_assert_hardreset;
                soc_ops.deassert_hardreset = _am33xx_deassert_hardreset;
-               soc_ops.is_hardreset_asserted = _am33xx_is_hardreset_asserted;
+               soc_ops.is_hardreset_asserted = _omap4_is_hardreset_asserted;
                soc_ops.init_clkdm = _init_clkdm;
        } else {
                WARN(1, "omap_hwmod: unknown SoC type\n");
index e222314..17e8004 100644 (file)
@@ -544,6 +544,44 @@ static struct omap_hwmod am43xx_hdq1w_hwmod = {
        },
 };
 
+static struct omap_hwmod_class_sysconfig am43xx_vpfe_sysc = {
+       .rev_offs       = 0x0,
+       .sysc_offs      = 0x104,
+       .sysc_flags     = SYSC_HAS_MIDLEMODE | SYSC_HAS_SIDLEMODE,
+       .idlemodes      = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+                               MSTANDBY_FORCE | MSTANDBY_SMART | MSTANDBY_NO),
+       .sysc_fields    = &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class am43xx_vpfe_hwmod_class = {
+       .name           = "vpfe",
+       .sysc           = &am43xx_vpfe_sysc,
+};
+
+static struct omap_hwmod am43xx_vpfe0_hwmod = {
+       .name           = "vpfe0",
+       .class          = &am43xx_vpfe_hwmod_class,
+       .clkdm_name     = "l3s_clkdm",
+       .prcm           = {
+               .omap4  = {
+                       .modulemode     = MODULEMODE_SWCTRL,
+                       .clkctrl_offs   = AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET,
+               },
+       },
+};
+
+static struct omap_hwmod am43xx_vpfe1_hwmod = {
+       .name           = "vpfe1",
+       .class          = &am43xx_vpfe_hwmod_class,
+       .clkdm_name     = "l3s_clkdm",
+       .prcm           = {
+               .omap4  = {
+                       .modulemode     = MODULEMODE_SWCTRL,
+                       .clkctrl_offs   = AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET,
+               },
+       },
+};
+
 /* Interfaces */
 static struct omap_hwmod_ocp_if am43xx_l3_main__l4_hs = {
        .master         = &am33xx_l3_main_hwmod,
@@ -825,6 +863,34 @@ static struct omap_hwmod_ocp_if am43xx_l4_ls__hdq1w = {
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
+static struct omap_hwmod_ocp_if am43xx_l3__vpfe0 = {
+       .master         = &am43xx_vpfe0_hwmod,
+       .slave          = &am33xx_l3_main_hwmod,
+       .clk            = "l3_gclk",
+       .user           = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if am43xx_l3__vpfe1 = {
+       .master         = &am43xx_vpfe1_hwmod,
+       .slave          = &am33xx_l3_main_hwmod,
+       .clk            = "l3_gclk",
+       .user           = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe0 = {
+       .master         = &am33xx_l4_ls_hwmod,
+       .slave          = &am43xx_vpfe0_hwmod,
+       .clk            = "l4ls_gclk",
+       .user           = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if am43xx_l4_ls__vpfe1 = {
+       .master         = &am33xx_l4_ls_hwmod,
+       .slave          = &am43xx_vpfe1_hwmod,
+       .clk            = "l4ls_gclk",
+       .user           = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
 static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = {
        &am33xx_l4_wkup__synctimer,
        &am43xx_l4_ls__timer8,
@@ -925,6 +991,10 @@ static struct omap_hwmod_ocp_if *am43xx_hwmod_ocp_ifs[] __initdata = {
        &am43xx_l4_ls__dss_dispc,
        &am43xx_l4_ls__dss_rfbi,
        &am43xx_l4_ls__hdq1w,
+       &am43xx_l3__vpfe0,
+       &am43xx_l3__vpfe1,
+       &am43xx_l4_ls__vpfe0,
+       &am43xx_l4_ls__vpfe1,
        NULL,
 };
 
index 48df3b5..d026199 100644 (file)
 #define AM43XX_CM_PER_USBPHYOCP2SCP1_CLKCTRL_OFFSET    0x05C0
 #define AM43XX_CM_PER_DSS_CLKCTRL_OFFSET               0x0a20
 #define AM43XX_CM_PER_HDQ1W_CLKCTRL_OFFSET             0x04a0
-
+#define AM43XX_CM_PER_VPFE0_CLKCTRL_OFFSET             0x0068
+#define AM43XX_CM_PER_VPFE1_CLKCTRL_OFFSET             0x0070
 #endif
index cbefbd7..661d753 100644 (file)
 #define OMAP3430_VC_CMD_ONLP_SHIFT                     16
 #define OMAP3430_VC_CMD_RET_SHIFT                      8
 #define OMAP3430_VC_CMD_OFF_SHIFT                      0
+#define OMAP3430_SREN_MASK                             (1 << 4)
 #define OMAP3430_HSEN_MASK                             (1 << 3)
 #define OMAP3430_MCODE_MASK                            (0x7 << 0)
 #define OMAP3430_VALID_MASK                            (1 << 24)
index b1c7a33..e794828 100644 (file)
@@ -35,6 +35,7 @@
 #define OMAP4430_GLOBAL_WARM_SW_RST_SHIFT                              1
 #define OMAP4430_GLOBAL_WUEN_MASK                                      (1 << 16)
 #define OMAP4430_HSMCODE_MASK                                          (0x7 << 0)
+#define OMAP4430_SRMODEEN_MASK                                         (1 << 4)
 #define OMAP4430_HSMODEEN_MASK                                         (1 << 3)
 #define OMAP4430_HSSCLL_SHIFT                                          24
 #define OMAP4430_ICEPICK_RST_SHIFT                                     9
index c4859c4..d0b15db 100644 (file)
@@ -87,12 +87,6 @@ u32 omap4_prminst_rmw_inst_reg_bits(u32 mask, u32 bits, u8 part, s16 inst,
        return v;
 }
 
-/*
- * Address offset (in bytes) between the reset control and the reset
- * status registers: 4 bytes on OMAP4
- */
-#define OMAP4_RST_CTRL_ST_OFFSET               4
-
 /**
  * omap4_prminst_is_hardreset_asserted - read the HW reset line state of
  * submodules contained in the hwmod module
@@ -141,11 +135,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst,
  * omap4_prminst_deassert_hardreset - deassert a submodule hardreset line and
  * wait
  * @shift: register bit shift corresponding to the reset line to deassert
- * @st_shift: status bit offset, not used for OMAP4+
+ * @st_shift: status bit offset corresponding to the reset line
  * @part: PRM partition
  * @inst: PRM instance offset
  * @rstctrl_offs: reset register offset
- * @st_offs: reset status register offset, not used for OMAP4+
+ * @rstst_offs: reset status register offset
  *
  * Some IPs like dsp, ipu or iva contain processors that require an HW
  * reset line to be asserted / deasserted in order to fully enable the
@@ -157,11 +151,11 @@ int omap4_prminst_assert_hardreset(u8 shift, u8 part, s16 inst,
  * of reset, or -EBUSY if the submodule did not exit reset promptly.
  */
 int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst,
-                                    u16 rstctrl_offs, u16 st_offs)
+                                    u16 rstctrl_offs, u16 rstst_offs)
 {
        int c;
        u32 mask = 1 << shift;
-       u16 rstst_offs = rstctrl_offs + OMAP4_RST_CTRL_ST_OFFSET;
+       u32 st_mask = 1 << st_shift;
 
        /* Check the current status to avoid de-asserting the line twice */
        if (omap4_prminst_is_hardreset_asserted(shift, part, inst,
@@ -169,13 +163,13 @@ int omap4_prminst_deassert_hardreset(u8 shift, u8 st_shift, u8 part, s16 inst,
                return -EEXIST;
 
        /* Clear the reset status by writing 1 to the status bit */
-       omap4_prminst_rmw_inst_reg_bits(0xffffffff, mask, part, inst,
+       omap4_prminst_rmw_inst_reg_bits(0xffffffff, st_mask, part, inst,
                                        rstst_offs);
        /* de-assert the reset control line */
        omap4_prminst_rmw_inst_reg_bits(mask, 0, part, inst, rstctrl_offs);
        /* wait the status to be set */
-       omap_test_timeout(omap4_prminst_is_hardreset_asserted(shift, part, inst,
-                                                             rstst_offs),
+       omap_test_timeout(omap4_prminst_is_hardreset_asserted(st_shift, part,
+                                                             inst, rstst_offs),
                          MAX_MODULE_HARDRESET_WAIT, c);
 
        return (c == MAX_MODULE_HARDRESET_WAIT) ? -EBUSY : 0;
index d1dedc8..eafd120 100644 (file)
@@ -203,23 +203,8 @@ save_context_wfi:
         */
        ldr     r1, kernel_flush
        blx     r1
-       /*
-        * The kernel doesn't interwork: v7_flush_dcache_all in particluar will
-        * always return in Thumb state when CONFIG_THUMB2_KERNEL is enabled.
-        * This sequence switches back to ARM.  Note that .align may insert a
-        * nop: bx pc needs to be word-aligned in order to work.
-        */
- THUMB(        .thumb          )
- THUMB(        .align          )
- THUMB(        bx      pc      )
- THUMB(        nop             )
-       .arm
-
        b       omap3_do_wfi
-
-/*
- * Local variables
- */
+ENDPROC(omap34xx_cpu_suspend)
 omap3_do_wfi_sram_addr:
        .word omap3_do_wfi_sram
 kernel_flush:
@@ -364,10 +349,7 @@ exit_nonoff_modes:
  * ===================================
  */
        ldmfd   sp!, {r4 - r11, pc}     @ restore regs and return
-
-/*
- * Local variables
- */
+ENDPROC(omap3_do_wfi)
 sdrc_power:
        .word   SDRC_POWER_V
 cm_idlest1_core:
index cef67af..cac46d8 100644 (file)
@@ -298,14 +298,11 @@ static int __init omap_dm_timer_init_one(struct omap_dm_timer *timer,
        if (IS_ERR(src))
                return PTR_ERR(src);
 
-       if (clk_get_parent(timer->fclk) != src) {
-               r = clk_set_parent(timer->fclk, src);
-               if (r < 0) {
-                       pr_warn("%s: %s cannot set source\n", __func__,
-                               oh->name);
-                       clk_put(src);
-                       return r;
-               }
+       r = clk_set_parent(timer->fclk, src);
+       if (r < 0) {
+               pr_warn("%s: %s cannot set source\n", __func__, oh->name);
+               clk_put(src);
+               return r;
        }
 
        clk_put(src);
index be9ef83..076fd20 100644 (file)
@@ -316,7 +316,8 @@ static void __init omap3_vc_init_pmic_signaling(struct voltagedomain *voltdm)
         * idle. And we can also scale voltages to zero for off-idle.
         * Note that no actual voltage scaling during off-idle will
         * happen unless the board specific twl4030 PMIC scripts are
-        * loaded.
+        * loaded. See also omap_vc_i2c_init for comments regarding
+        * erratum i531.
         */
        val = voltdm->read(OMAP3_PRM_VOLTCTRL_OFFSET);
        if (!(val & OMAP3430_PRM_VOLTCTRL_SEL_OFF)) {
@@ -704,9 +705,16 @@ static void __init omap_vc_i2c_init(struct voltagedomain *voltdm)
                return;
        }
 
+       /*
+        * Note that for omap3 OMAP3430_SREN_MASK clears SREN to work around
+        * erratum i531 "Extra Power Consumed When Repeated Start Operation
+        * Mode Is Enabled on I2C Interface Dedicated for Smart Reflex (I2C4)".
+        * Otherwise I2C4 eventually leads into about 23mW extra power being
+        * consumed even during off idle using VMODE.
+        */
        i2c_high_speed = voltdm->pmic->i2c_high_speed;
        if (i2c_high_speed)
-               voltdm->rmw(vc->common->i2c_cfg_hsen_mask,
+               voltdm->rmw(vc->common->i2c_cfg_clear_mask,
                            vc->common->i2c_cfg_hsen_mask,
                            vc->common->i2c_cfg_reg);
 
index cdbdd78..89b83b7 100644 (file)
@@ -34,6 +34,7 @@ struct voltagedomain;
  * @cmd_ret_shift: RET field shift in PRM_VC_CMD_VAL_* register
  * @cmd_off_shift: OFF field shift in PRM_VC_CMD_VAL_* register
  * @i2c_cfg_reg: I2C configuration register offset
+ * @i2c_cfg_clear_mask: high-speed mode bit clear mask in I2C config register
  * @i2c_cfg_hsen_mask: high-speed mode bit field mask in I2C config register
  * @i2c_mcode_mask: MCODE field mask for I2C config register
  *
@@ -52,6 +53,7 @@ struct omap_vc_common {
        u8 cmd_ret_shift;
        u8 cmd_off_shift;
        u8 i2c_cfg_reg;
+       u8 i2c_cfg_clear_mask;
        u8 i2c_cfg_hsen_mask;
        u8 i2c_mcode_mask;
 };
index 75bc4aa..71d74c9 100644 (file)
@@ -40,6 +40,7 @@ static struct omap_vc_common omap3_vc_common = {
        .cmd_onlp_shift  = OMAP3430_VC_CMD_ONLP_SHIFT,
        .cmd_ret_shift   = OMAP3430_VC_CMD_RET_SHIFT,
        .cmd_off_shift   = OMAP3430_VC_CMD_OFF_SHIFT,
+       .i2c_cfg_clear_mask = OMAP3430_SREN_MASK | OMAP3430_HSEN_MASK,
        .i2c_cfg_hsen_mask = OMAP3430_HSEN_MASK,
        .i2c_cfg_reg     = OMAP3_PRM_VC_I2C_CFG_OFFSET,
        .i2c_mcode_mask  = OMAP3430_MCODE_MASK,
index 085e5d6..2abd5fa 100644 (file)
@@ -42,6 +42,7 @@ static const struct omap_vc_common omap4_vc_common = {
        .cmd_ret_shift = OMAP4430_RET_SHIFT,
        .cmd_off_shift = OMAP4430_OFF_SHIFT,
        .i2c_cfg_reg = OMAP4_PRM_VC_CFG_I2C_MODE_OFFSET,
+       .i2c_cfg_clear_mask = OMAP4430_SRMODEEN_MASK | OMAP4430_HSMODEEN_MASK,
        .i2c_cfg_hsen_mask = OMAP4430_HSMODEEN_MASK,
        .i2c_mcode_mask  = OMAP4430_HSMCODE_MASK,
 };
index 8896e71..f096836 100644 (file)
@@ -691,4 +691,13 @@ config SHARPSL_PM_MAX1111
 config PXA310_ULPI
        bool
 
+config PXA_SYSTEMS_CPLDS
+       tristate "Motherboard cplds"
+       default ARCH_LUBBOCK || MACH_MAINSTONE
+       help
+         This driver supports the Lubbock and Mainstone multifunction chip
+         found on the pxa25x development platform system (Lubbock) and pxa27x
+         development platform system (Mainstone). This IO board supports the
+         interrupts handling, ethernet controller, flash chips, etc ...
+
 endif
index eb0bf76..4087d33 100644 (file)
@@ -90,4 +90,5 @@ obj-$(CONFIG_MACH_RAUMFELD_CONNECTOR) += raumfeld.o
 obj-$(CONFIG_MACH_RAUMFELD_SPEAKER)    += raumfeld.o
 obj-$(CONFIG_MACH_ZIPIT2)      += z2.o
 
+obj-$(CONFIG_PXA_SYSTEMS_CPLDS)        += pxa_cplds_irqs.o
 obj-$(CONFIG_TOSA_BT)          += tosa-bt.o
index 958cd6a..1eecf79 100644 (file)
@@ -37,7 +37,9 @@
 #define LUB_GP                 __LUB_REG(LUBBOCK_FPGA_PHYS + 0x100)
 
 /* Board specific IRQs */
-#define LUBBOCK_IRQ(x)         (IRQ_BOARD_START + (x))
+#define LUBBOCK_NR_IRQS                IRQ_BOARD_START
+
+#define LUBBOCK_IRQ(x)         (LUBBOCK_NR_IRQS + (x))
 #define LUBBOCK_SD_IRQ         LUBBOCK_IRQ(0)
 #define LUBBOCK_SA1111_IRQ     LUBBOCK_IRQ(1)
 #define LUBBOCK_USB_IRQ                LUBBOCK_IRQ(2)  /* usb connect */
@@ -47,8 +49,7 @@
 #define LUBBOCK_USB_DISC_IRQ   LUBBOCK_IRQ(6)  /* usb disconnect */
 #define LUBBOCK_LAST_IRQ       LUBBOCK_IRQ(6)
 
-#define LUBBOCK_SA1111_IRQ_BASE        (IRQ_BOARD_START + 16)
-#define LUBBOCK_NR_IRQS                (IRQ_BOARD_START + 16 + 55)
+#define LUBBOCK_SA1111_IRQ_BASE        (LUBBOCK_NR_IRQS + 32)
 
 #ifndef __ASSEMBLY__
 extern void lubbock_set_misc_wr(unsigned int mask, unsigned int set);
index 1bfc4e8..e82a7d3 100644 (file)
 #define MST_PCMCIA_PWR_VCC_50   0x4       /* voltage VCC = 5.0V */
 
 /* board specific IRQs */
-#define MAINSTONE_IRQ(x)       (IRQ_BOARD_START + (x))
+#define MAINSTONE_NR_IRQS      IRQ_BOARD_START
+
+#define MAINSTONE_IRQ(x)       (MAINSTONE_NR_IRQS + (x))
 #define MAINSTONE_MMC_IRQ      MAINSTONE_IRQ(0)
 #define MAINSTONE_USIM_IRQ     MAINSTONE_IRQ(1)
 #define MAINSTONE_USBC_IRQ     MAINSTONE_IRQ(2)
 #define MAINSTONE_S1_STSCHG_IRQ        MAINSTONE_IRQ(14)
 #define MAINSTONE_S1_IRQ       MAINSTONE_IRQ(15)
 
-#define MAINSTONE_NR_IRQS      (IRQ_BOARD_START + 16)
-
 #endif
index d8a1be6..4ac9ab8 100644 (file)
@@ -12,6 +12,7 @@
  *  published by the Free Software Foundation.
  */
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -123,84 +124,6 @@ void lubbock_set_misc_wr(unsigned int mask, unsigned int set)
 }
 EXPORT_SYMBOL(lubbock_set_misc_wr);
 
-static unsigned long lubbock_irq_enabled;
-
-static void lubbock_mask_irq(struct irq_data *d)
-{
-       int lubbock_irq = (d->irq - LUBBOCK_IRQ(0));
-       LUB_IRQ_MASK_EN = (lubbock_irq_enabled &= ~(1 << lubbock_irq));
-}
-
-static void lubbock_unmask_irq(struct irq_data *d)
-{
-       int lubbock_irq = (d->irq - LUBBOCK_IRQ(0));
-       /* the irq can be acknowledged only if deasserted, so it's done here */
-       LUB_IRQ_SET_CLR &= ~(1 << lubbock_irq);
-       LUB_IRQ_MASK_EN = (lubbock_irq_enabled |= (1 << lubbock_irq));
-}
-
-static struct irq_chip lubbock_irq_chip = {
-       .name           = "FPGA",
-       .irq_ack        = lubbock_mask_irq,
-       .irq_mask       = lubbock_mask_irq,
-       .irq_unmask     = lubbock_unmask_irq,
-};
-
-static void lubbock_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
-       unsigned long pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled;
-       do {
-               /* clear our parent irq */
-               desc->irq_data.chip->irq_ack(&desc->irq_data);
-               if (likely(pending)) {
-                       irq = LUBBOCK_IRQ(0) + __ffs(pending);
-                       generic_handle_irq(irq);
-               }
-               pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled;
-       } while (pending);
-}
-
-static void __init lubbock_init_irq(void)
-{
-       int irq;
-
-       pxa25x_init_irq();
-
-       /* setup extra lubbock irqs */
-       for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_LAST_IRQ; irq++) {
-               irq_set_chip_and_handler(irq, &lubbock_irq_chip,
-                                        handle_level_irq);
-               set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-       }
-
-       irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), lubbock_irq_handler);
-       irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING);
-}
-
-#ifdef CONFIG_PM
-
-static void lubbock_irq_resume(void)
-{
-       LUB_IRQ_MASK_EN = lubbock_irq_enabled;
-}
-
-static struct syscore_ops lubbock_irq_syscore_ops = {
-       .resume = lubbock_irq_resume,
-};
-
-static int __init lubbock_irq_device_init(void)
-{
-       if (machine_is_lubbock()) {
-               register_syscore_ops(&lubbock_irq_syscore_ops);
-               return 0;
-       }
-       return -ENODEV;
-}
-
-device_initcall(lubbock_irq_device_init);
-
-#endif
-
 static int lubbock_udc_is_connected(void)
 {
        return (LUB_MISC_RD & (1 << 9)) == 0;
@@ -383,11 +306,38 @@ static struct platform_device lubbock_flash_device[2] = {
        },
 };
 
+static struct resource lubbock_cplds_resources[] = {
+       [0] = {
+               .start  = LUBBOCK_FPGA_PHYS + 0xc0,
+               .end    = LUBBOCK_FPGA_PHYS + 0xe0 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = PXA_GPIO_TO_IRQ(0),
+               .end    = PXA_GPIO_TO_IRQ(0),
+               .flags  = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
+       },
+       [2] = {
+               .start  = LUBBOCK_IRQ(0),
+               .end    = LUBBOCK_IRQ(6),
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device lubbock_cplds_device = {
+       .name           = "pxa_cplds_irqs",
+       .id             = -1,
+       .resource       = &lubbock_cplds_resources[0],
+       .num_resources  = 3,
+};
+
+
 static struct platform_device *devices[] __initdata = {
        &sa1111_device,
        &smc91x_device,
        &lubbock_flash_device[0],
        &lubbock_flash_device[1],
+       &lubbock_cplds_device,
 };
 
 static struct pxafb_mode_info sharp_lm8v31_mode = {
@@ -648,7 +598,7 @@ MACHINE_START(LUBBOCK, "Intel DBPXA250 Development Platform (aka Lubbock)")
        /* Maintainer: MontaVista Software Inc. */
        .map_io         = lubbock_map_io,
        .nr_irqs        = LUBBOCK_NR_IRQS,
-       .init_irq       = lubbock_init_irq,
+       .init_irq       = pxa25x_init_irq,
        .handle_irq     = pxa25x_handle_irq,
        .init_time      = pxa_timer_init,
        .init_machine   = lubbock_init,
index 78b84c0..2c0658c 100644 (file)
@@ -13,6 +13,7 @@
  *  published by the Free Software Foundation.
  */
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/syscore_ops.h>
@@ -122,92 +123,6 @@ static unsigned long mainstone_pin_config[] = {
        GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
 };
 
-static unsigned long mainstone_irq_enabled;
-
-static void mainstone_mask_irq(struct irq_data *d)
-{
-       int mainstone_irq = (d->irq - MAINSTONE_IRQ(0));
-       MST_INTMSKENA = (mainstone_irq_enabled &= ~(1 << mainstone_irq));
-}
-
-static void mainstone_unmask_irq(struct irq_data *d)
-{
-       int mainstone_irq = (d->irq - MAINSTONE_IRQ(0));
-       /* the irq can be acknowledged only if deasserted, so it's done here */
-       MST_INTSETCLR &= ~(1 << mainstone_irq);
-       MST_INTMSKENA = (mainstone_irq_enabled |= (1 << mainstone_irq));
-}
-
-static struct irq_chip mainstone_irq_chip = {
-       .name           = "FPGA",
-       .irq_ack        = mainstone_mask_irq,
-       .irq_mask       = mainstone_mask_irq,
-       .irq_unmask     = mainstone_unmask_irq,
-};
-
-static void mainstone_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
-       unsigned long pending = MST_INTSETCLR & mainstone_irq_enabled;
-       do {
-               /* clear useless edge notification */
-               desc->irq_data.chip->irq_ack(&desc->irq_data);
-               if (likely(pending)) {
-                       irq = MAINSTONE_IRQ(0) + __ffs(pending);
-                       generic_handle_irq(irq);
-               }
-               pending = MST_INTSETCLR & mainstone_irq_enabled;
-       } while (pending);
-}
-
-static void __init mainstone_init_irq(void)
-{
-       int irq;
-
-       pxa27x_init_irq();
-
-       /* setup extra Mainstone irqs */
-       for(irq = MAINSTONE_IRQ(0); irq <= MAINSTONE_IRQ(15); irq++) {
-               irq_set_chip_and_handler(irq, &mainstone_irq_chip,
-                                        handle_level_irq);
-               if (irq == MAINSTONE_IRQ(10) || irq == MAINSTONE_IRQ(14))
-                       set_irq_flags(irq, IRQF_VALID | IRQF_PROBE | IRQF_NOAUTOEN);
-               else
-                       set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-       }
-       set_irq_flags(MAINSTONE_IRQ(8), 0);
-       set_irq_flags(MAINSTONE_IRQ(12), 0);
-
-       MST_INTMSKENA = 0;
-       MST_INTSETCLR = 0;
-
-       irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), mainstone_irq_handler);
-       irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING);
-}
-
-#ifdef CONFIG_PM
-
-static void mainstone_irq_resume(void)
-{
-       MST_INTMSKENA = mainstone_irq_enabled;
-}
-
-static struct syscore_ops mainstone_irq_syscore_ops = {
-       .resume = mainstone_irq_resume,
-};
-
-static int __init mainstone_irq_device_init(void)
-{
-       if (machine_is_mainstone())
-               register_syscore_ops(&mainstone_irq_syscore_ops);
-
-       return 0;
-}
-
-device_initcall(mainstone_irq_device_init);
-
-#endif
-
-
 static struct resource smc91x_resources[] = {
        [0] = {
                .start  = (MST_ETH_PHYS + 0x300),
@@ -487,11 +402,37 @@ static struct platform_device mst_gpio_keys_device = {
        },
 };
 
+static struct resource mst_cplds_resources[] = {
+       [0] = {
+               .start  = MST_FPGA_PHYS + 0xc0,
+               .end    = MST_FPGA_PHYS + 0xe0 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = PXA_GPIO_TO_IRQ(0),
+               .end    = PXA_GPIO_TO_IRQ(0),
+               .flags  = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
+       },
+       [2] = {
+               .start  = MAINSTONE_IRQ(0),
+               .end    = MAINSTONE_IRQ(15),
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device mst_cplds_device = {
+       .name           = "pxa_cplds_irqs",
+       .id             = -1,
+       .resource       = &mst_cplds_resources[0],
+       .num_resources  = 3,
+};
+
 static struct platform_device *platform_devices[] __initdata = {
        &smc91x_device,
        &mst_flash_device[0],
        &mst_flash_device[1],
        &mst_gpio_keys_device,
+       &mst_cplds_device,
 };
 
 static struct pxaohci_platform_data mainstone_ohci_platform_data = {
@@ -718,7 +659,7 @@ MACHINE_START(MAINSTONE, "Intel HCDDBBVA0 Development Platform (aka Mainstone)")
        .atag_offset    = 0x100,        /* BLOB boot parameter setting */
        .map_io         = mainstone_map_io,
        .nr_irqs        = MAINSTONE_NR_IRQS,
-       .init_irq       = mainstone_init_irq,
+       .init_irq       = pxa27x_init_irq,
        .handle_irq     = pxa27x_handle_irq,
        .init_time      = pxa_timer_init,
        .init_machine   = mainstone_init,
diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
new file mode 100644 (file)
index 0000000..2385052
--- /dev/null
@@ -0,0 +1,200 @@
+/*
+ * Intel Reference Systems cplds
+ *
+ * Copyright (C) 2014 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Cplds motherboard driver, supporting lubbock and mainstone SoC board.
+ */
+
+#include <linux/bitops.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#define FPGA_IRQ_MASK_EN 0x0
+#define FPGA_IRQ_SET_CLR 0x10
+
+#define CPLDS_NB_IRQ   32
+
+struct cplds {
+       void __iomem *base;
+       int irq;
+       unsigned int irq_mask;
+       struct gpio_desc *gpio0;
+       struct irq_domain *irqdomain;
+};
+
+static irqreturn_t cplds_irq_handler(int in_irq, void *d)
+{
+       struct cplds *fpga = d;
+       unsigned long pending;
+       unsigned int bit;
+
+       pending = readl(fpga->base + FPGA_IRQ_SET_CLR) & fpga->irq_mask;
+       for_each_set_bit(bit, &pending, CPLDS_NB_IRQ)
+               generic_handle_irq(irq_find_mapping(fpga->irqdomain, bit));
+
+       return IRQ_HANDLED;
+}
+
+static void cplds_irq_mask_ack(struct irq_data *d)
+{
+       struct cplds *fpga = irq_data_get_irq_chip_data(d);
+       unsigned int cplds_irq = irqd_to_hwirq(d);
+       unsigned int set, bit = BIT(cplds_irq);
+
+       fpga->irq_mask &= ~bit;
+       writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+       set = readl(fpga->base + FPGA_IRQ_SET_CLR);
+       writel(set & ~bit, fpga->base + FPGA_IRQ_SET_CLR);
+}
+
+static void cplds_irq_unmask(struct irq_data *d)
+{
+       struct cplds *fpga = irq_data_get_irq_chip_data(d);
+       unsigned int cplds_irq = irqd_to_hwirq(d);
+       unsigned int bit = BIT(cplds_irq);
+
+       fpga->irq_mask |= bit;
+       writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+}
+
+static struct irq_chip cplds_irq_chip = {
+       .name           = "pxa_cplds",
+       .irq_mask_ack   = cplds_irq_mask_ack,
+       .irq_unmask     = cplds_irq_unmask,
+       .flags          = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE,
+};
+
+static int cplds_irq_domain_map(struct irq_domain *d, unsigned int irq,
+                                  irq_hw_number_t hwirq)
+{
+       struct cplds *fpga = d->host_data;
+
+       irq_set_chip_and_handler(irq, &cplds_irq_chip, handle_level_irq);
+       irq_set_chip_data(irq, fpga);
+
+       return 0;
+}
+
+static const struct irq_domain_ops cplds_irq_domain_ops = {
+       .xlate = irq_domain_xlate_twocell,
+       .map = cplds_irq_domain_map,
+};
+
+static int cplds_resume(struct platform_device *pdev)
+{
+       struct cplds *fpga = platform_get_drvdata(pdev);
+
+       writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+
+       return 0;
+}
+
+static int cplds_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+       struct cplds *fpga;
+       int ret;
+       int base_irq;
+       unsigned long irqflags = 0;
+
+       fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL);
+       if (!fpga)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+       if (res) {
+               fpga->irq = (unsigned int)res->start;
+               irqflags = res->flags;
+       }
+       if (!fpga->irq)
+               return -ENODEV;
+
+       base_irq = platform_get_irq(pdev, 1);
+       if (base_irq < 0)
+               base_irq = 0;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       fpga->base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(fpga->base))
+               return PTR_ERR(fpga->base);
+
+       platform_set_drvdata(pdev, fpga);
+
+       writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+       writel(0, fpga->base + FPGA_IRQ_SET_CLR);
+
+       ret = devm_request_irq(&pdev->dev, fpga->irq, cplds_irq_handler,
+                              irqflags, dev_name(&pdev->dev), fpga);
+       if (ret == -ENOSYS)
+               return -EPROBE_DEFER;
+
+       if (ret) {
+               dev_err(&pdev->dev, "couldn't request main irq%d: %d\n",
+                       fpga->irq, ret);
+               return ret;
+       }
+
+       irq_set_irq_wake(fpga->irq, 1);
+       fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node,
+                                              CPLDS_NB_IRQ,
+                                              &cplds_irq_domain_ops, fpga);
+       if (!fpga->irqdomain)
+               return -ENODEV;
+
+       if (base_irq) {
+               ret = irq_create_strict_mappings(fpga->irqdomain, base_irq, 0,
+                                                CPLDS_NB_IRQ);
+               if (ret) {
+                       dev_err(&pdev->dev, "couldn't create the irq mapping %d..%d\n",
+                               base_irq, base_irq + CPLDS_NB_IRQ);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static int cplds_remove(struct platform_device *pdev)
+{
+       struct cplds *fpga = platform_get_drvdata(pdev);
+
+       irq_set_chip_and_handler(fpga->irq, NULL, NULL);
+
+       return 0;
+}
+
+static const struct of_device_id cplds_id_table[] = {
+       { .compatible = "intel,lubbock-cplds-irqs", },
+       { .compatible = "intel,mainstone-cplds-irqs", },
+       { }
+};
+MODULE_DEVICE_TABLE(of, cplds_id_table);
+
+static struct platform_driver cplds_driver = {
+       .driver         = {
+               .name   = "pxa_cplds_irqs",
+               .of_match_table = of_match_ptr(cplds_id_table),
+       },
+       .probe          = cplds_probe,
+       .remove         = cplds_remove,
+       .resume         = cplds_resume,
+};
+
+module_platform_driver(cplds_driver);
+
+MODULE_DESCRIPTION("PXA Cplds interrupts driver");
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_LICENSE("GPL");
index b07d886..b0dcbe2 100644 (file)
@@ -83,6 +83,13 @@ static void rk3288_slp_mode_set(int level)
                     SGRF_PCLK_WDT_GATE | SGRF_FAST_BOOT_EN
                     | SGRF_PCLK_WDT_GATE_WRITE | SGRF_FAST_BOOT_EN_WRITE);
 
+       /*
+        * The dapswjdp can not auto reset before resume, that cause it may
+        * access some illegal address during resume. Let's disable it before
+        * suspend, and the MASKROM will enable it back.
+        */
+       regmap_write(sgrf_regmap, RK3288_SGRF_CPU_CON0, SGRF_DAPDEVICEEN_WRITE);
+
        /* booting address of resuming system is from this register value */
        regmap_write(sgrf_regmap, RK3288_SGRF_FAST_BOOT_ADDR,
                     rk3288_bootram_phy);
index 03ff31d..3e8d39c 100644 (file)
@@ -55,6 +55,10 @@ static inline void rockchip_suspend_init(void)
 #define SGRF_FAST_BOOT_EN              BIT(8)
 #define SGRF_FAST_BOOT_EN_WRITE                BIT(24)
 
+#define RK3288_SGRF_CPU_CON0           (0x40)
+#define SGRF_DAPDEVICEEN               BIT(0)
+#define SGRF_DAPDEVICEEN_WRITE         BIT(16)
+
 #define RK3288_CRU_MODE_CON            0x50
 #define RK3288_CRU_SEL0_CON            0x60
 #define RK3288_CRU_SEL1_CON            0x64
index d360ec0..b6cf3b4 100644 (file)
 #include "pm.h"
 
 #define RK3288_GRF_SOC_CON0 0x244
+#define RK3288_TIMER6_7_PHYS 0xff810000
 
 static void __init rockchip_timer_init(void)
 {
        if (of_machine_is_compatible("rockchip,rk3288")) {
                struct regmap *grf;
+               void __iomem *reg_base;
+
+               /*
+                * Most/all uboot versions for rk3288 don't enable timer7
+                * which is needed for the architected timer to work.
+                * So make sure it is running during early boot.
+                */
+               reg_base = ioremap(RK3288_TIMER6_7_PHYS, SZ_16K);
+               if (reg_base) {
+                       writel(0, reg_base + 0x30);
+                       writel(0xffffffff, reg_base + 0x20);
+                       writel(0xffffffff, reg_base + 0x24);
+                       writel(1, reg_base + 0x30);
+                       dsb();
+                       iounmap(reg_base);
+               } else {
+                       pr_err("rockchip: could not map timer7 registers\n");
+               }
 
                /*
                 * Disable auto jtag/sdmmc switching that causes issues
index 09c5fe3..7e7583d 100644 (file)
@@ -1878,7 +1878,7 @@ struct dma_map_ops iommu_coherent_ops = {
  * arm_iommu_attach_device function.
  */
 struct dma_iommu_mapping *
-arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size)
 {
        unsigned int bits = size >> PAGE_SHIFT;
        unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
@@ -1886,6 +1886,10 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
        int extensions = 1;
        int err = -ENOMEM;
 
+       /* currently only 32-bit DMA address space is supported */
+       if (size > DMA_BIT_MASK(32) + 1)
+               return ERR_PTR(-ERANGE);
+
        if (!bitmap_size)
                return ERR_PTR(-EINVAL);
 
@@ -2057,13 +2061,6 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
        if (!iommu)
                return false;
 
-       /*
-        * currently arm_iommu_create_mapping() takes a max of size_t
-        * for size param. So check this limit for now.
-        */
-       if (size > SIZE_MAX)
-               return false;
-
        mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
        if (IS_ERR(mapping)) {
                pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
index 6333d9c..0d629b8 100644 (file)
@@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
        if (user_mode(regs))
index b98895d..ee8dfa7 100644 (file)
@@ -59,6 +59,7 @@ void *kmap_atomic(struct page *page)
        void *kmap;
        int type;
 
+       preempt_disable();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
@@ -121,6 +122,7 @@ void __kunmap_atomic(void *kvaddr)
                kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
        }
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
 
@@ -130,6 +132,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
        int idx, type;
        struct page *page = pfn_to_page(pfn);
 
+       preempt_disable();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
index 4e6ef89..7186382 100644 (file)
@@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void)
                        }
 
                        /*
-                        * Find the first non-section-aligned page, and point
+                        * Find the first non-pmd-aligned page, and point
                         * memblock_limit at it. This relies on rounding the
-                        * limit down to be section-aligned, which happens at
-                        * the end of this function.
+                        * limit down to be pmd-aligned, which happens at the
+                        * end of this function.
                         *
                         * With this algorithm, the start or end of almost any
-                        * bank can be non-section-aligned. The only exception
-                        * is that the start of the bank 0 must be section-
+                        * bank can be non-pmd-aligned. The only exception is
+                        * that the start of the bank 0 must be section-
                         * aligned, since otherwise memory would need to be
                         * allocated when mapping the start of bank 0, which
                         * occurs before any free memory is mapped.
                         */
                        if (!memblock_limit) {
-                               if (!IS_ALIGNED(block_start, SECTION_SIZE))
+                               if (!IS_ALIGNED(block_start, PMD_SIZE))
                                        memblock_limit = block_start;
-                               else if (!IS_ALIGNED(block_end, SECTION_SIZE))
+                               else if (!IS_ALIGNED(block_end, PMD_SIZE))
                                        memblock_limit = arm_lowmem_limit;
                        }
 
@@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void)
        high_memory = __va(arm_lowmem_limit - 1) + 1;
 
        /*
-        * Round the memblock limit down to a section size.  This
+        * Round the memblock limit down to a pmd size.  This
         * helps to ensure that we will allocate memory from the
-        * last full section, which should be mapped.
+        * last full pmd, which should be mapped.
         */
        if (memblock_limit)
-               memblock_limit = round_down(memblock_limit, SECTION_SIZE);
+               memblock_limit = round_down(memblock_limit, PMD_SIZE);
        if (!memblock_limit)
                memblock_limit = arm_lowmem_limit;
 
index aa0519e..774ef13 100644 (file)
@@ -22,8 +22,6 @@
  *
  * These are the low level assembler for performing cache and TLB
  * functions on the arm1020.
- *
- *  CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
index bff4c7f..ae3c27b 100644 (file)
@@ -22,8 +22,6 @@
  *
  * These are the low level assembler for performing cache and TLB
  * functions on the arm1020e.
- *
- *  CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
index ede8c54..32a47cc 100644 (file)
@@ -441,9 +441,6 @@ ENTRY(cpu_arm925_set_pte_ext)
        .type   __arm925_setup, #function
 __arm925_setup:
        mov     r0, #0
-#if defined(CONFIG_CPU_ICACHE_STREAMING_DISABLE)
-        orr     r0,r0,#1 << 7
-#endif
 
        /* Transparent on, D-cache clean & flush mode. See  NOTE2 above */
         orr     r0,r0,#1 << 1                  @ transparent mode on
index e494d6d..92e08bf 100644 (file)
@@ -602,7 +602,6 @@ __\name\()_proc_info:
                PMD_SECT_AP_WRITE | \
                PMD_SECT_AP_READ
        initfn  __feroceon_setup, __\name\()_proc_info
-       .long __feroceon_setup
        .long   cpu_arch_name
        .long   cpu_elf_name
        .long   HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
index e1268f9..e0e2358 100644 (file)
@@ -54,6 +54,7 @@
 #define SEEN_DATA              (1 << (BPF_MEMWORDS + 3))
 
 #define FLAG_NEED_X_RESET      (1 << 0)
+#define FLAG_IMM_OVERFLOW      (1 << 1)
 
 struct jit_ctx {
        const struct bpf_prog *skf;
@@ -293,6 +294,15 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
        /* PC in ARM mode == address of the instruction + 8 */
        imm = offset - (8 + ctx->idx * 4);
 
+       if (imm & ~0xfff) {
+               /*
+                * literal pool is too far, signal it into flags. we
+                * can only detect it on the second pass unfortunately.
+                */
+               ctx->flags |= FLAG_IMM_OVERFLOW;
+               return 0;
+       }
+
        return imm;
 }
 
@@ -449,10 +459,21 @@ static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx)
                return;
        }
 #endif
-       if (rm != ARM_R0)
-               emit(ARM_MOV_R(ARM_R0, rm), ctx);
+
+       /*
+        * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
+        * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into
+        * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm
+        * before using it as a source for ARM_R1.
+        *
+        * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is
+        * ARM_R5 (r_X) so there is no particular register overlap
+        * issues.
+        */
        if (rn != ARM_R1)
                emit(ARM_MOV_R(ARM_R1, rn), ctx);
+       if (rm != ARM_R0)
+               emit(ARM_MOV_R(ARM_R0, rm), ctx);
 
        ctx->seen |= SEEN_CALL;
        emit_mov_i(ARM_R3, (u32)jit_udiv, ctx);
@@ -855,6 +876,14 @@ b_epilogue:
                default:
                        return -1;
                }
+
+               if (ctx->flags & FLAG_IMM_OVERFLOW)
+                       /*
+                        * this instruction generated an overflow when
+                        * trying to access the literal pool, so
+                        * delegate this filter to the kernel interpreter.
+                        */
+                       return -1;
        }
 
        /* compute offsets only during the first pass */
@@ -917,7 +946,14 @@ void bpf_jit_compile(struct bpf_prog *fp)
        ctx.idx = 0;
 
        build_prologue(&ctx);
-       build_body(&ctx);
+       if (build_body(&ctx) < 0) {
+#if __LINUX_ARM_ARCH__ < 7
+               if (ctx.imm_count)
+                       kfree(ctx.imms);
+#endif
+               bpf_jit_binary_free(header);
+               goto out;
+       }
        build_epilogue(&ctx);
 
        flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));
index 224081c..7d0f070 100644 (file)
@@ -272,6 +272,7 @@ void xen_arch_pre_suspend(void) { }
 void xen_arch_post_suspend(int suspend_cancelled) { }
 void xen_timer_resume(void) { }
 void xen_arch_resume(void) { }
+void xen_arch_suspend(void) { }
 
 
 /* In the hypervisor.S file. */
index 793551d..4983250 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/gfp.h>
 #include <linux/highmem.h>
 #include <linux/export.h>
+#include <linux/memblock.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/interface.h>
 
+unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+       struct memblock_region *reg;
+       gfp_t flags = __GFP_NOWARN;
+
+       for_each_memblock(memory, reg) {
+               if (reg->base < (phys_addr_t)0xffffffff) {
+                       flags |= __GFP_DMA;
+                       break;
+               }
+       }
+       return __get_free_pages(flags, order);
+}
+
 enum dma_cache_op {
        DMA_UNMAP,
        DMA_MAP,
index c138b95..351c95b 100644 (file)
                        clock-output-names = "juno_mb:clk25mhz";
                };
 
+               v2m_refclk1mhz: refclk1mhz {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <1000000>;
+                       clock-output-names = "juno_mb:refclk1mhz";
+               };
+
+               v2m_refclk32khz: refclk32khz {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <32768>;
+                       clock-output-names = "juno_mb:refclk32khz";
+               };
+
                motherboard {
                        compatible = "arm,vexpress,v2p-p1", "simple-bus";
                        #address-cells = <2>;  /* SMB chipselect number and offset */
                                #size-cells = <1>;
                                ranges = <0 3 0 0x200000>;
 
+                               v2m_sysctl: sysctl@020000 {
+                                       compatible = "arm,sp810", "arm,primecell";
+                                       reg = <0x020000 0x1000>;
+                                       clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&mb_clk24mhz>;
+                                       clock-names = "refclk", "timclk", "apb_pclk";
+                                       #clock-cells = <1>;
+                                       clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
+                               };
+
                                mmci@050000 {
                                        compatible = "arm,pl180", "arm,primecell";
                                        reg = <0x050000 0x1000>;
                                        compatible = "arm,sp804", "arm,primecell";
                                        reg = <0x110000 0x10000>;
                                        interrupts = <9>;
-                                       clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
-                                       clock-names = "timclken1", "apb_pclk";
+                                       clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&mb_clk24mhz>;
+                                       clock-names = "timclken1", "timclken2", "apb_pclk";
                                };
 
                                v2m_timer23: timer@120000 {
                                        compatible = "arm,sp804", "arm,primecell";
                                        reg = <0x120000 0x10000>;
                                        interrupts = <9>;
-                                       clocks = <&mb_clk24mhz>, <&soc_smc50mhz>;
-                                       clock-names = "timclken1", "apb_pclk";
+                                       clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&mb_clk24mhz>;
+                                       clock-names = "timclken1", "timclken2", "apb_pclk";
                                };
 
                                rtc@170000 {
index 43d5401..d0ab012 100644 (file)
@@ -16,7 +16,8 @@
 #include "mt8173.dtsi"
 
 / {
-       model = "mediatek,mt8173-evb";
+       model = "MediaTek MT8173 evaluation board";
+       compatible = "mediatek,mt8173-evb", "mediatek,mt8173";
 
        aliases {
                serial0 = &uart0;
index 9499199..6a37c3c 100644 (file)
@@ -147,13 +147,21 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
 {
        struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
 
+       put_unaligned_le32(ctx->crc, out);
+       return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
        put_unaligned_le32(~ctx->crc, out);
        return 0;
 }
 
 static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
 {
-       put_unaligned_le32(~crc32_arm64_le_hw(crc, data, len), out);
+       put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
        return 0;
 }
 
@@ -199,6 +207,14 @@ static int crc32_cra_init(struct crypto_tfm *tfm)
 {
        struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 
+       mctx->key = 0;
+       return 0;
+}
+
+static int crc32c_cra_init(struct crypto_tfm *tfm)
+{
+       struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
        mctx->key = ~0;
        return 0;
 }
@@ -229,7 +245,7 @@ static struct shash_alg crc32c_alg = {
        .setkey                 =       chksum_setkey,
        .init                   =       chksum_init,
        .update                 =       chksumc_update,
-       .final                  =       chksum_final,
+       .final                  =       chksumc_final,
        .finup                  =       chksumc_finup,
        .digest                 =       chksumc_digest,
        .descsize               =       sizeof(struct chksum_desc_ctx),
@@ -241,7 +257,7 @@ static struct shash_alg crc32c_alg = {
                .cra_alignmask          =       0,
                .cra_ctxsize            =       sizeof(struct chksum_ctx),
                .cra_module             =       THIS_MODULE,
-               .cra_init               =       crc32_cra_init,
+               .cra_init               =       crc32c_cra_init,
        }
 };
 
index 114e7cc..aefda98 100644 (file)
@@ -74,6 +74,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 
 static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
+       struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+
+       sctx->finalize = 0;
        kernel_neon_begin_partial(16);
        sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
        kernel_neon_end();
index 1340e44..7cd5875 100644 (file)
@@ -75,6 +75,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 
 static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
+       struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+
+       sctx->finalize = 0;
        kernel_neon_begin_partial(28);
        sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
        kernel_neon_end();
index 71f19c4..0fa47c4 100644 (file)
@@ -114,7 +114,7 @@ do {                                                                        \
 #define read_barrier_depends()         do { } while(0)
 #define smp_read_barrier_depends()     do { } while(0)
 
-#define set_mb(var, value)     do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value)       do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 #define nop()          asm volatile("nop");
 
 #define smp_mb__before_atomic()        smp_mb()
index 5f750dc..74069b3 100644 (file)
@@ -58,7 +58,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
        if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
                return -EFAULT;
 
-       pagefault_disable();    /* implies preempt_disable() */
+       pagefault_disable();
 
        switch (op) {
        case FUTEX_OP_SET:
@@ -85,7 +85,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
                ret = -ENOSYS;
        }
 
-       pagefault_enable();     /* subsumes preempt_enable() */
+       pagefault_enable();
 
        if (!ret) {
                switch (cmp) {
index 7ebcd31..225ec35 100644 (file)
@@ -18,7 +18,7 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
 #define topology_physical_package_id(cpu)      (cpu_topology[cpu].cluster_id)
 #define topology_core_id(cpu)          (cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu)     (&cpu_topology[cpu].core_sibling)
-#define topology_thread_cpumask(cpu)   (&cpu_topology[cpu].thread_sibling)
+#define topology_sibling_cpumask(cpu)  (&cpu_topology[cpu].thread_sibling)
 
 void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
index 21033bb..28f8365 100644 (file)
@@ -24,7 +24,6 @@
 #include <asm/cacheflush.h>
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
-#include <asm/insn.h>
 #include <linux/stop_machine.h>
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
@@ -34,48 +33,6 @@ struct alt_region {
        struct alt_instr *end;
 };
 
-/*
- * Decode the imm field of a b/bl instruction, and return the byte
- * offset as a signed value (so it can be used when computing a new
- * branch target).
- */
-static s32 get_branch_offset(u32 insn)
-{
-       s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn);
-
-       /* sign-extend the immediate before turning it into a byte offset */
-       return (imm << 6) >> 4;
-}
-
-static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr)
-{
-       u32 insn;
-
-       aarch64_insn_read(altinsnptr, &insn);
-
-       /* Stop the world on instructions we don't support... */
-       BUG_ON(aarch64_insn_is_cbz(insn));
-       BUG_ON(aarch64_insn_is_cbnz(insn));
-       BUG_ON(aarch64_insn_is_bcond(insn));
-       /* ... and there is probably more. */
-
-       if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
-               enum aarch64_insn_branch_type type;
-               unsigned long target;
-
-               if (aarch64_insn_is_b(insn))
-                       type = AARCH64_INSN_BRANCH_NOLINK;
-               else
-                       type = AARCH64_INSN_BRANCH_LINK;
-
-               target = (unsigned long)altinsnptr + get_branch_offset(insn);
-               insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr,
-                                                  target, type);
-       }
-
-       return insn;
-}
-
 static int __apply_alternatives(void *alt_region)
 {
        struct alt_instr *alt;
@@ -83,9 +40,6 @@ static int __apply_alternatives(void *alt_region)
        u8 *origptr, *replptr;
 
        for (alt = region->begin; alt < region->end; alt++) {
-               u32 insn;
-               int i;
-
                if (!cpus_have_cap(alt->cpufeature))
                        continue;
 
@@ -95,12 +49,7 @@ static int __apply_alternatives(void *alt_region)
 
                origptr = (u8 *)&alt->orig_offset + alt->orig_offset;
                replptr = (u8 *)&alt->alt_offset + alt->alt_offset;
-
-               for (i = 0; i < alt->alt_len; i += sizeof(insn)) {
-                       insn = get_alt_insn(origptr + i, replptr + i);
-                       aarch64_insn_write(origptr + i, insn);
-               }
-
+               memcpy(origptr, replptr, alt->alt_len);
                flush_icache_range((uintptr_t)origptr,
                                   (uintptr_t)(origptr + alt->alt_len));
        }
index 23f25ac..cce18c8 100644 (file)
@@ -1315,15 +1315,15 @@ static int armpmu_device_probe(struct platform_device *pdev)
        if (!cpu_pmu)
                return -ENODEV;
 
-       irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-       if (!irqs)
-               return -ENOMEM;
-
        /* Don't bother with PPIs; they're already affine */
        irq = platform_get_irq(pdev, 0);
        if (irq >= 0 && irq_is_percpu(irq))
                return 0;
 
+       irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+       if (!irqs)
+               return -ENOMEM;
+
        for (i = 0; i < pdev->num_resources; ++i) {
                struct device_node *dn;
                int cpu;
index 74c2567..f3d6221 100644 (file)
@@ -328,10 +328,12 @@ static int ptdump_init(void)
                        for (j = 0; j < pg_level[i].num; j++)
                                pg_level[i].mask |= pg_level[i].bits[j].mask;
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
        address_markers[VMEMMAP_START_NR].start_address =
                                (unsigned long)virt_to_page(PAGE_OFFSET);
        address_markers[VMEMMAP_END_NR].start_address =
                                (unsigned long)virt_to_page(high_memory);
+#endif
 
        pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
                                 &ptdump_fops);
index 96da131..0948d32 100644 (file)
@@ -211,7 +211,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
         * If we're in an interrupt or have no user context, we must not take
         * the fault.
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
        if (user_mode(regs))
index edba042..dc6a484 100644 (file)
@@ -487,7 +487,7 @@ emit_cond_jmp:
                        return -EINVAL;
                }
 
-               imm64 = (u64)insn1.imm << 32 | imm;
+               imm64 = (u64)insn1.imm << 32 | (u32)imm;
                emit_a64_mov_i64(dst, imm64, ctx);
 
                return 1;
index 962a6ae..366bbea 100644 (file)
@@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
    if something tries to do an invalid cmpxchg().  */
 extern void __cmpxchg_called_with_bad_pointer(void);
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
                                      unsigned long new, int size)
 {
index a46f7cf..68cf638 100644 (file)
@@ -97,7 +97,8 @@ static inline __kernel_size_t __copy_from_user(void *to,
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -116,7 +117,8 @@ static inline __kernel_size_t __copy_from_user(void *to,
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -136,7 +138,8 @@ static inline __kernel_size_t __copy_from_user(void *to,
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -158,7 +161,8 @@ static inline __kernel_size_t __copy_from_user(void *to,
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
index d223a8b..c035339 100644 (file)
 #include <linux/pagemap.h>
 #include <linux/kdebug.h>
 #include <linux/kprobes.h>
+#include <linux/uaccess.h>
 
 #include <asm/mmu_context.h>
 #include <asm/sysreg.h>
 #include <asm/tlb.h>
-#include <asm/uaccess.h>
 
 #ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs, int trap)
@@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
         * If we're in an interrupt or have no user context, we must
         * not take the fault...
         */
-       if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
+       if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM))
                goto no_context;
 
        local_irq_enable();
index 4e8ad05..6abebe8 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <asm/byteorder.h>
+#include <asm/def_LPBlackfin.h>
 
 #define __raw_readb bfin_read8
 #define __raw_readw bfin_read16
index 83f12f2..3066d40 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/wait.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <arch/system.h>
 
 extern int find_fixup_code(struct pt_regs *);
@@ -109,11 +109,11 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
        info.si_code = SEGV_MAPERR;
 
        /*
-        * If we're in an interrupt or "atomic" operation or have no
+        * If we're in an interrupt, have pagefaults disabled or have no
         * user context, we must not take the fault.
         */
 
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
        if (user_mode(regs))
index ec4917d..61d9976 100644 (file)
@@ -19,9 +19,9 @@
 #include <linux/kernel.h>
 #include <linux/ptrace.h>
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
 
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
 #include <asm/gdb-stub.h>
 
 /*****************************************************************************/
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
        if (user_mode(__frame))
index bed9a9b..785344b 100644 (file)
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
        unsigned long paddr;
        int type;
 
+       preempt_disable();
        pagefault_disable();
        type = kmap_atomic_idx_push();
        paddr = page_to_phys(page);
@@ -85,5 +86,6 @@ void __kunmap_atomic(void *kvaddr)
        }
        kmap_atomic_idx_pop();
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
index 9e78029..a6e34e2 100644 (file)
@@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
  *  looks just like atomic_cmpxchg on our arch currently with a bunch of
  *  variable casting.
  */
-#define __HAVE_ARCH_CMPXCHG 1
 
 #define cmpxchg(ptr, old, new)                                 \
 ({                                                             \
index e4127e4..f000a38 100644 (file)
@@ -36,7 +36,8 @@
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
index f6769eb..843ba43 100644 (file)
@@ -77,12 +77,7 @@ do {                                                                 \
        ___p1;                                                          \
 })
 
-/*
- * XXX check on this ---I suspect what Linus really wants here is
- * acquire vs release semantics but we can't discuss this stuff with
- * Linus just yet.  Grrr...
- */
-#define set_mb(var, value)     do { (var) = (value); mb(); } while (0)
+#define smp_store_mb(var, value)       do { WRITE_ONCE(var, value); mb(); } while (0)
 
 /*
  * The group barrier in front of the rsm & ssm are necessary to ensure
index 6437ca2..3ad8f69 100644 (file)
@@ -53,7 +53,7 @@ void build_cpu_to_node_map(void);
 #define topology_physical_package_id(cpu)      (cpu_data(cpu)->socket_id)
 #define topology_core_id(cpu)                  (cpu_data(cpu)->core_id)
 #define topology_core_cpumask(cpu)             (&cpu_core_map[cpu])
-#define topology_thread_cpumask(cpu)           (&per_cpu(cpu_sibling_map, cpu))
+#define topology_sibling_cpumask(cpu)          (&per_cpu(cpu_sibling_map, cpu))
 #endif
 
 extern void arch_fix_phys_package_id(int num, u32 slot);
index f35109b..a0e3620 100644 (file)
@@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void);
  * indicated by comparing RETURN with OLD.
  */
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 /*
  * This function doesn't exist, so you'll get a linker error
  * if something tries to do an invalid cmpxchg().
index 15051e9..b054c5c 100644 (file)
@@ -127,7 +127,7 @@ int smp_num_siblings = 1;
 volatile int ia64_cpu_to_sapicid[NR_CPUS];
 EXPORT_SYMBOL(ia64_cpu_to_sapicid);
 
-static volatile cpumask_t cpu_callin_map;
+static cpumask_t cpu_callin_map;
 
 struct smp_boot_data smp_boot_data __initdata;
 
@@ -477,6 +477,7 @@ do_boot_cpu (int sapicid, int cpu, struct task_struct *idle)
        for (timeout = 0; timeout < 100000; timeout++) {
                if (cpumask_test_cpu(cpu, &cpu_callin_map))
                        break;  /* It has booted */
+               barrier(); /* Make sure we re-read cpu_callin_map */
                udelay(100);
        }
        Dprintk("\n");
index ba5ba7a..70b40d1 100644 (file)
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/prefetch.h>
+#include <linux/uaccess.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
 
 extern int die(char *, struct pt_regs *, long);
 
@@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
        /*
         * If we're in an interrupt or have no user context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
index d4e162d..7cc3be9 100644 (file)
@@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-       struct pci_controller *controller = bridge->bus->sysdata;
-
-       ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+       /*
+        * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+        * here, pci_create_root_bus() has been called by someone else and
+        * sysdata is likely to be different from what we expect.  Let it go in
+        * that case.
+        */
+       if (!bridge->dev.parent) {
+               struct pci_controller *controller = bridge->bus->sysdata;
+               ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+       }
        return 0;
 }
 
index de651db..14bf9b7 100644 (file)
@@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size)
        ((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr),    \
                        sizeof(*(ptr))))
 
-#define __HAVE_ARCH_CMPXCHG    1
-
 static inline unsigned long
 __cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new)
 {
index 71adff2..cac7014 100644 (file)
@@ -91,7 +91,8 @@ static inline void set_fs(mm_segment_t s)
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -155,7 +156,8 @@ extern int fixup_exception(struct pt_regs *regs);
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -175,7 +177,8 @@ extern int fixup_exception(struct pt_regs *regs);
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -194,7 +197,8 @@ extern int fixup_exception(struct pt_regs *regs);
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -274,7 +278,8 @@ do {                                                                        \
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -568,7 +573,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  * @from: Source address, in kernel space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -588,7 +594,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  * @from: Source address, in kernel space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -606,7 +613,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  * @from: Source address, in user space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -626,7 +634,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  * @from: Source address, in user space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.
  *
@@ -677,7 +686,8 @@ unsigned long clear_user(void __user *mem, unsigned long len);
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
index ce7aea3..c18ddc7 100644 (file)
@@ -45,7 +45,7 @@ static volatile unsigned long flushcache_cpumask = 0;
 /*
  * For flush_tlb_others()
  */
-static volatile cpumask_t flush_cpumask;
+static cpumask_t flush_cpumask;
 static struct mm_struct *flush_mm;
 static struct vm_area_struct *flush_vma;
 static volatile unsigned long flush_va;
@@ -415,7 +415,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
         */
        send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0);
 
-       while (!cpumask_empty((cpumask_t*)&flush_cpumask)) {
+       while (!cpumask_empty(&flush_cpumask)) {
                /* nothing. lockup detection does not belong here */
                mb();
        }
@@ -468,7 +468,7 @@ void smp_invalidate_interrupt(void)
                        __flush_tlb_page(va);
                }
        }
-       cpumask_clear_cpu(cpu_id, (cpumask_t*)&flush_cpumask);
+       cpumask_clear_cpu(cpu_id, &flush_cpumask);
 }
 
 /*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
index e3d4d48..8f9875b 100644 (file)
@@ -24,9 +24,9 @@
 #include <linux/vt_kern.h>             /* For unblank_screen() */
 #include <linux/highmem.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 
 #include <asm/m32r.h>
-#include <asm/uaccess.h>
 #include <asm/hardirq.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -111,10 +111,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
        mm = tsk->mm;
 
        /*
-        * If we're in an interrupt or have no user context or are running in an
-        * atomic region then we must not take the fault..
+        * If we're in an interrupt or have no user context or have pagefaults
+        * disabled then we must not take the fault.
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto bad_area_nosemaphore;
 
        if (error_code & ACE_USERMODE)
index bc755bc..83b1df8 100644 (file)
@@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *,
  * indicated by comparing RETURN with OLD.
  */
 #ifdef CONFIG_RMW_INSNS
-#define __HAVE_ARCH_CMPXCHG    1
 
 static inline unsigned long __cmpxchg(volatile void *p, unsigned long old,
                                      unsigned long new, int size)
index a823cd7..b594181 100644 (file)
@@ -2,9 +2,6 @@
 #define _M68K_IRQFLAGS_H
 
 #include <linux/types.h>
-#ifdef CONFIG_MMU
-#include <linux/preempt_mask.h>
-#endif
 #include <linux/preempt.h>
 #include <asm/thread_info.h>
 #include <asm/entry.h>
index b2f04ae..6a94cdd 100644 (file)
 #include <linux/ptrace.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 
 #include <asm/setup.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
 extern void die_if_kernel(char *, struct pt_regs *, long);
@@ -81,7 +81,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
        if (user_mode(regs))
index d703d8e..5a696e5 100644 (file)
@@ -84,7 +84,7 @@ static inline void fence(void)
 #define read_barrier_depends()         do { } while (0)
 #define smp_read_barrier_depends()     do { } while (0)
 
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 
 #define smp_store_release(p, v)                                                \
 do {                                                                   \
index b1bc1be..be29e3e 100644 (file)
@@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
        return old;
 }
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 #define cmpxchg(ptr, o, n)                                             \
        ({                                                              \
                __typeof__(*(ptr)) _o_ = (o);                           \
index 2de5dc6..f57edca 100644 (file)
@@ -105,7 +105,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 
        mm = tsk->mm;
 
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
        if (user_mode(regs))
index d71f621..807f1b1 100644 (file)
@@ -43,7 +43,7 @@ void *kmap_atomic(struct page *page)
        unsigned long vaddr;
        int type;
 
-       /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+       preempt_disable();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
@@ -82,6 +82,7 @@ void __kunmap_atomic(void *kvaddr)
        }
 
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
 
@@ -95,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
        unsigned long vaddr;
        int type;
 
+       preempt_disable();
        pagefault_disable();
 
        type = kmap_atomic_idx_push();
index 62942fd..331b0d3 100644 (file)
@@ -178,7 +178,8 @@ extern long __user_bad(void);
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -290,7 +291,8 @@ extern long __user_bad(void);
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
index d46a5eb..177dfc0 100644 (file)
@@ -107,14 +107,14 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
        if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
                is_write = 0;
 
-       if (unlikely(in_atomic() || !mm)) {
+       if (unlikely(faulthandler_disabled() || !mm)) {
                if (kernel_mode(regs))
                        goto bad_area_nosemaphore;
 
-               /* in_atomic() in user mode is really bad,
+               /* faulthandler_disabled() in user mode is really bad,
                   as is current->mm == NULL. */
-               pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n",
-                                                                       mm);
+               pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n",
+                        mm);
                pr_emerg("r15 = %lx  MSR = %lx\n",
                       regs->r15, regs->msr);
                die("Weird page fault", regs, SIGSEGV);
index 5a92576..2fcc5a5 100644 (file)
@@ -37,7 +37,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
        unsigned long vaddr;
        int idx, type;
 
-       /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+       preempt_disable();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
@@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr)
 
        if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
                pagefault_enable();
+               preempt_enable();
                return;
        }
 
@@ -84,5 +85,6 @@ void __kunmap_atomic(void *kvaddr)
 #endif
        kmap_atomic_idx_pop();
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
index 5200f64..ae2dd59 100644 (file)
@@ -277,7 +277,7 @@ LDFLAGS                     += -m $(ld-emul)
 ifdef CONFIG_MIPS
 CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
        egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \
-       sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/")
+       sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
 ifdef CONFIG_64BIT
 CHECKFLAGS             += -m64
 endif
index e1fe630..597899a 100644 (file)
@@ -1,6 +1,7 @@
 /*
  *  Atheros AR71XX/AR724X/AR913X specific prom routines
  *
+ *  Copyright (C) 2015 Laurent Fasnacht <l@libres.ch>
  *  Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
  *  Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
  *
@@ -25,12 +26,14 @@ void __init prom_init(void)
 {
        fw_init_cmdline();
 
+#ifdef CONFIG_BLK_DEV_INITRD
        /* Read the initrd address from the firmware environment */
        initrd_start = fw_getenvl("initrd_start");
        if (initrd_start) {
                initrd_start = KSEG0ADDR(initrd_start);
                initrd_end = initrd_start + fw_getenvl("initrd_size");
        }
+#endif
 }
 
 void __init prom_free_prom_memory(void)
index a73c93c..7fc8397 100644 (file)
@@ -225,7 +225,7 @@ void __init plat_time_init(void)
        ddr_clk_rate = ath79_get_sys_clk_rate("ddr");
        ref_clk_rate = ath79_get_sys_clk_rate("ref");
 
-       pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz",
+       pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz\n",
                cpu_clk_rate / 1000000, (cpu_clk_rate / 1000) % 1000,
                ddr_clk_rate / 1000000, (ddr_clk_rate / 1000) % 1000,
                ahb_clk_rate / 1000000, (ahb_clk_rate / 1000) % 1000,
index 558e949..68f0c58 100644 (file)
@@ -2,7 +2,6 @@
 # Makefile for the Cobalt micro systems family specific parts of the kernel
 #
 
-obj-y := buttons.o irq.o lcd.o led.o reset.o rtc.o serial.o setup.o time.o
+obj-y := buttons.o irq.o lcd.o led.o mtd.o reset.o rtc.o serial.o setup.o time.o
 
 obj-$(CONFIG_PCI)              += pci.o
-obj-$(CONFIG_MTD_PHYSMAP)      += mtd.o
index 0026806..b2a577e 100644 (file)
@@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m
 CONFIG_USB_C67X00_HCD=m
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_ISP1760_HCD=m
+CONFIG_USB_ISP1760=m
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=m
 CONFIG_USB_R8A66597_HCD=m
index 2b8bbbc..7ecba84 100644 (file)
 #define __WEAK_LLSC_MB         "               \n"
 #endif
 
-#define set_mb(var, value) \
-       do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) \
+       do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 
 #define smp_llsc_mb()  __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
 
index 412f945..b71ab4a 100644 (file)
@@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
                __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))));     \
 })
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 #define __cmpxchg_asm(ld, st, m, old, new)                             \
 ({                                                                     \
        __typeof(*(m)) __ret;                                           \
index a594d8e..f19e890 100644 (file)
@@ -304,7 +304,7 @@ do {                                                                        \
                                                                        \
        current->thread.abi = &mips_abi;                                \
                                                                        \
-       current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31;         \
+       current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31;            \
 } while (0)
 
 #endif /* CONFIG_32BIT */
@@ -366,7 +366,7 @@ do {                                                                        \
        else                                                            \
                current->thread.abi = &mips_abi;                        \
                                                                        \
-       current->thread.fpu.fcr31 = current_cpu_data.fpu_csr31;         \
+       current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31;            \
                                                                        \
        p = personality(current->personality);                          \
        if (p != PER_LINUX32 && p != PER_LINUX)                         \
index 18ae5dd..c28a849 100644 (file)
 #define _PAGE_PRESENT_SHIFT    0
 #define _PAGE_PRESENT          (1 << _PAGE_PRESENT_SHIFT)
 /* R2 or later cores check for RI/XI support to determine _PAGE_READ */
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 #define _PAGE_WRITE_SHIFT      (_PAGE_PRESENT_SHIFT + 1)
 #define _PAGE_WRITE            (1 << _PAGE_WRITE_SHIFT)
 #else
 #define _PAGE_SPLITTING                (1 << _PAGE_SPLITTING_SHIFT)
 
 /* Only R2 or newer cores have the XI bit */
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 #define _PAGE_NO_EXEC_SHIFT    (_PAGE_SPLITTING_SHIFT + 1)
 #else
 #define _PAGE_GLOBAL_SHIFT     (_PAGE_SPLITTING_SHIFT + 1)
 #define _PAGE_GLOBAL           (1 << _PAGE_GLOBAL_SHIFT)
-#endif /* CONFIG_CPU_MIPSR2 */
+#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 
 #endif /* CONFIG_64BIT && CONFIG_MIPS_HUGE_TLB_SUPPORT */
 
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 /* XI - page cannot be executed */
 #ifndef _PAGE_NO_EXEC_SHIFT
 #define _PAGE_NO_EXEC_SHIFT    (_PAGE_MODIFIED_SHIFT + 1)
 #define _PAGE_GLOBAL_SHIFT     (_PAGE_NO_READ_SHIFT + 1)
 #define _PAGE_GLOBAL           (1 << _PAGE_GLOBAL_SHIFT)
 
-#else  /* !CONFIG_CPU_MIPSR2 */
+#else  /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */
 #define _PAGE_GLOBAL_SHIFT     (_PAGE_MODIFIED_SHIFT + 1)
 #define _PAGE_GLOBAL           (1 << _PAGE_GLOBAL_SHIFT)
-#endif /* CONFIG_CPU_MIPSR2 */
+#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 
 #define _PAGE_VALID_SHIFT      (_PAGE_GLOBAL_SHIFT + 1)
 #define _PAGE_VALID            (1 << _PAGE_VALID_SHIFT)
  */
 static inline uint64_t pte_to_entrylo(unsigned long pte_val)
 {
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        if (cpu_has_rixi) {
                int sa;
 #ifdef CONFIG_32BIT
index bb02fac..2b25d1b 100644 (file)
@@ -45,7 +45,7 @@ extern int __cpu_logical_map[NR_CPUS];
 #define SMP_DUMP               0x8
 #define SMP_ASK_C0COUNT                0x10
 
-extern volatile cpumask_t cpu_callin_map;
+extern cpumask_t cpu_callin_map;
 
 /* Mask of CPUs which are currently definitely operating coherently */
 extern cpumask_t cpu_coherent_mask;
index e92d6c4..7163cd7 100644 (file)
@@ -104,7 +104,6 @@ do {                                                                        \
        if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA))          \
                __fpsave = FP_SAVE_VECTOR;                              \
        (last) = resume(prev, next, task_thread_info(next), __fpsave);  \
-       disable_msa();                                                  \
 } while (0)
 
 #define finish_arch_switch(prev)                                       \
@@ -122,6 +121,7 @@ do {                                                                        \
        if (cpu_has_userlocal)                                          \
                write_c0_userlocal(current_thread_info()->tp_value);    \
        __restore_watch();                                              \
+       disable_msa();                                                  \
 } while (0)
 
 #endif /* _ASM_SWITCH_TO_H */
index 3e307ec..7afda41 100644 (file)
@@ -15,7 +15,7 @@
 #define topology_physical_package_id(cpu)      (cpu_data[cpu].package)
 #define topology_core_id(cpu)                  (cpu_data[cpu].core)
 #define topology_core_cpumask(cpu)             (&cpu_core_map[cpu])
-#define topology_thread_cpumask(cpu)           (&cpu_sibling_map[cpu])
+#define topology_sibling_cpumask(cpu)          (&cpu_sibling_map[cpu])
 #endif
 
 #endif /* __ASM_TOPOLOGY_H */
index bf8b324..9722357 100644 (file)
@@ -103,7 +103,8 @@ extern u64 __ua_limit;
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -138,7 +139,8 @@ extern u64 __ua_limit;
  * @x:  Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -157,7 +159,8 @@ extern u64 __ua_limit;
  * @x:  Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -177,7 +180,8 @@ extern u64 __ua_limit;
  * @x:  Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -199,7 +203,8 @@ extern u64 __ua_limit;
  * @x:  Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -498,7 +503,8 @@ extern void __put_user_unknown(void);
  * @x:  Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -517,7 +523,8 @@ extern void __put_user_unknown(void);
  * @x:  Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -537,7 +544,8 @@ extern void __put_user_unknown(void);
  * @x:  Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -559,7 +567,8 @@ extern void __put_user_unknown(void);
  * @x:  Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -815,7 +824,8 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n);
  * @from: Source address, in kernel space.
  * @n:   Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -888,7 +898,8 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n);
  * @from: Source address, in kernel space.
  * @n:   Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -1075,7 +1086,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
  * @from: Source address, in user space.
  * @n:   Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -1107,7 +1119,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
  * @from: Source address, in user space.
  * @n:   Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.
  *
@@ -1329,7 +1342,8 @@ strncpy_from_user(char *__to, const char __user *__from, long __len)
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
@@ -1398,7 +1412,8 @@ static inline long __strnlen_user(const char __user *s, long n)
  * strnlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
index e36515d..209e5b7 100644 (file)
@@ -74,13 +74,12 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
 {
        unsigned long sr, mask, fcsr, fcsr0, fcsr1;
 
+       fcsr = c->fpu_csr31;
        mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM;
 
        sr = read_c0_status();
        __enable_fpu(FPU_AS_IS);
 
-       fcsr = read_32bit_cp1_register(CP1_STATUS);
-
        fcsr0 = fcsr & mask;
        write_32bit_cp1_register(CP1_STATUS, fcsr0);
        fcsr0 = read_32bit_cp1_register(CP1_STATUS);
index be4899f..4a4d9e0 100644 (file)
@@ -76,14 +76,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
 
        /* Lets see if this is an O32 ELF */
        if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) {
-               /* FR = 1 for N32 */
-               if (ehdr32->e_flags & EF_MIPS_ABI2)
-                       state->overall_fp_mode = FP_FR1;
-               else
-                       /* Set a good default FPU mode for O32 */
-                       state->overall_fp_mode = cpu_has_mips_r6 ?
-                               FP_FRE : FP_FR0;
-
                if (ehdr32->e_flags & EF_MIPS_FP64) {
                        /*
                         * Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it
@@ -104,9 +96,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
                                  (char *)&abiflags,
                                  sizeof(abiflags));
        } else {
-               /* FR=1 is really the only option for 64-bit */
-               state->overall_fp_mode = FP_FR1;
-
                if (phdr64->p_type != PT_MIPS_ABIFLAGS)
                        return 0;
                if (phdr64->p_filesz < sizeof(abiflags))
@@ -137,6 +126,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
        struct elf32_hdr *ehdr = _ehdr;
        struct mode_req prog_req, interp_req;
        int fp_abi, interp_fp_abi, abi0, abi1, max_abi;
+       bool is_mips64;
 
        if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
                return 0;
@@ -152,10 +142,22 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
                abi0 = abi1 = fp_abi;
        }
 
-       /* ABI limits. O32 = FP_64A, N32/N64 = FP_SOFT */
-       max_abi = ((ehdr->e_ident[EI_CLASS] == ELFCLASS32) &&
-                  (!(ehdr->e_flags & EF_MIPS_ABI2))) ?
-               MIPS_ABI_FP_64A : MIPS_ABI_FP_SOFT;
+       is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ||
+                   (ehdr->e_flags & EF_MIPS_ABI2);
+
+       if (is_mips64) {
+               /* MIPS64 code always uses FR=1, thus the default is easy */
+               state->overall_fp_mode = FP_FR1;
+
+               /* Disallow access to the various FPXX & FP64 ABIs */
+               max_abi = MIPS_ABI_FP_SOFT;
+       } else {
+               /* Default to a mode capable of running code expecting FR=0 */
+               state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
+
+               /* Allow all ABIs we know about */
+               max_abi = MIPS_ABI_FP_64A;
+       }
 
        if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) ||
            (abi1 > max_abi && abi1 != MIPS_ABI_FP_UNKNOWN))
index d2bfbc2..3c8a18a 100644 (file)
@@ -29,7 +29,7 @@
 int kgdb_early_setup;
 #endif
 
-static unsigned long irq_map[NR_IRQS / BITS_PER_LONG];
+static DECLARE_BITMAP(irq_map, NR_IRQS);
 
 int allocate_irqno(void)
 {
@@ -109,7 +109,7 @@ void __init init_IRQ(void)
 #endif
 }
 
-#ifdef DEBUG_STACKOVERFLOW
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
 static inline void check_stack_overflow(void)
 {
        unsigned long sp;
index d544e77..e933a30 100644 (file)
@@ -176,7 +176,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
 
        __get_user(value, data + 64);
        fcr31 = child->thread.fpu.fcr31;
-       mask = current_cpu_data.fpu_msk31;
+       mask = boot_cpu_data.fpu_msk31;
        child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
 
        /* FIR may not be written.  */
index 06805e0..0b85f82 100644 (file)
@@ -28,12 +28,7 @@ extern void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 extern int fpcsr_pending(unsigned int __user *fpcsr);
 
 /* Make sure we will not lose FPU ownership */
-#ifdef CONFIG_PREEMPT
-#define lock_fpu_owner()       preempt_disable()
-#define unlock_fpu_owner()     preempt_enable()
-#else
-#define lock_fpu_owner()       pagefault_disable()
-#define unlock_fpu_owner()     pagefault_enable()
-#endif
+#define lock_fpu_owner()       ({ preempt_disable(); pagefault_disable(); })
+#define unlock_fpu_owner()     ({ pagefault_enable(); preempt_enable(); })
 
 #endif /* __SIGNAL_COMMON_H */
index fd528d7..336708a 100644 (file)
@@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = {
 static void bmips_wr_vec(unsigned long dst, char *start, char *end)
 {
        memcpy((void *)dst, start, end - start);
-       dma_cache_wback((unsigned long)start, end - start);
+       dma_cache_wback(dst, end - start);
        local_flush_icache_range(dst, dst + (end - start));
        instruction_hazard();
 }
index 7e011f9..4251d39 100644 (file)
@@ -92,7 +92,7 @@ static void __init cps_smp_setup(void)
 #ifdef CONFIG_MIPS_MT_FPAFF
        /* If we have an FPU, enroll ourselves in the FPU-full mask */
        if (cpu_has_fpu)
-               cpu_set(0, mt_fpu_cpumask);
+               cpumask_set_cpu(0, &mt_fpu_cpumask);
 #endif /* CONFIG_MIPS_MT_FPAFF */
 }
 
index 193ace7..faa46eb 100644 (file)
@@ -43,7 +43,7 @@
 #include <asm/time.h>
 #include <asm/setup.h>
 
-volatile cpumask_t cpu_callin_map;     /* Bitmask of started secondaries */
+cpumask_t cpu_callin_map;              /* Bitmask of started secondaries */
 
 int __cpu_number_map[NR_CPUS];         /* Map physical to logical */
 EXPORT_SYMBOL(__cpu_number_map);
@@ -218,8 +218,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
        /*
         * Trust is futile.  We should really have timeouts ...
         */
-       while (!cpumask_test_cpu(cpu, &cpu_callin_map))
+       while (!cpumask_test_cpu(cpu, &cpu_callin_map)) {
                udelay(100);
+               schedule();
+       }
 
        synchronise_count_master(cpu);
        return 0;
index ba32e48..d2d1c19 100644 (file)
@@ -269,7 +269,6 @@ static void __show_regs(const struct pt_regs *regs)
         */
        printk("epc   : %0*lx %pS\n", field, regs->cp0_epc,
               (void *) regs->cp0_epc);
-       printk("    %s\n", print_tainted());
        printk("ra    : %0*lx %pS\n", field, regs->regs[31],
               (void *) regs->regs[31]);
 
index 6230f37..d5fa3ea 100644 (file)
@@ -2389,7 +2389,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 {
        unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr];
        enum emulation_result er = EMULATE_DONE;
-       unsigned long curr_pc;
 
        if (run->mmio.len > sizeof(*gpr)) {
                kvm_err("Bad MMIO length: %d", run->mmio.len);
@@ -2397,11 +2396,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
                goto done;
        }
 
-       /*
-        * Update PC and hold onto current PC in case there is
-        * an error and we want to rollback the PC
-        */
-       curr_pc = vcpu->arch.pc;
        er = update_pc(vcpu, vcpu->arch.pending_load_cause);
        if (er == EMULATE_FAIL)
                return er;
@@ -2415,7 +2409,7 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
                if (vcpu->mmio_needed == 2)
                        *gpr = *(int16_t *) run->mmio.data;
                else
-                       *gpr = *(int16_t *) run->mmio.data;
+                       *gpr = *(uint16_t *)run->mmio.data;
 
                break;
        case 1:
index 7d12c0d..77e6494 100644 (file)
@@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm)
 FEXPORT(__strnlen_\func\()_nocheck_asm)
        move            v0, a0
        PTR_ADDU        a1, a0                  # stop pointer
-1:     beq             v0, a1, 1f              # limit reached?
+1:
+#ifdef CONFIG_CPU_DADDI_WORKAROUNDS
+       .set            noat
+       li              AT, 1
+#endif
+       beq             v0, a1, 1f              # limit reached?
 .ifeqs "\func", "kernel"
        EX(lb, t0, (v0), .Lfault\@)
 .else
@@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm)
 .endif
        .set            noreorder
        bnez            t0, 1b
-1:      PTR_ADDIU      v0, 1
+1:
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
+        PTR_ADDIU      v0, 1
+#else
+        PTR_ADDU       v0, AT
+       .set            at
+#endif
        .set            reorder
        PTR_SUBU        v0, a0
        jr              ra
index e70c33f..f2e8153 100644 (file)
@@ -3,15 +3,13 @@
 #
 
 obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
-    bonito-irq.o mem.o machtype.o platform.o
+    bonito-irq.o mem.o machtype.o platform.o serial.o
 obj-$(CONFIG_PCI) += pci.o
 
 #
 # Serial port support
 #
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-loongson-serial-$(CONFIG_SERIAL_8250) := serial.o
-obj-y += $(loongson-serial-m) $(loongson-serial-y)
 obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o
 obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
 
index e3c68b5..509877c 100644 (file)
@@ -272,7 +272,7 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
        if (action & SMP_ASK_C0COUNT) {
                BUG_ON(cpu != 0);
                c0count = read_c0_count();
-               for (i = 1; i < loongson_sysconf.nr_cpus; i++)
+               for (i = 1; i < num_possible_cpus(); i++)
                        per_cpu(core0_c0count, i) = c0count;
        }
 }
index d31c537..22b9b2c 100644 (file)
@@ -889,7 +889,7 @@ static inline void cop1_cfc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
                break;
 
        case FPCREG_RID:
-               value = current_cpu_data.fpu_id;
+               value = boot_cpu_data.fpu_id;
                break;
 
        default:
@@ -921,7 +921,7 @@ static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
                         (void *)xcp->cp0_epc, MIPSInst_RT(ir), value);
 
                /* Preserve read-only bits.  */
-               mask = current_cpu_data.fpu_msk31;
+               mask = boot_cpu_data.fpu_msk31;
                fcr31 = (value & ~mask) | (fcr31 & mask);
                break;
 
index 0dbb65a..2e03ab1 100644 (file)
@@ -1372,7 +1372,7 @@ static int probe_scache(void)
        scache_size = addr;
        c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22);
        c->scache.ways = 1;
-       c->dcache.waybit = 0;           /* does not matter */
+       c->scache.waybit = 0;           /* does not matter */
 
        return 1;
 }
index 7ff8637..36c0f26 100644 (file)
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/perf_event.h>
+#include <linux/uaccess.h>
 
 #include <asm/branch.h>
 #include <asm/mmu_context.h>
-#include <asm/uaccess.h>
 #include <asm/ptrace.h>
 #include <asm/highmem.h>               /* For VMALLOC_END */
 #include <linux/kdebug.h>
@@ -94,7 +94,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto bad_area_nosemaphore;
 
        if (user_mode(regs))
index da815d2..11661cb 100644 (file)
@@ -47,7 +47,7 @@ void *kmap_atomic(struct page *page)
        unsigned long vaddr;
        int idx, type;
 
-       /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+       preempt_disable();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
@@ -72,6 +72,7 @@ void __kunmap_atomic(void *kvaddr)
 
        if (vaddr < FIXADDR_START) { // FIXME
                pagefault_enable();
+               preempt_enable();
                return;
        }
 
@@ -92,6 +93,7 @@ void __kunmap_atomic(void *kvaddr)
 #endif
        kmap_atomic_idx_pop();
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
 
@@ -104,6 +106,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
        unsigned long vaddr;
        int idx, type;
 
+       preempt_disable();
        pagefault_disable();
 
        type = kmap_atomic_idx_push();
index faa5c98..198a314 100644 (file)
@@ -90,6 +90,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)
 
        BUG_ON(Page_dcache_dirty(page));
 
+       preempt_disable();
        pagefault_disable();
        idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
        idx += in_interrupt() ? FIX_N_COLOURS : 0;
@@ -152,6 +153,7 @@ void kunmap_coherent(void)
        write_c0_entryhi(old_ctx);
        local_irq_restore(flags);
        pagefault_enable();
+       preempt_enable();
 }
 
 void copy_user_highpage(struct page *to, struct page *from,
index a27a088..08318ec 100644 (file)
@@ -495,7 +495,7 @@ static void r4k_tlb_configure(void)
 
        if (cpu_has_rixi) {
                /*
-                * Enable the no read, no exec bits, and enable large virtual
+                * Enable the no read, no exec bits, and enable large physical
                 * address.
                 */
 #ifdef CONFIG_64BIT
index 5d61393..e23fdf2 100644 (file)
@@ -681,11 +681,7 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx)
                sp_off += config_enabled(CONFIG_64BIT) ?
                        (ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE;
 
-       /*
-        * Subtract the bytes for the last registers since we only care about
-        * the location on the stack pointer.
-        */
-       return sp_off - RSIZE;
+       return sp_off;
 }
 
 static void build_prologue(struct jit_ctx *ctx)
index e20b02e..e10d10b 100644 (file)
@@ -41,7 +41,7 @@ static irqreturn_t ill_acc_irq_handler(int irq, void *_priv)
                addr, (type >> ILL_ACC_OFF_S) & ILL_ACC_OFF_M,
                type & ILL_ACC_LEN_M);
 
-       rt_memc_w32(REG_ILL_ACC_TYPE, REG_ILL_ACC_TYPE);
+       rt_memc_w32(ILL_INT_STATUS, REG_ILL_ACC_TYPE);
 
        return IRQ_HANDLED;
 }
index 0134db2..5a2a821 100644 (file)
@@ -130,9 +130,9 @@ struct platform_device ip32_rtc_device = {
        .resource               = ip32_rtc_resources,
 };
 
-+static int __init sgio2_rtc_devinit(void)
+static __init int sgio2_rtc_devinit(void)
 {
        return platform_device_register(&ip32_rtc_device);
 }
 
-device_initcall(sgio2_cmos_devinit);
+device_initcall(sgio2_rtc_devinit);
index 2fbbe4d..1ddea5a 100644 (file)
@@ -75,6 +75,7 @@ static inline void *kmap_atomic(struct page *page)
        unsigned long vaddr;
        int idx, type;
 
+       preempt_disable();
        pagefault_disable();
        if (page < highmem_start_page)
                return page_address(page);
@@ -98,6 +99,7 @@ static inline void __kunmap_atomic(unsigned long vaddr)
 
        if (vaddr < FIXADDR_START) { /* FIXME */
                pagefault_enable();
+               preempt_enable();
                return;
        }
 
@@ -122,6 +124,7 @@ static inline void __kunmap_atomic(unsigned long vaddr)
 
        kmap_atomic_idx_pop();
        pagefault_enable();
+       preempt_enable();
 }
 #endif /* __KERNEL__ */
 
index 0c2cc5d..4a1d181 100644 (file)
@@ -23,8 +23,8 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/vt_kern.h>             /* For unblank_screen() */
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/hardirq.h>
 #include <asm/cpu-regs.h>
@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
        if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
index 0c9b6af..b51878b 100644 (file)
@@ -77,7 +77,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto bad_area_nosemaphore;
 
        if (user_mode(regs))
index de65f66..ec2df4b 100644 (file)
@@ -142,6 +142,7 @@ static inline void kunmap(struct page *page)
 
 static inline void *kmap_atomic(struct page *page)
 {
+       preempt_disable();
        pagefault_disable();
        return page_address(page);
 }
@@ -150,6 +151,7 @@ static inline void __kunmap_atomic(void *addr)
 {
        flush_kernel_dcache_page_addr(addr);
        pagefault_enable();
+       preempt_enable();
 }
 
 #define kmap_atomic_prot(page, prot)   kmap_atomic(page)
index dbd1335..0a90b96 100644 (file)
@@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
 #define xchg(ptr, x) \
        ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
 
-#define __HAVE_ARCH_CMPXCHG    1
-
 /* bug catcher for when unsupported size is used - won't link */
 extern void __cmpxchg_called_with_bad_pointer(void);
 
index 3391d06..78c9fd3 100644 (file)
@@ -348,6 +348,10 @@ struct pt_regs;    /* forward declaration... */
 
 #define ELF_HWCAP      0
 
+#define STACK_RND_MASK (is_32bit_task() ? \
+                               0x7ff >> (PAGE_SHIFT - 12) : \
+                               0x3ffff >> (PAGE_SHIFT - 12))
+
 struct mm_struct;
 extern unsigned long arch_randomize_brk(struct mm_struct *);
 #define arch_randomize_brk arch_randomize_brk
index 8a488c2..809905a 100644 (file)
@@ -181,9 +181,12 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
        return 1;
 }
 
+/*
+ * Copy architecture-specific thread state
+ */
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-           unsigned long arg, struct task_struct *p)
+           unsigned long kthread_arg, struct task_struct *p)
 {
        struct pt_regs *cregs = &(p->thread.regs);
        void *stack = task_stack_page(p);
@@ -195,11 +198,10 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
        extern void * const child_return;
 
        if (unlikely(p->flags & PF_KTHREAD)) {
+               /* kernel thread */
                memset(cregs, 0, sizeof(struct pt_regs));
                if (!usp) /* idle thread */
                        return 0;
-
-               /* kernel thread */
                /* Must exit via ret_from_kernel_thread in order
                 * to call schedule_tail()
                 */
@@ -215,7 +217,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 #else
                cregs->gr[26] = usp;
 #endif
-               cregs->gr[25] = arg;
+               cregs->gr[25] = kthread_arg;
        } else {
                /* user thread */
                /* usp must be word aligned.  This also prevents users from
index e1ffea2..5aba01a 100644 (file)
@@ -77,6 +77,9 @@ static unsigned long mmap_upper_limit(void)
        if (stack_base > STACK_SIZE_MAX)
                stack_base = STACK_SIZE_MAX;
 
+       /* Add space for stack randomization. */
+       stack_base += (STACK_RND_MASK << PAGE_SHIFT);
+
        return PAGE_ALIGN(STACK_TOP - stack_base);
 }
 
index 47ee620..6548fd1 100644 (file)
@@ -26,9 +26,9 @@
 #include <linux/console.h>
 #include <linux/bug.h>
 #include <linux/ratelimit.h>
+#include <linux/uaccess.h>
 
 #include <asm/assembly.h>
-#include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/traps.h>
@@ -800,7 +800,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
             * unless pagefault_disable() was called before.
             */
 
-           if (fault_space == 0 && !in_atomic())
+           if (fault_space == 0 && !faulthandler_disabled())
            {
                pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
                parisc_terminate("Kernel Fault", regs, code, fault_address);
index e5120e6..15503ad 100644 (file)
@@ -15,8 +15,8 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/traps.h>
 
 /* Various important other fields */
@@ -207,7 +207,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
        int fault;
        unsigned int flags;
 
-       if (in_atomic())
+       if (pagefault_disabled())
                goto no_context;
 
        tsk = current;
index a3bf5be..51ccc72 100644 (file)
@@ -34,7 +34,7 @@
 #define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
 #define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
-#define set_mb(var, value)     do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value)       do { WRITE_ONCE(var, value); mb(); } while (0)
 
 #ifdef __SUBARCH_HAS_LWSYNC
 #    define SMPWMB      LWSYNC
@@ -89,5 +89,6 @@ do {                                                                  \
 
 #define smp_mb__before_atomic()     smp_mb()
 #define smp_mb__after_atomic()      smp_mb()
+#define smp_mb__before_spinlock()   smp_mb()
 
 #endif /* _ASM_POWERPC_BARRIER_H */
index d463c68..ad6263c 100644 (file)
@@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
  * Compare and exchange - if *p == old, set it to new,
  * and return the old value of *p.
  */
-#define __HAVE_ARCH_CMPXCHG    1
 
 static __always_inline unsigned long
 __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
index 5f1048e..8b3b46b 100644 (file)
@@ -87,7 +87,7 @@ static inline int prrn_is_enabled(void)
 #include <asm/smp.h>
 
 #define topology_physical_package_id(cpu)      (cpu_to_chip_id(cpu))
-#define topology_thread_cpumask(cpu)   (per_cpu(cpu_sibling_map, cpu))
+#define topology_sibling_cpumask(cpu)  (per_cpu(cpu_sibling_map, cpu))
 #define topology_core_cpumask(cpu)     (per_cpu(cpu_core_map, cpu))
 #define topology_core_id(cpu)          (cpu_to_core_id(cpu))
 #endif
index 15c99b6..b2eb468 100644 (file)
@@ -73,7 +73,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
                    uint64_t nip, uint64_t addr)
 {
        uint64_t srr1;
-       int index = __this_cpu_inc_return(mce_nest_count);
+       int index = __this_cpu_inc_return(mce_nest_count) - 1;
        struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
 
        /*
@@ -184,7 +184,7 @@ void machine_check_queue_event(void)
        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return;
 
-       index = __this_cpu_inc_return(mce_queue_count);
+       index = __this_cpu_inc_return(mce_queue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_queue_count);
index f096e72..1db6851 100644 (file)
@@ -213,6 +213,7 @@ SECTIONS
                *(.opd)
        }
 
+       . = ALIGN(256);
        .got : AT(ADDR(.got) - LOAD_OFFSET) {
                __toc_start = .;
 #ifndef CONFIG_RELOCATABLE
index 48d3c5d..df81caa 100644 (file)
@@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc)
  */
 static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-       struct kvm_vcpu *vcpu;
+       struct kvm_vcpu *vcpu, *vnext;
        int i;
        int srcu_idx;
 
@@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
         */
        if ((threads_per_core > 1) &&
            ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
-               list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+               list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+                                        arch.run_list) {
                        vcpu->arch.ret = -EBUSY;
                        kvmppc_remove_runnable(vc, vcpu);
                        wake_up(&vcpu->arch.cpu_run);
index 3cf529c..ac93a3b 100644 (file)
@@ -27,11 +27,11 @@ int enter_vmx_usercopy(void)
        if (in_interrupt())
                return 0;
 
-       /* This acts as preempt_disable() as well and will make
-        * enable_kernel_altivec(). We need to disable page faults
-        * as they can call schedule and thus make us lose the VMX
-        * context. So on page faults, we just fail which will cause
-        * a fallback to the normal non-vmx copy.
+       preempt_disable();
+       /*
+        * We need to disable page faults as they can call schedule and
+        * thus make us lose the VMX context. So on page faults, we just
+        * fail which will cause a fallback to the normal non-vmx copy.
         */
        pagefault_disable();
 
@@ -47,6 +47,7 @@ int enter_vmx_usercopy(void)
 int exit_vmx_usercopy(void)
 {
        pagefault_enable();
+       preempt_enable();
        return 0;
 }
 
index b396868..6d53597 100644 (file)
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
 #include <linux/hugetlb.h>
+#include <linux/uaccess.h>
 
 #include <asm/firmware.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
-#include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/siginfo.h>
 #include <asm/debug.h>
@@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
        if (!arch_irq_disabled_regs(regs))
                local_irq_enable();
 
-       if (in_atomic() || mm == NULL) {
+       if (faulthandler_disabled() || mm == NULL) {
                if (!user_mode(regs)) {
                        rc = SIGSEGV;
                        goto bail;
                }
-               /* in_atomic() in user mode is really bad,
+               /* faulthandler_disabled() in user mode is really bad,
                   as is current->mm == NULL. */
                printk(KERN_EMERG "Page fault in user mode with "
-                      "in_atomic() = %d mm = %p\n", in_atomic(), mm);
+                      "faulthandler_disabled() = %d mm = %p\n",
+                      faulthandler_disabled(), mm);
                printk(KERN_EMERG "NIP = %lx  MSR = %lx\n",
                       regs->nip, regs->msr);
                die("Weird page fault", regs, SIGSEGV);
index e7450bd..e292c8a 100644 (file)
@@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
        unsigned long vaddr;
        int idx, type;
 
-       /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+       preempt_disable();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
@@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr)
 
        if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
                pagefault_enable();
+               preempt_enable();
                return;
        }
 
@@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr)
 
        kmap_atomic_idx_pop();
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
index 0ce968b..3385e3d 100644 (file)
@@ -689,27 +689,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
-       pte_t *ptep;
-       struct page *page;
+       pte_t *ptep, pte;
        unsigned shift;
        unsigned long mask, flags;
+       struct page *page = ERR_PTR(-EINVAL);
+
+       local_irq_save(flags);
+       ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+       if (!ptep)
+               goto no_page;
+       pte = READ_ONCE(*ptep);
        /*
+        * Verify it is a huge page else bail.
         * Transparent hugepages are handled by generic code. We can skip them
         * here.
         */
-       local_irq_save(flags);
-       ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+       if (!shift || pmd_trans_huge(__pmd(pte_val(pte))))
+               goto no_page;
 
-       /* Verify it is a huge page else bail. */
-       if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) {
-               local_irq_restore(flags);
-               return ERR_PTR(-EINVAL);
+       if (!pte_present(pte)) {
+               page = NULL;
+               goto no_page;
        }
        mask = (1UL << shift) - 1;
-       page = pte_page(*ptep);
+       page = pte_page(pte);
        if (page)
                page += (address & mask) / PAGE_SIZE;
 
+no_page:
        local_irq_restore(flags);
        return page;
 }
index 59daa5e..6bfadf1 100644 (file)
@@ -839,6 +839,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
         * hash fault look at them.
         */
        memset(pgtable, 0, PTE_FRAG_SIZE);
+       /*
+        * Serialize against find_linux_pte_or_hugepte which does lock-less
+        * lookup in page tables with local interrupts disabled. For huge pages
+        * it casts pmd_t to pte_t. Since format of pte_t is different from
+        * pmd_t we want to prevent transit from pmd pointing to page table
+        * to pmd pointing to huge page (and back) while interrupts are disabled.
+        * We clear pmd to possibly replace it with page table pointer in
+        * different code paths. So make sure we wait for the parallel
+        * find_linux_pte_or_hugepage to finish.
+        */
+       kick_all_cpus_sync();
        return old_pmd;
 }
 
index cbd3d06..723a099 100644 (file)
@@ -217,7 +217,7 @@ static DEFINE_RAW_SPINLOCK(tlbivax_lock);
 static int mm_is_core_local(struct mm_struct *mm)
 {
        return cpumask_subset(mm_cpumask(mm),
-                             topology_thread_cpumask(smp_processor_id()));
+                             topology_sibling_cpumask(smp_processor_id()));
 }
 
 struct tlb_flush_param {
index 7940dc9..b258110 100644 (file)
 #define GHASH_DIGEST_SIZE      16
 
 struct ghash_ctx {
-       u8 icv[16];
-       u8 key[16];
+       u8 key[GHASH_BLOCK_SIZE];
 };
 
 struct ghash_desc_ctx {
+       u8 icv[GHASH_BLOCK_SIZE];
+       u8 key[GHASH_BLOCK_SIZE];
        u8 buffer[GHASH_BLOCK_SIZE];
        u32 bytes;
 };
@@ -28,8 +29,10 @@ struct ghash_desc_ctx {
 static int ghash_init(struct shash_desc *desc)
 {
        struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+       struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
 
        memset(dctx, 0, sizeof(*dctx));
+       memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
 
        return 0;
 }
@@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm,
        }
 
        memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
-       memset(ctx->icv, 0, GHASH_BLOCK_SIZE);
 
        return 0;
 }
@@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc,
                         const u8 *src, unsigned int srclen)
 {
        struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-       struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
        unsigned int n;
        u8 *buf = dctx->buffer;
        int ret;
@@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc,
                src += n;
 
                if (!dctx->bytes) {
-                       ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf,
+                       ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf,
                                              GHASH_BLOCK_SIZE);
                        if (ret != GHASH_BLOCK_SIZE)
                                return -EIO;
@@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc,
 
        n = srclen & ~(GHASH_BLOCK_SIZE - 1);
        if (n) {
-               ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n);
+               ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n);
                if (ret != n)
                        return -EIO;
                src += n;
@@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc,
        return 0;
 }
 
-static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+static int ghash_flush(struct ghash_desc_ctx *dctx)
 {
        u8 *buf = dctx->buffer;
        int ret;
@@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
 
                memset(pos, 0, dctx->bytes);
 
-               ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE);
+               ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
                if (ret != GHASH_BLOCK_SIZE)
                        return -EIO;
+
+               dctx->bytes = 0;
        }
 
-       dctx->bytes = 0;
        return 0;
 }
 
 static int ghash_final(struct shash_desc *desc, u8 *dst)
 {
        struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-       struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
        int ret;
 
-       ret = ghash_flush(ctx, dctx);
+       ret = ghash_flush(dctx);
        if (!ret)
-               memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE);
+               memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
        return ret;
 }
 
index 1f374b3..9d5192c 100644 (file)
@@ -125,7 +125,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
                /* fill page with urandom bytes */
                get_random_bytes(pg, PAGE_SIZE);
                /* exor page with stckf values */
-               for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) {
+               for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) {
                        u64 *p = ((u64 *)pg) + n;
                        *p ^= get_tod_clock_fast();
                }
index 8d72471..e6f8615 100644 (file)
@@ -36,7 +36,7 @@
 #define smp_mb__before_atomic()                smp_mb()
 #define smp_mb__after_atomic()         smp_mb()
 
-#define set_mb(var, value)             do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value)               do { WRITE_ONCE(var, value); mb(); } while (0)
 
 #define smp_store_release(p, v)                                                \
 do {                                                                   \
index 4eadec4..411464f 100644 (file)
@@ -32,8 +32,6 @@
        __old;                                                          \
 })
 
-#define __HAVE_ARCH_CMPXCHG
-
 #define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn)              \
 ({                                                                     \
        register __typeof__(*(p1)) __old1 asm("2") = (o1);              \
index fc64239..ef24a21 100644 (file)
@@ -494,7 +494,7 @@ static inline int pmd_large(pmd_t pmd)
        return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
 }
 
-static inline int pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
 {
        unsigned long origin_mask;
 
index b1453a2..4990f6c 100644 (file)
@@ -22,7 +22,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
 
 #define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
 #define topology_thread_id(cpu)                  (per_cpu(cpu_topology, cpu).thread_id)
-#define topology_thread_cpumask(cpu)     (&per_cpu(cpu_topology, cpu).thread_mask)
+#define topology_sibling_cpumask(cpu) \
+               (&per_cpu(cpu_topology, cpu).thread_mask)
 #define topology_core_id(cpu)            (per_cpu(cpu_topology, cpu).core_id)
 #define topology_core_cpumask(cpu)       (&per_cpu(cpu_topology, cpu).core_mask)
 #define topology_book_id(cpu)            (per_cpu(cpu_topology, cpu).book_id)
index d64a7a6..9dd4cc4 100644 (file)
@@ -98,7 +98,8 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
  * @from: Source address, in user space.
  * @n:   Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_user(void *to, const void __user *from,
  * @from: Source address, in kernel space.
  * @n:   Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((noreturn));
  * @from: Source address, in kernel space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -290,7 +293,8 @@ __compiletime_warning("copy_from_user() buffer size is not provably correct")
  * @from: Source address, in user space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.
  *
@@ -348,7 +352,8 @@ static inline unsigned long strnlen_user(const char __user *src, unsigned long n
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
index 76515bc..4c8f5d7 100644 (file)
@@ -399,7 +399,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
         * user context.
         */
        fault = VM_FAULT_BADCONTEXT;
-       if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
+       if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm))
                goto out;
 
        address = trans_exc_code & __FAIL_ADDR_MASK;
index ba8593a..de156ba 100644 (file)
@@ -48,7 +48,9 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
  * We get 160 bytes stack space from calling function, but only use
  * 11 * 8 byte (old backchain + r15 - r6) for storing registers.
  */
-#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8))
+#define STK_SPACE      (MAX_BPF_STACK + 8 + 4 + 4 + 160)
+#define STK_160_UNUSED (160 - 11 * 8)
+#define STK_OFF                (STK_SPACE - STK_160_UNUSED)
 #define STK_OFF_TMP    160     /* Offset of tmp buffer on stack */
 #define STK_OFF_HLEN   168     /* Offset of SKB header length on stack */
 
index 7690dc8..55423d8 100644 (file)
@@ -384,13 +384,16 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
        }
        /* Setup stack and backchain */
        if (jit->seen & SEEN_STACK) {
-               /* lgr %bfp,%r15 (BPF frame pointer) */
-               EMIT4(0xb9040000, BPF_REG_FP, REG_15);
+               if (jit->seen & SEEN_FUNC)
+                       /* lgr %w1,%r15 (backchain) */
+                       EMIT4(0xb9040000, REG_W1, REG_15);
+               /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+               EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
                /* aghi %r15,-STK_OFF */
                EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF);
                if (jit->seen & SEEN_FUNC)
-                       /* stg %bfp,152(%r15) (backchain) */
-                       EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0,
+                       /* stg %w1,152(%r15) (backchain) */
+                       EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
                                      REG_15, 152);
        }
        /*
@@ -443,8 +446,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 
 /*
  * Compile one eBPF instruction into s390x code
+ *
+ * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
+ * stack space for the large switch statement.
  */
-static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
 {
        struct bpf_insn *insn = &fp->insnsi[i];
        int jmp_off, last, insn_count = 1;
@@ -588,8 +594,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
                EMIT4(0xb9160000, dst_reg, rc_reg);
                break;
        }
-       case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */
-       case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */
+       case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
+       case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
        {
                int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
@@ -602,10 +608,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
                EMIT4_IMM(0xa7090000, REG_W0, 0);
                /* lgr %w1,%dst */
                EMIT4(0xb9040000, REG_W1, dst_reg);
-               /* llgfr %dst,%src (u32 cast) */
-               EMIT4(0xb9160000, dst_reg, src_reg);
                /* dlgr %w0,%dst */
-               EMIT4(0xb9870000, REG_W0, dst_reg);
+               EMIT4(0xb9870000, REG_W0, src_reg);
                /* lgr %dst,%rc */
                EMIT4(0xb9040000, dst_reg, rc_reg);
                break;
@@ -632,8 +636,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
                EMIT4(0xb9160000, dst_reg, rc_reg);
                break;
        }
-       case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */
-       case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */
+       case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
+       case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
        {
                int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
@@ -649,7 +653,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
                EMIT4(0xb9040000, REG_W1, dst_reg);
                /* dlg %w0,<d(imm)>(%l) */
                EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
-                             EMIT_CONST_U64((u32) imm));
+                             EMIT_CONST_U64(imm));
                /* lgr %dst,%rc */
                EMIT4(0xb9040000, dst_reg, rc_reg);
                break;
index f384839..cc3f642 100644 (file)
@@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
                                        (unsigned long)(o),     \
                                        (unsigned long)(n)))
 
-#define __HAVE_ARCH_CMPXCHG    1
-
 #include <asm-generic/cmpxchg-local.h>
 
 #endif /* _ASM_SCORE_CMPXCHG_H */
index ab66ddd..20a3591 100644 (file)
@@ -36,7 +36,8 @@
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -61,7 +62,8 @@
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -79,7 +81,8 @@
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
index 00b7d3a..16efa3a 100644 (file)
@@ -175,10 +175,10 @@ ENTRY(__clear_user)
        br      r3
 
        .section .fixup, "ax"
+99:
        br      r3
        .previous
        .section __ex_table, "a"
        .align  2
-99:
        .word   0b, 99b
        .previous
index 6860beb..37a6c2e 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ptrace.h>
+#include <linux/uaccess.h>
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -73,7 +74,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
        * If we're in an interrupt or have no user
        * context, we must not take the fault..
        */
-       if (in_atomic() || !mm)
+       if (pagefault_disabled() || !mm)
                goto bad_area_nosemaphore;
 
        if (user_mode(regs))
index 4371530..bf91037 100644 (file)
@@ -32,7 +32,7 @@
 #define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
 #endif
 
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
 #include <asm-generic/barrier.h>
 
index f6bd140..85c97b1 100644 (file)
@@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void);
  * if something tries to do an invalid cmpxchg(). */
 extern void __cmpxchg_called_with_bad_pointer(void);
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
                unsigned long new, int size)
 {
index a58fec9..79d8276 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/kprobes.h>
 #include <linux/perf_event.h>
 #include <linux/kdebug.h>
+#include <linux/uaccess.h>
 #include <asm/io_trapped.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -438,9 +439,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 
        /*
         * If we're in an interrupt, have no user context or are running
-        * in an atomic region then we must not take the fault:
+        * with pagefaults disabled then we must not take the fault:
         */
-       if (unlikely(in_atomic() || !mm)) {
+       if (unlikely(faulthandler_disabled() || !mm)) {
                bad_area_nosemaphore(regs, error_code, address);
                return;
        }
index 7664894..809941e 100644 (file)
@@ -40,8 +40,8 @@ do {  __asm__ __volatile__("ba,pt     %%xcc, 1f\n\t" \
 #define dma_rmb()      rmb()
 #define dma_wmb()      wmb()
 
-#define set_mb(__var, __value) \
-       do { __var = __value; membar_safe("#StoreLoad"); } while(0)
+#define smp_store_mb(__var, __value) \
+       do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)
 
 #ifdef CONFIG_SMP
 #define smp_mb()       mb()
index d38b52d..83ffb83 100644 (file)
@@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
  *
  * Cribbed from <asm-parisc/atomic.h>
  */
-#define __HAVE_ARCH_CMPXCHG    1
 
 /* bug catcher for when unsupported size is used - won't link */
 void __cmpxchg_called_with_bad_pointer(void);
index 0e1ed6c..faa2f61 100644 (file)
@@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr,
 
 #include <asm-generic/cmpxchg-local.h>
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 static inline unsigned long
 __cmpxchg_u32(volatile int *m, int old, int new)
 {
index a6e424d..a6cfdab 100644 (file)
@@ -24,7 +24,8 @@ typedef struct {
        unsigned int    icache_line_size;
        unsigned int    ecache_size;
        unsigned int    ecache_line_size;
-       int             core_id;
+       unsigned short  sock_id;
+       unsigned short  core_id;
        int             proc_id;
 } cpuinfo_sparc;
 
index dc165eb..2a52c91 100644 (file)
@@ -308,12 +308,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
        "       sllx            %1, 32, %1\n"
        "       or              %0, %1, %0\n"
        "       .previous\n"
+       "       .section        .sun_m7_2insn_patch, \"ax\"\n"
+       "       .word           661b\n"
+       "       sethi           %%uhi(%4), %1\n"
+       "       sethi           %%hi(%4), %0\n"
+       "       .word           662b\n"
+       "       or              %1, %%ulo(%4), %1\n"
+       "       or              %0, %%lo(%4), %0\n"
+       "       .word           663b\n"
+       "       sllx            %1, 32, %1\n"
+       "       or              %0, %1, %0\n"
+       "       .previous\n"
        : "=r" (mask), "=r" (tmp)
        : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
               _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U |
               _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U),
          "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
               _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V |
+              _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V),
+         "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
+              _PAGE_CP_4V | _PAGE_E_4V |
               _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V));
 
        return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
@@ -342,9 +356,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
        "       andn            %0, %4, %0\n"
        "       or              %0, %5, %0\n"
        "       .previous\n"
+       "       .section        .sun_m7_2insn_patch, \"ax\"\n"
+       "       .word           661b\n"
+       "       andn            %0, %6, %0\n"
+       "       or              %0, %5, %0\n"
+       "       .previous\n"
        : "=r" (val)
        : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U),
-                    "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V));
+                    "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V),
+                    "i" (_PAGE_CP_4V));
 
        return __pgprot(val);
 }
index ed8f071..01d1704 100644 (file)
@@ -40,11 +40,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 #ifdef CONFIG_SMP
 #define topology_physical_package_id(cpu)      (cpu_data(cpu).proc_id)
 #define topology_core_id(cpu)                  (cpu_data(cpu).core_id)
-#define topology_core_cpumask(cpu)             (&cpu_core_map[cpu])
-#define topology_thread_cpumask(cpu)           (&per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)             (&cpu_core_sib_map[cpu])
+#define topology_sibling_cpumask(cpu)          (&per_cpu(cpu_sibling_map, cpu))
 #endif /* CONFIG_SMP */
 
 extern cpumask_t cpu_core_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_map[NR_CPUS];
 static inline const struct cpumask *cpu_coregroup_mask(int cpu)
 {
         return &cpu_core_map[cpu];
index 6fd4436..ec9c04d 100644 (file)
@@ -79,6 +79,8 @@ struct sun4v_2insn_patch_entry {
 };
 extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
        __sun4v_2insn_patch_end;
+extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch,
+       __sun_m7_2insn_patch_end;
 
 
 #endif /* !(__ASSEMBLY__) */
index 07cc49e..0f67942 100644 (file)
@@ -69,6 +69,8 @@ void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
                             struct sun4v_1insn_patch_entry *);
 void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
                             struct sun4v_2insn_patch_entry *);
+void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *,
+                            struct sun4v_2insn_patch_entry *);
 extern unsigned int dcache_parity_tl1_occurred;
 extern unsigned int icache_parity_tl1_occurred;
 
index 94e392b..814fb17 100644 (file)
@@ -723,7 +723,6 @@ static int grpci2_of_probe(struct platform_device *ofdev)
                err = -ENOMEM;
                goto err1;
        }
-       memset(grpci2priv, 0, sizeof(*grpci2priv));
        priv->regs = regs;
        priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */
        priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT;
index 26c80e1..6f80936 100644 (file)
@@ -614,45 +614,68 @@ static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp)
        }
 }
 
-static void mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id)
+static void find_back_node_value(struct mdesc_handle *hp, u64 node,
+                                char *srch_val,
+                                void (*func)(struct mdesc_handle *, u64, int),
+                                u64 val, int depth)
 {
-       u64 a;
-
-       mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
-               u64 t = mdesc_arc_target(hp, a);
-               const char *name;
-               const u64 *id;
+       u64 arc;
 
-               name = mdesc_node_name(hp, t);
-               if (!strcmp(name, "cpu")) {
-                       id = mdesc_get_property(hp, t, "id", NULL);
-                       if (*id < NR_CPUS)
-                               cpu_data(*id).core_id = core_id;
-               } else {
-                       u64 j;
+       /* Since we have an estimate of recursion depth, do a sanity check. */
+       if (depth == 0)
+               return;
 
-                       mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) {
-                               u64 n = mdesc_arc_target(hp, j);
-                               const char *n_name;
+       mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) {
+               u64 n = mdesc_arc_target(hp, arc);
+               const char *name = mdesc_node_name(hp, n);
 
-                               n_name = mdesc_node_name(hp, n);
-                               if (strcmp(n_name, "cpu"))
-                                       continue;
+               if (!strcmp(srch_val, name))
+                       (*func)(hp, n, val);
 
-                               id = mdesc_get_property(hp, n, "id", NULL);
-                               if (*id < NR_CPUS)
-                                       cpu_data(*id).core_id = core_id;
-                       }
-               }
+               find_back_node_value(hp, n, srch_val, func, val, depth-1);
        }
 }
 
+static void __mark_core_id(struct mdesc_handle *hp, u64 node,
+                          int core_id)
+{
+       const u64 *id = mdesc_get_property(hp, node, "id", NULL);
+
+       if (*id < num_possible_cpus())
+               cpu_data(*id).core_id = core_id;
+}
+
+static void __mark_sock_id(struct mdesc_handle *hp, u64 node,
+                          int sock_id)
+{
+       const u64 *id = mdesc_get_property(hp, node, "id", NULL);
+
+       if (*id < num_possible_cpus())
+               cpu_data(*id).sock_id = sock_id;
+}
+
+static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
+                         int core_id)
+{
+       find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
+}
+
+static void mark_sock_ids(struct mdesc_handle *hp, u64 mp,
+                         int sock_id)
+{
+       find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10);
+}
+
 static void set_core_ids(struct mdesc_handle *hp)
 {
        int idx;
        u64 mp;
 
        idx = 1;
+
+       /* Identify unique cores by looking for cpus backpointed to by
+        * level 1 instruction caches.
+        */
        mdesc_for_each_node_by_name(hp, mp, "cache") {
                const u64 *level;
                const char *type;
@@ -667,11 +690,72 @@ static void set_core_ids(struct mdesc_handle *hp)
                        continue;
 
                mark_core_ids(hp, mp, idx);
+               idx++;
+       }
+}
+
+static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
+{
+       u64 mp;
+       int idx = 1;
+       int fnd = 0;
+
+       /* Identify unique sockets by looking for cpus backpointed to by
+        * shared level n caches.
+        */
+       mdesc_for_each_node_by_name(hp, mp, "cache") {
+               const u64 *cur_lvl;
+
+               cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
+               if (*cur_lvl != level)
+                       continue;
+
+               mark_sock_ids(hp, mp, idx);
+               idx++;
+               fnd = 1;
+       }
+       return fnd;
+}
+
+static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp)
+{
+       int idx = 1;
 
+       mdesc_for_each_node_by_name(hp, mp, "socket") {
+               u64 a;
+
+               mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+                       u64 t = mdesc_arc_target(hp, a);
+                       const char *name;
+                       const u64 *id;
+
+                       name = mdesc_node_name(hp, t);
+                       if (strcmp(name, "cpu"))
+                               continue;
+
+                       id = mdesc_get_property(hp, t, "id", NULL);
+                       if (*id < num_possible_cpus())
+                               cpu_data(*id).sock_id = idx;
+               }
                idx++;
        }
 }
 
+static void set_sock_ids(struct mdesc_handle *hp)
+{
+       u64 mp;
+
+       /* If machine description exposes sockets data use it.
+        * Otherwise fallback to use shared L3 or L2 caches.
+        */
+       mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
+       if (mp != MDESC_NODE_NULL)
+               return set_sock_ids_by_socket(hp, mp);
+
+       if (!set_sock_ids_by_cache(hp, 3))
+               set_sock_ids_by_cache(hp, 2);
+}
+
 static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
 {
        u64 a;
@@ -707,7 +791,6 @@ static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name)
                        continue;
 
                mark_proc_ids(hp, mp, idx);
-
                idx++;
        }
 }
@@ -900,6 +983,7 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask)
 
        set_core_ids(hp);
        set_proc_ids(hp);
+       set_sock_ids(hp);
 
        mdesc_release(hp);
 
index 6f7251f..c928bc6 100644 (file)
@@ -1002,6 +1002,38 @@ static int __init pcibios_init(void)
 subsys_initcall(pcibios_init);
 
 #ifdef CONFIG_SYSFS
+
+#define SLOT_NAME_SIZE  11  /* Max decimal digits + null in u32 */
+
+static void pcie_bus_slot_names(struct pci_bus *pbus)
+{
+       struct pci_dev *pdev;
+       struct pci_bus *bus;
+
+       list_for_each_entry(pdev, &pbus->devices, bus_list) {
+               char name[SLOT_NAME_SIZE];
+               struct pci_slot *pci_slot;
+               const u32 *slot_num;
+               int len;
+
+               slot_num = of_get_property(pdev->dev.of_node,
+                                          "physical-slot#", &len);
+
+               if (slot_num == NULL || len != 4)
+                       continue;
+
+               snprintf(name, sizeof(name), "%u", slot_num[0]);
+               pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL);
+
+               if (IS_ERR(pci_slot))
+                       pr_err("PCI: pci_create_slot returned %ld.\n",
+                              PTR_ERR(pci_slot));
+       }
+
+       list_for_each_entry(bus, &pbus->children, node)
+               pcie_bus_slot_names(bus);
+}
+
 static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus)
 {
        const struct pci_slot_names {
@@ -1053,18 +1085,29 @@ static int __init of_pci_slot_init(void)
 
        while ((pbus = pci_find_next_bus(pbus)) != NULL) {
                struct device_node *node;
+               struct pci_dev *pdev;
+
+               pdev = list_first_entry(&pbus->devices, struct pci_dev,
+                                       bus_list);
 
-               if (pbus->self) {
-                       /* PCI->PCI bridge */
-                       node = pbus->self->dev.of_node;
+               if (pdev && pci_is_pcie(pdev)) {
+                       pcie_bus_slot_names(pbus);
                } else {
-                       struct pci_pbm_info *pbm = pbus->sysdata;
 
-                       /* Host PCI controller */
-                       node = pbm->op->dev.of_node;
-               }
+                       if (pbus->self) {
+
+                               /* PCI->PCI bridge */
+                               node = pbus->self->dev.of_node;
+
+                       } else {
+                               struct pci_pbm_info *pbm = pbus->sysdata;
 
-               pci_bus_slot_names(node, pbus);
+                               /* Host PCI controller */
+                               node = pbm->op->dev.of_node;
+                       }
+
+                       pci_bus_slot_names(node, pbus);
+               }
        }
 
        return 0;
index c38d19f..f7b2617 100644 (file)
@@ -255,6 +255,24 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
        }
 }
 
+void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
+                            struct sun4v_2insn_patch_entry *end)
+{
+       while (start < end) {
+               unsigned long addr = start->addr;
+
+               *(unsigned int *) (addr +  0) = start->insns[0];
+               wmb();
+               __asm__ __volatile__("flush     %0" : : "r" (addr +  0));
+
+               *(unsigned int *) (addr +  4) = start->insns[1];
+               wmb();
+               __asm__ __volatile__("flush     %0" : : "r" (addr +  4));
+
+               start++;
+       }
+}
+
 static void __init sun4v_patch(void)
 {
        extern void sun4v_hvapi_init(void);
@@ -267,6 +285,9 @@ static void __init sun4v_patch(void)
 
        sun4v_patch_2insn_range(&__sun4v_2insn_patch,
                                &__sun4v_2insn_patch_end);
+       if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7)
+               sun_m7_patch_2insn_range(&__sun_m7_2insn_patch,
+                                        &__sun_m7_2insn_patch_end);
 
        sun4v_hvapi_init();
 }
index 61139d9..19cd08d 100644 (file)
@@ -60,8 +60,12 @@ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
        { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
 
+cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
+       [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL(cpu_core_sib_map);
 
 static cpumask_t smp_commenced_mask;
 
@@ -1243,6 +1247,15 @@ void smp_fill_in_sib_core_maps(void)
                }
        }
 
+       for_each_present_cpu(i)  {
+               unsigned int j;
+
+               for_each_present_cpu(j)  {
+                       if (cpu_data(i).sock_id == cpu_data(j).sock_id)
+                               cpumask_set_cpu(j, &cpu_core_sib_map[i]);
+               }
+       }
+
        for_each_present_cpu(i) {
                unsigned int j;
 
index 0924305..f1a2f68 100644 (file)
@@ -138,6 +138,11 @@ SECTIONS
                *(.pause_3insn_patch)
                __pause_3insn_patch_end = .;
        }
+       .sun_m7_2insn_patch : {
+               __sun_m7_2insn_patch = .;
+               *(.sun_m7_2insn_patch)
+               __sun_m7_2insn_patch_end = .;
+       }
        PERCPU_SECTION(SMP_CACHE_BYTES)
 
        . = ALIGN(PAGE_SIZE);
index 70d8171..c399e7b 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/perf_event.h>
 #include <linux/interrupt.h>
 #include <linux/kdebug.h>
+#include <linux/uaccess.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -29,7 +30,6 @@
 #include <asm/setup.h>
 #include <asm/smp.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>
 
 #include "mm_32.h"
 
@@ -196,7 +196,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (pagefault_disabled() || !mm)
                goto no_context;
 
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
index 4798232..e9268ea 100644 (file)
 #include <linux/kdebug.h>
 #include <linux/percpu.h>
 #include <linux/context_tracking.h>
+#include <linux/uaccess.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/openprom.h>
 #include <asm/oplib.h>
-#include <asm/uaccess.h>
 #include <asm/asi.h>
 #include <asm/lsu.h>
 #include <asm/sections.h>
@@ -330,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto intr_or_no_mm;
 
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
index 449f864..a454ec5 100644 (file)
@@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page)
        unsigned long vaddr;
        long idx, type;
 
-       /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+       preempt_disable();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
@@ -91,6 +91,7 @@ void __kunmap_atomic(void *kvaddr)
 
        if (vaddr < FIXADDR_START) { // FIXME
                pagefault_enable();
+               preempt_enable();
                return;
        }
 
@@ -126,5 +127,6 @@ void __kunmap_atomic(void *kvaddr)
 
        kmap_atomic_idx_pop();
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
index 4ca0d6b..c5d08b8 100644 (file)
@@ -54,6 +54,7 @@
 #include "init_64.h"
 
 unsigned long kern_linear_pte_xor[4] __read_mostly;
+static unsigned long page_cache4v_flag;
 
 /* A bitmap, two bits for every 256MB of physical memory.  These two
  * bits determine what page size we use for kernel linear
@@ -1909,11 +1910,24 @@ static void __init sun4u_linear_pte_xor_finalize(void)
 
 static void __init sun4v_linear_pte_xor_finalize(void)
 {
+       unsigned long pagecv_flag;
+
+       /* Bit 9 of TTE is no longer CV bit on M7 processor and it instead
+        * enables MCD error. Do not set bit 9 on M7 processor.
+        */
+       switch (sun4v_chip_type) {
+       case SUN4V_CHIP_SPARC_M7:
+               pagecv_flag = 0x00;
+               break;
+       default:
+               pagecv_flag = _PAGE_CV_4V;
+               break;
+       }
 #ifndef CONFIG_DEBUG_PAGEALLOC
        if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
                kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
                        PAGE_OFFSET;
-               kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+               kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag |
                                           _PAGE_P_4V | _PAGE_W_4V);
        } else {
                kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
@@ -1922,7 +1936,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
        if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
                kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
                        PAGE_OFFSET;
-               kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+               kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag |
                                           _PAGE_P_4V | _PAGE_W_4V);
        } else {
                kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
@@ -1931,7 +1945,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
        if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
                kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
                        PAGE_OFFSET;
-               kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+               kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag |
                                           _PAGE_P_4V | _PAGE_W_4V);
        } else {
                kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
@@ -1958,6 +1972,13 @@ static phys_addr_t __init available_memory(void)
        return available;
 }
 
+#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
+#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
+#define __DIRTY_BITS_4U         (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
+#define __DIRTY_BITS_4V         (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
+#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
+#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
+
 /* We need to exclude reserved regions. This exclusion will include
  * vmlinux and initrd. To be more precise the initrd size could be used to
  * compute a new lower limit because it is freed later during initialization.
@@ -2034,6 +2055,25 @@ void __init paging_init(void)
        memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
 #endif
 
+       /* TTE.cv bit on sparc v9 occupies the same position as TTE.mcde
+        * bit on M7 processor. This is a conflicting usage of the same
+        * bit. Enabling TTE.cv on M7 would turn on Memory Corruption
+        * Detection error on all pages and this will lead to problems
+        * later. Kernel does not run with MCD enabled and hence rest
+        * of the required steps to fully configure memory corruption
+        * detection are not taken. We need to ensure TTE.mcde is not
+        * set on M7 processor. Compute the value of cacheability
+        * flag for use later taking this into consideration.
+        */
+       switch (sun4v_chip_type) {
+       case SUN4V_CHIP_SPARC_M7:
+               page_cache4v_flag = _PAGE_CP_4V;
+               break;
+       default:
+               page_cache4v_flag = _PAGE_CACHE_4V;
+               break;
+       }
+
        if (tlb_type == hypervisor)
                sun4v_pgprot_init();
        else
@@ -2274,13 +2314,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
-#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
-#define __DIRTY_BITS_4U         (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
-#define __DIRTY_BITS_4V         (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
-#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
-#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
-
 pgprot_t PAGE_KERNEL __read_mostly;
 EXPORT_SYMBOL(PAGE_KERNEL);
 
@@ -2312,8 +2345,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
                    _PAGE_P_4U | _PAGE_W_4U);
        if (tlb_type == hypervisor)
                pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
-                           _PAGE_CP_4V | _PAGE_CV_4V |
-                           _PAGE_P_4V | _PAGE_W_4V);
+                           page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V);
 
        pte_base |= _PAGE_PMD_HUGE;
 
@@ -2450,14 +2482,14 @@ static void __init sun4v_pgprot_init(void)
        int i;
 
        PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
-                               _PAGE_CACHE_4V | _PAGE_P_4V |
+                               page_cache4v_flag | _PAGE_P_4V |
                                __ACCESS_BITS_4V | __DIRTY_BITS_4V |
                                _PAGE_EXEC_4V);
        PAGE_KERNEL_LOCKED = PAGE_KERNEL;
 
        _PAGE_IE = _PAGE_IE_4V;
        _PAGE_E = _PAGE_E_4V;
-       _PAGE_CACHE = _PAGE_CACHE_4V;
+       _PAGE_CACHE = page_cache4v_flag;
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
        kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
@@ -2465,8 +2497,8 @@ static void __init sun4v_pgprot_init(void)
        kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
                PAGE_OFFSET;
 #endif
-       kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
-                                  _PAGE_P_4V | _PAGE_W_4V);
+       kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V |
+                                  _PAGE_W_4V);
 
        for (i = 1; i < 4; i++)
                kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
@@ -2479,12 +2511,12 @@ static void __init sun4v_pgprot_init(void)
                             _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
                             _PAGE_SZ64K_4V | _PAGE_SZ8K_4V);
 
-       page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V;
-       page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+       page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag;
+       page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
                       __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
-       page_copy   = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+       page_copy   = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
                       __ACCESS_BITS_4V | _PAGE_EXEC_4V);
-       page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+       page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
                         __ACCESS_BITS_4V | _PAGE_EXEC_4V);
 
        page_exec_bit = _PAGE_EXEC_4V;
@@ -2542,7 +2574,7 @@ static unsigned long kern_large_tte(unsigned long paddr)
               _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
        if (tlb_type == hypervisor)
                val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
-                      _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V |
+                      page_cache4v_flag | _PAGE_P_4V |
                       _PAGE_EXEC_4V | _PAGE_W_4V);
 
        return val | paddr;
@@ -2706,7 +2738,7 @@ void hugetlb_setup(struct pt_regs *regs)
        struct mm_struct *mm = current->mm;
        struct tsb_config *tp;
 
-       if (in_atomic() || !mm) {
+       if (faulthandler_disabled() || !mm) {
                const struct exception_table_entry *entry;
 
                entry = search_exception_tables(regs->tpc);
index 7b11c5f..0496970 100644 (file)
@@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
 
 #define atomic64_inc_not_zero(v)       atomic64_add_unless((v), 1, 0)
 
-/* Define this to indicate that cmpxchg is an efficient operation. */
-#define __HAVE_ARCH_CMPXCHG
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_TILE_ATOMIC_64_H */
index 9383118..76b0d0e 100644 (file)
@@ -55,7 +55,7 @@ static inline const struct cpumask *cpumask_of_node(int node)
 #define topology_physical_package_id(cpu)       ((void)(cpu), 0)
 #define topology_core_id(cpu)                   (cpu)
 #define topology_core_cpumask(cpu)              ((void)(cpu), cpu_online_mask)
-#define topology_thread_cpumask(cpu)            cpumask_of(cpu)
+#define topology_sibling_cpumask(cpu)           cpumask_of(cpu)
 #endif
 
 #endif /* _ASM_TILE_TOPOLOGY_H */
index f41cb53..a33276b 100644 (file)
@@ -78,7 +78,8 @@ int __range_ok(unsigned long addr, unsigned long size);
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -192,7 +193,8 @@ extern int __get_user_bad(void)
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -274,7 +276,8 @@ extern int __put_user_bad(void)
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -330,7 +333,8 @@ extern int __put_user_bad(void)
  * @from: Source address, in kernel space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -366,7 +370,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
  * @from: Source address, in user space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -437,7 +442,8 @@ static inline unsigned long __must_check copy_from_user(void *to,
  * @from: Source address, in user space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to user space.  Caller must check
  * the specified blocks with access_ok() before calling this function.
index e83cc99..3f4f58d 100644 (file)
@@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_regs *regs,
 
        /*
         * If we're in an interrupt, have no user context or are running in an
-        * atomic region then we must not take the fault.
+        * region with pagefaults disabled then we must not take the fault.
         */
-       if (in_atomic() || !mm) {
+       if (pagefault_disabled() || !mm) {
                vma = NULL;  /* happy compiler */
                goto bad_area_nosemaphore;
        }
index 6aa2f26..fcd5450 100644 (file)
@@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
        int idx, type;
        pte_t *pte;
 
-       /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+       preempt_disable();
        pagefault_disable();
 
        /* Avoid icache flushes by disallowing atomic executable mappings. */
@@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr)
        }
 
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
 
index 8e4daf4..47ff9b7 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/hardirq.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -35,10 +36,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
        *code_out = SEGV_MAPERR;
 
        /*
-        * If the fault was during atomic operation, don't take the fault, just
+        * If the fault was with pagefaults disabled, don't take the fault, just
         * fail.
         */
-       if (in_atomic())
+       if (faulthandler_disabled())
                goto out_nosemaphore;
 
        if (is_user)
index 0dc922d..afccef5 100644 (file)
@@ -218,7 +218,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm)
+       if (faulthandler_disabled() || !mm)
                goto no_context;
 
        if (user_mode(regs))
index 226d569..4e986e8 100644 (file)
@@ -127,7 +127,8 @@ config X86
        select MODULES_USE_ELF_RELA if X86_64
        select CLONE_BACKWARDS if X86_32
        select ARCH_USE_BUILTIN_BSWAP
-       select ARCH_USE_QUEUE_RWLOCK
+       select ARCH_USE_QUEUED_SPINLOCKS
+       select ARCH_USE_QUEUED_RWLOCKS
        select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
        select OLD_SIGACTION if X86_32
        select COMPAT_OLD_SIGACTION if IA32_EMULATION
@@ -666,7 +667,7 @@ config PARAVIRT_DEBUG
 config PARAVIRT_SPINLOCKS
        bool "Paravirtualization layer for spinlocks"
        depends on PARAVIRT && SMP
-       select UNINLINE_SPIN_UNLOCK
+       select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
        ---help---
          Paravirtualized spinlocks allow a pvops backend to replace the
          spinlock implementation with something virtualization-friendly
index ef17683..48304b8 100644 (file)
@@ -1109,6 +1109,8 @@ struct boot_params *make_boot_params(struct efi_config *c)
        if (!cmdline_ptr)
                goto fail;
        hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
+       /* Fill in upper bits of command line address, NOP on 32 bit  */
+       boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
 
        hdr->ramdisk_image = 0;
        hdr->ramdisk_size = 0;
index 89dd0d7..805d25c 100644 (file)
@@ -2,15 +2,14 @@
 #define BOOT_COMPRESSED_MISC_H
 
 /*
- * we have to be careful, because no indirections are allowed here, and
- * paravirt_ops is a kind of one. As it will only run in baremetal anyway,
- * we just keep it from happening
+ * Special hack: we have to be careful, because no indirections are allowed here,
+ * and paravirt_ops is a kind of one. As it will only run in baremetal anyway,
+ * we just keep it from happening. (This list needs to be extended when new
+ * paravirt and debugging variants are added.)
  */
 #undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_SPINLOCKS
 #undef CONFIG_KASAN
-#ifdef CONFIG_X86_32
-#define _ASM_X86_DESC_H 1
-#endif
 
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
index 959e45b..e51a8f8 100644 (file)
 #define smp_mb()       mb()
 #define smp_rmb()      dma_rmb()
 #define smp_wmb()      barrier()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
 #else /* !SMP */
 #define smp_mb()       barrier()
 #define smp_rmb()      barrier()
 #define smp_wmb()      barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
 #endif /* SMP */
 
 #define read_barrier_depends()         do { } while (0)
index 99c105d..ad19841 100644 (file)
@@ -4,8 +4,6 @@
 #include <linux/compiler.h>
 #include <asm/alternative.h> /* Provides LOCK_PREFIX */
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 /*
  * Non-existant functions to indicate usage errors at link time
  * (or compile-time if the compiler implements __compiletime_error().
index 808dae6..1f5b728 100644 (file)
@@ -127,50 +127,14 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
 
 #define dma_alloc_coherent(d,s,h,f)    dma_alloc_attrs(d,s,h,f,NULL)
 
-static inline void *
+void *
 dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, struct dma_attrs *attrs)
-{
-       struct dma_map_ops *ops = get_dma_ops(dev);
-       void *memory;
-
-       gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
-       if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
-               return memory;
-
-       if (!dev)
-               dev = &x86_dma_fallback_dev;
-
-       if (!is_device_dma_capable(dev))
-               return NULL;
-
-       if (!ops->alloc)
-               return NULL;
-
-       memory = ops->alloc(dev, size, dma_handle,
-                           dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
-       debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
-
-       return memory;
-}
+               gfp_t gfp, struct dma_attrs *attrs);
 
 #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
 
-static inline void dma_free_attrs(struct device *dev, size_t size,
-                                 void *vaddr, dma_addr_t bus,
-                                 struct dma_attrs *attrs)
-{
-       struct dma_map_ops *ops = get_dma_ops(dev);
-
-       WARN_ON(irqs_disabled());       /* for portability */
-
-       if (dma_release_from_coherent(dev, get_order(size), vaddr))
-               return;
-
-       debug_dma_free_coherent(dev, size, vaddr, bus);
-       if (ops->free)
-               ops->free(dev, size, vaddr, bus, attrs);
-}
+void dma_free_attrs(struct device *dev, size_t size,
+                   void *vaddr, dma_addr_t bus,
+                   struct dma_attrs *attrs);
 
 #endif
index e42f758..055ea99 100644 (file)
@@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper;
 /* Recognized hypervisors */
 extern const struct hypervisor_x86 x86_hyper_vmware;
 extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+extern const struct hypervisor_x86 x86_hyper_xen;
 extern const struct hypervisor_x86 x86_hyper_kvm;
 
 extern void init_hypervisor(struct cpuinfo_x86 *c);
index dea2e7e..f4a555b 100644 (file)
@@ -207,6 +207,7 @@ union kvm_mmu_page_role {
                unsigned nxe:1;
                unsigned cr0_wp:1;
                unsigned smep_andnot_wp:1;
+               unsigned smap_andnot_wp:1;
        };
 };
 
@@ -400,6 +401,7 @@ struct kvm_vcpu_arch {
        struct kvm_mmu_memory_cache mmu_page_header_cache;
 
        struct fpu guest_fpu;
+       bool eager_fpu;
        u64 xcr0;
        u64 guest_supported_xcr0;
        u32 guest_xstate_size;
@@ -743,6 +745,7 @@ struct kvm_x86_ops {
        void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
        unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
        void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+       void (*fpu_activate)(struct kvm_vcpu *vcpu);
        void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
 
        void (*tlb_flush)(struct kvm_vcpu *vcpu);
index 8957810..d143bfa 100644 (file)
@@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
+                                                       u32 val)
+{
+       PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
+}
+
+static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+       PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
+}
+
+static __always_inline void pv_wait(u8 *ptr, u8 val)
+{
+       PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
+}
+
+static __always_inline void pv_kick(int cpu)
+{
+       PVOP_VCALL1(pv_lock_ops.kick, cpu);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
 static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
                                                        __ticket_t ticket)
 {
@@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
        PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
 }
 
-#endif
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+#endif /* SMP && PARAVIRT_SPINLOCKS */
 
 #ifdef CONFIG_X86_32
 #define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
index f7b0b5c..8766c7c 100644 (file)
@@ -333,9 +333,19 @@ struct arch_spinlock;
 typedef u16 __ticket_t;
 #endif
 
+struct qspinlock;
+
 struct pv_lock_ops {
+#ifdef CONFIG_QUEUED_SPINLOCKS
+       void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
+       struct paravirt_callee_save queued_spin_unlock;
+
+       void (*wait)(u8 *ptr, u8 val);
+       void (*kick)(int cpu);
+#else /* !CONFIG_QUEUED_SPINLOCKS */
        struct paravirt_callee_save lock_spinning;
        void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
 };
 
 /* This contains all the paravirt structures: we get a convenient
index 8f32718..dca7171 100644 (file)
@@ -99,11 +99,9 @@ static __always_inline bool should_resched(void)
   extern asmlinkage void ___preempt_schedule(void);
 # define __preempt_schedule() asm ("call ___preempt_schedule")
   extern asmlinkage void preempt_schedule(void);
-# ifdef CONFIG_CONTEXT_TRACKING
-    extern asmlinkage void ___preempt_schedule_context(void);
-#   define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
-    extern asmlinkage void preempt_schedule_context(void);
-# endif
+  extern asmlinkage void ___preempt_schedule_notrace(void);
+# define __preempt_schedule_notrace() asm ("call ___preempt_schedule_notrace")
+  extern asmlinkage void preempt_schedule_notrace(void);
 #endif
 
 #endif /* __ASM_PREEMPT_H */
index 19507ff..5fabf13 100644 (file)
@@ -107,7 +107,7 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
 static inline int user_mode(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
-       return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;
+       return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL;
 #else
        return !!(regs->cs & 3);
 #endif
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
new file mode 100644 (file)
index 0000000..9d51fae
--- /dev/null
@@ -0,0 +1,57 @@
+#ifndef _ASM_X86_QSPINLOCK_H
+#define _ASM_X86_QSPINLOCK_H
+
+#include <asm/cpufeature.h>
+#include <asm-generic/qspinlock_types.h>
+#include <asm/paravirt.h>
+
+#define        queued_spin_unlock queued_spin_unlock
+/**
+ * queued_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * A smp_store_release() on the least-significant byte.
+ */
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
+{
+       smp_store_release((u8 *)lock, 0);
+}
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+       pv_queued_spin_lock_slowpath(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+       pv_queued_spin_unlock(lock);
+}
+#else
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+       native_queued_spin_unlock(lock);
+}
+#endif
+
+#define virt_queued_spin_lock virt_queued_spin_lock
+
+static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+{
+       if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+               return false;
+
+       while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
+               cpu_relax();
+
+       return true;
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_X86_QSPINLOCK_H */
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
new file mode 100644 (file)
index 0000000..b002e71
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef __ASM_QSPINLOCK_PARAVIRT_H
+#define __ASM_QSPINLOCK_PARAVIRT_H
+
+PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+
+#endif
index 5a9856e..7d5a192 100644 (file)
 #define TLS_SIZE                       (GDT_ENTRY_TLS_ENTRIES* 8)
 
 #ifdef __KERNEL__
+
+/*
+ * early_idt_handler_array is an array of entry points referenced in the
+ * early IDT.  For simplicity, it's a real array with one entry point
+ * every nine bytes.  That leaves room for an optional 'push $0' if the
+ * vector has no error code (two bytes), a 'push $vector_number' (two
+ * bytes), and a jump to the common entry code (up to five bytes).
+ */
+#define EARLY_IDT_HANDLER_SIZE 9
+
 #ifndef __ASSEMBLY__
 
-extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
+extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
 #ifdef CONFIG_TRACING
-# define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handler_array early_idt_handler_array
 #endif
 
 /*
index 17a8dce..222a6a3 100644 (file)
@@ -37,16 +37,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
 DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
 
-static inline struct cpumask *cpu_sibling_mask(int cpu)
-{
-       return per_cpu(cpu_sibling_map, cpu);
-}
-
-static inline struct cpumask *cpu_core_mask(int cpu)
-{
-       return per_cpu(cpu_core_map, cpu);
-}
-
 static inline struct cpumask *cpu_llc_shared_mask(int cpu)
 {
        return per_cpu(cpu_llc_shared_map, cpu);
index cf87de3..be0a059 100644 (file)
 extern struct static_key paravirt_ticketlocks_enabled;
 static __always_inline bool static_key_false(struct static_key *key);
 
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#else
+
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 
 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
@@ -169,7 +173,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
        struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
        tmp.head &= ~TICKET_SLOWPATH_FLAG;
-       return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
+       return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended arch_spin_is_contended
 
@@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
                cpu_relax();
        }
 }
+#endif /* CONFIG_QUEUED_SPINLOCKS */
 
 /*
  * Read-write spinlocks, allowing multiple readers
index 5f9d757..65c3e37 100644 (file)
@@ -23,6 +23,9 @@ typedef u32 __ticketpair_t;
 
 #define TICKET_SHIFT   (sizeof(__ticket_t) * 8)
 
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
 typedef struct arch_spinlock {
        union {
                __ticketpair_t head_tail;
@@ -33,6 +36,7 @@ typedef struct arch_spinlock {
 } arch_spinlock_t;
 
 #define __ARCH_SPIN_LOCK_UNLOCKED      { { 0 } }
+#endif /* CONFIG_QUEUED_SPINLOCKS */
 
 #include <asm-generic/qrwlock_types.h>
 
index 0e8f04f..5a77593 100644 (file)
@@ -124,7 +124,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
 #ifdef ENABLE_TOPO_DEFINES
 #define topology_core_cpumask(cpu)             (per_cpu(cpu_core_map, cpu))
-#define topology_thread_cpumask(cpu)           (per_cpu(cpu_sibling_map, cpu))
+#define topology_sibling_cpumask(cpu)          (per_cpu(cpu_sibling_map, cpu))
 #endif
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
index ace9dec..a8df874 100644 (file)
@@ -74,7 +74,8 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -145,7 +146,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -240,7 +242,8 @@ extern void __put_user_8(void);
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -455,7 +458,8 @@ struct __large_struct { unsigned long buf[100]; };
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -479,7 +483,8 @@ struct __large_struct { unsigned long buf[100]; };
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
index 3c03a5d..7c8ad34 100644 (file)
@@ -70,7 +70,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
  * @from: Source address, in kernel space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -117,7 +118,8 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
  * @from: Source address, in user space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.  Caller must check
  * the specified block with access_ok() before calling this function.
index 358dcd3..c44a5d5 100644 (file)
@@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev,
        return false;
 }
 
+static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+       return __get_free_pages(__GFP_NOWARN, order);
+}
+
 #endif /* _ASM_X86_XEN_PAGE_H */
index c469490..3c6bb34 100644 (file)
 #define MSR_CORE_C3_RESIDENCY          0x000003fc
 #define MSR_CORE_C6_RESIDENCY          0x000003fd
 #define MSR_CORE_C7_RESIDENCY          0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY      0x000003ff
 #define MSR_PKG_C2_RESIDENCY           0x0000060d
 #define MSR_PKG_C8_RESIDENCY           0x00000630
 #define MSR_PKG_C9_RESIDENCY           0x00000631
index 9f6b934..b27f6ec 100644 (file)
@@ -41,6 +41,25 @@ void common(void) {
        OFFSET(pbe_orig_address, pbe, orig_address);
        OFFSET(pbe_next, pbe, next);
 
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+       BLANK();
+       OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax);
+       OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx);
+       OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx);
+       OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx);
+       OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si);
+       OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di);
+       OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp);
+       OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp);
+       OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip);
+
+       BLANK();
+       OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+
+       BLANK();
+       OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
+#endif
+
 #ifdef CONFIG_PARAVIRT
        BLANK();
        OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
index 47703ae..6ce3902 100644 (file)
@@ -17,17 +17,6 @@ void foo(void);
 
 void foo(void)
 {
-       OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax);
-       OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx);
-       OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx);
-       OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx);
-       OFFSET(IA32_SIGCONTEXT_si, sigcontext, si);
-       OFFSET(IA32_SIGCONTEXT_di, sigcontext, di);
-       OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp);
-       OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp);
-       OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip);
-       BLANK();
-
        OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
        OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
        OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
@@ -37,10 +26,6 @@ void foo(void)
        OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
        BLANK();
 
-       OFFSET(TI_sysenter_return, thread_info, sysenter_return);
-       OFFSET(TI_cpu, thread_info, cpu);
-       BLANK();
-
        OFFSET(PT_EBX, pt_regs, bx);
        OFFSET(PT_ECX, pt_regs, cx);
        OFFSET(PT_EDX, pt_regs, dx);
@@ -60,9 +45,6 @@ void foo(void)
        OFFSET(PT_OLDSS,  pt_regs, ss);
        BLANK();
 
-       OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
-       BLANK();
-
        OFFSET(saved_context_gdt_desc, saved_context, gdt_desc);
        BLANK();
 
index 5ce6f2d..dcaab87 100644 (file)
@@ -29,27 +29,6 @@ int main(void)
        BLANK();
 #endif
 
-#ifdef CONFIG_IA32_EMULATION
-       OFFSET(TI_sysenter_return, thread_info, sysenter_return);
-       BLANK();
-
-#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
-       ENTRY(ax);
-       ENTRY(bx);
-       ENTRY(cx);
-       ENTRY(dx);
-       ENTRY(si);
-       ENTRY(di);
-       ENTRY(bp);
-       ENTRY(sp);
-       ENTRY(ip);
-       BLANK();
-#undef ENTRY
-
-       OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
-       BLANK();
-#endif
-
 #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
        ENTRY(bx);
        ENTRY(cx);
index 36ce402..d820d8e 100644 (file)
@@ -27,8 +27,8 @@
 
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
-#ifdef CONFIG_XEN_PVHVM
-       &x86_hyper_xen_hvm,
+#ifdef CONFIG_XEN
+       &x86_hyper_xen,
 #endif
        &x86_hyper_vmware,
        &x86_hyper_ms_hyperv,
index e535533..95cf78d 100644 (file)
 static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
 #define rcu_dereference_check_mce(p) \
-       rcu_dereference_index_check((p), \
-                             rcu_read_lock_sched_held() || \
-                             lockdep_is_held(&mce_chrdev_read_mutex))
+({ \
+       rcu_lockdep_assert(rcu_read_lock_sched_held() || \
+                          lockdep_is_held(&mce_chrdev_read_mutex), \
+                          "suspicious rcu_dereference_check_mce() usage"); \
+       smp_load_acquire(&(p)); \
+})
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
@@ -708,6 +711,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
                          struct pt_regs *regs)
 {
        int i, ret = 0;
+       char *tmp;
 
        for (i = 0; i < mca_cfg.banks; i++) {
                m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
@@ -716,9 +720,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
                        if (quirk_no_way_out)
                                quirk_no_way_out(i, m, regs);
                }
-               if (mce_severity(m, mca_cfg.tolerant, msg, true) >=
-                   MCE_PANIC_SEVERITY)
+
+               if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+                       *msg = tmp;
                        ret = 1;
+               }
        }
        return ret;
 }
@@ -1884,7 +1890,7 @@ out:
 static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
 {
        poll_wait(file, &mce_chrdev_wait, wait);
-       if (rcu_access_index(mcelog.next))
+       if (READ_ONCE(mcelog.next))
                return POLLIN | POLLRDNORM;
        if (!mce_apei_read_done && apei_check_mce())
                return POLLIN | POLLRDNORM;
@@ -1929,8 +1935,8 @@ void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
 }
 EXPORT_SYMBOL_GPL(register_mce_write_callback);
 
-ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
-                        size_t usize, loff_t *off)
+static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
+                               size_t usize, loff_t *off)
 {
        if (mce_write)
                return mce_write(filp, ubuf, usize, off);
index 87848eb..5801a14 100644 (file)
@@ -135,6 +135,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 }
 
 static atomic_t active_events;
+static atomic_t pmc_refcount;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -190,6 +191,7 @@ static bool check_hw_exists(void)
        u64 val, val_fail, val_new= ~0;
        int i, reg, reg_fail, ret = 0;
        int bios_fail = 0;
+       int reg_safe = -1;
 
        /*
         * Check to see if the BIOS enabled any of the counters, if so
@@ -204,6 +206,8 @@ static bool check_hw_exists(void)
                        bios_fail = 1;
                        val_fail = val;
                        reg_fail = reg;
+               } else {
+                       reg_safe = i;
                }
        }
 
@@ -222,11 +226,22 @@ static bool check_hw_exists(void)
        }
 
        /*
+        * If all the counters are enabled, the below test will always
+        * fail.  The tools will also become useless in this scenario.
+        * Just fail and disable the hardware counters.
+        */
+
+       if (reg_safe == -1) {
+               reg = reg_safe;
+               goto msr_fail;
+       }
+
+       /*
         * Read the current value, change it and read it back to see if it
         * matches, this is needed to detect certain hardware emulators
         * (qemu/kvm) that don't trap on the MSR access and always return 0s.
         */
-       reg = x86_pmu_event_addr(0);
+       reg = x86_pmu_event_addr(reg_safe);
        if (rdmsrl_safe(reg, &val))
                goto msr_fail;
        val ^= 0xffffUL;
@@ -256,11 +271,8 @@ msr_fail:
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
-       if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
-               release_pmc_hardware();
-               release_ds_buffers();
-               mutex_unlock(&pmc_reserve_mutex);
-       }
+       x86_release_hardware();
+       atomic_dec(&active_events);
 }
 
 void hw_perf_lbr_event_destroy(struct perf_event *event)
@@ -310,6 +322,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
        return x86_pmu_extra_regs(val, event);
 }
 
+int x86_reserve_hardware(void)
+{
+       int err = 0;
+
+       if (!atomic_inc_not_zero(&pmc_refcount)) {
+               mutex_lock(&pmc_reserve_mutex);
+               if (atomic_read(&pmc_refcount) == 0) {
+                       if (!reserve_pmc_hardware())
+                               err = -EBUSY;
+                       else
+                               reserve_ds_buffers();
+               }
+               if (!err)
+                       atomic_inc(&pmc_refcount);
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+
+       return err;
+}
+
+void x86_release_hardware(void)
+{
+       if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
+               release_pmc_hardware();
+               release_ds_buffers();
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+}
+
 /*
  * Check if we can create event of a certain type (that no conflicting events
  * are present).
@@ -322,21 +363,34 @@ int x86_add_exclusive(unsigned int what)
                return 0;
 
        mutex_lock(&pmc_reserve_mutex);
-       for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++)
+       for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
                if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
                        goto out;
+       }
 
        atomic_inc(&x86_pmu.lbr_exclusive[what]);
        ret = 0;
 
 out:
        mutex_unlock(&pmc_reserve_mutex);
+
+       /*
+        * Assuming that all exclusive events will share the PMI handler
+        * (which checks active_events for whether there is work to do),
+        * we can bump active_events counter right here, except for
+        * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init()
+        * path, which already bumps active_events for them.
+        */
+       if (!ret && what != x86_lbr_exclusive_lbr)
+               atomic_inc(&active_events);
+
        return ret;
 }
 
 void x86_del_exclusive(unsigned int what)
 {
        atomic_dec(&x86_pmu.lbr_exclusive[what]);
+       atomic_dec(&active_events);
 }
 
 int x86_setup_perfctr(struct perf_event *event)
@@ -513,22 +567,11 @@ static int __x86_pmu_event_init(struct perf_event *event)
        if (!x86_pmu_initialized())
                return -ENODEV;
 
-       err = 0;
-       if (!atomic_inc_not_zero(&active_events)) {
-               mutex_lock(&pmc_reserve_mutex);
-               if (atomic_read(&active_events) == 0) {
-                       if (!reserve_pmc_hardware())
-                               err = -EBUSY;
-                       else
-                               reserve_ds_buffers();
-               }
-               if (!err)
-                       atomic_inc(&active_events);
-               mutex_unlock(&pmc_reserve_mutex);
-       }
+       err = x86_reserve_hardware();
        if (err)
                return err;
 
+       atomic_inc(&active_events);
        event->destroy = hw_perf_event_destroy;
 
        event->hw.idx = -1;
@@ -611,6 +654,7 @@ struct sched_state {
        int     event;          /* event index */
        int     counter;        /* counter index */
        int     unassigned;     /* number of events to be assigned left */
+       int     nr_gp;          /* number of GP counters used */
        unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 };
 
@@ -620,27 +664,29 @@ struct sched_state {
 struct perf_sched {
        int                     max_weight;
        int                     max_events;
-       struct perf_event       **events;
-       struct sched_state      state;
+       int                     max_gp;
        int                     saved_states;
+       struct event_constraint **constraints;
+       struct sched_state      state;
        struct sched_state      saved[SCHED_STATES_MAX];
 };
 
 /*
  * Initialize interator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
-                           int num, int wmin, int wmax)
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+                           int num, int wmin, int wmax, int gpmax)
 {
        int idx;
 
        memset(sched, 0, sizeof(*sched));
        sched->max_events       = num;
        sched->max_weight       = wmax;
-       sched->events           = events;
+       sched->max_gp           = gpmax;
+       sched->constraints      = constraints;
 
        for (idx = 0; idx < num; idx++) {
-               if (events[idx]->hw.constraint->weight == wmin)
+               if (constraints[idx]->weight == wmin)
                        break;
        }
 
@@ -687,7 +733,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
        if (sched->state.event >= sched->max_events)
                return false;
 
-       c = sched->events[sched->state.event]->hw.constraint;
+       c = sched->constraints[sched->state.event];
        /* Prefer fixed purpose counters */
        if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
                idx = INTEL_PMC_IDX_FIXED;
@@ -696,11 +742,16 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
                                goto done;
                }
        }
+
        /* Grab the first unused counter starting with idx */
        idx = sched->state.counter;
        for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-               if (!__test_and_set_bit(idx, sched->state.used))
+               if (!__test_and_set_bit(idx, sched->state.used)) {
+                       if (sched->state.nr_gp++ >= sched->max_gp)
+                               return false;
+
                        goto done;
+               }
        }
 
        return false;
@@ -745,7 +796,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
                        if (sched->state.weight > sched->max_weight)
                                return false;
                }
-               c = sched->events[sched->state.event]->hw.constraint;
+               c = sched->constraints[sched->state.event];
        } while (c->weight != sched->state.weight);
 
        sched->state.counter = 0;       /* start with first counter */
@@ -756,12 +807,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 /*
  * Assign a counter for each event.
  */
-int perf_assign_events(struct perf_event **events, int n,
-                       int wmin, int wmax, int *assign)
+int perf_assign_events(struct event_constraint **constraints, int n,
+                       int wmin, int wmax, int gpmax, int *assign)
 {
        struct perf_sched sched;
 
-       perf_sched_init(&sched, events, n, wmin, wmax);
+       perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
 
        do {
                if (!perf_sched_find_counter(&sched))
@@ -788,9 +839,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                x86_pmu.start_scheduling(cpuc);
 
        for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-               hwc = &cpuc->event_list[i]->hw;
+               cpuc->event_constraint[i] = NULL;
                c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
-               hwc->constraint = c;
+               cpuc->event_constraint[i] = c;
 
                wmin = min(wmin, c->weight);
                wmax = max(wmax, c->weight);
@@ -801,7 +852,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
         */
        for (i = 0; i < n; i++) {
                hwc = &cpuc->event_list[i]->hw;
-               c = hwc->constraint;
+               c = cpuc->event_constraint[i];
 
                /* never assigned */
                if (hwc->idx == -1)
@@ -821,9 +872,26 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
        }
 
        /* slow path */
-       if (i != n)
-               unsched = perf_assign_events(cpuc->event_list, n, wmin,
-                                            wmax, assign);
+       if (i != n) {
+               int gpmax = x86_pmu.num_counters;
+
+               /*
+                * Do not allow scheduling of more than half the available
+                * generic counters.
+                *
+                * This helps avoid counter starvation of sibling thread by
+                * ensuring at most half the counters cannot be in exclusive
+                * mode. There is no designated counters for the limits. Any
+                * N/2 counters can be used. This helps with events with
+                * specific counter constraints.
+                */
+               if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+                   READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+                       gpmax /= 2;
+
+               unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
+                                            wmax, gpmax, assign);
+       }
 
        /*
         * In case of success (unsched = 0), mark events as committed,
@@ -840,12 +908,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                        e = cpuc->event_list[i];
                        e->hw.flags |= PERF_X86_EVENT_COMMITTED;
                        if (x86_pmu.commit_scheduling)
-                               x86_pmu.commit_scheduling(cpuc, e, assign[i]);
+                               x86_pmu.commit_scheduling(cpuc, i, assign[i]);
                }
-       }
-
-       if (!assign || unsched) {
-
+       } else {
                for (i = 0; i < n; i++) {
                        e = cpuc->event_list[i];
                        /*
@@ -1058,13 +1123,16 @@ int x86_perf_event_set_period(struct perf_event *event)
 
        per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
-       /*
-        * The hw event starts counting from this event offset,
-        * mark it to be able to extra future deltas:
-        */
-       local64_set(&hwc->prev_count, (u64)-left);
+       if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
+           local64_read(&hwc->prev_count) != (u64)-left) {
+               /*
+                * The hw event starts counting from this event offset,
+                * mark it to be able to extra future deltas:
+                */
+               local64_set(&hwc->prev_count, (u64)-left);
 
-       wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+               wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+       }
 
        /*
         * Due to erratum on certan cpu we need
@@ -1292,8 +1360,10 @@ static void x86_pmu_del(struct perf_event *event, int flags)
                x86_pmu.put_event_constraints(cpuc, event);
 
        /* Delete the array entry. */
-       while (++i < cpuc->n_events)
+       while (++i < cpuc->n_events) {
                cpuc->event_list[i-1] = cpuc->event_list[i];
+               cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+       }
        --cpuc->n_events;
 
        perf_event_update_userpage(event);
@@ -1374,6 +1444,10 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
        u64 finish_clock;
        int ret;
 
+       /*
+        * All PMUs/events that share this PMI handler should make sure to
+        * increment active_events for their events.
+        */
        if (!atomic_read(&active_events))
                return NMI_DONE;
 
index 6ac5cb7..3e7fd27 100644 (file)
@@ -74,6 +74,9 @@ struct event_constraint {
 #define PERF_X86_EVENT_EXCL            0x0040 /* HT exclusivity on counter */
 #define PERF_X86_EVENT_DYNAMIC         0x0080 /* dynamic alloc'd constraint */
 #define PERF_X86_EVENT_RDPMC_ALLOWED   0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT       0x0200 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD     0x0400 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_FREERUNNING     0x0800 /* use freerunning PEBS */
 
 
 struct amd_nb {
@@ -87,6 +90,18 @@ struct amd_nb {
 #define MAX_PEBS_EVENTS                8
 
 /*
+ * Flags PEBS can handle without an PMI.
+ *
+ * TID can only be handled by flushing at context switch.
+ *
+ */
+#define PEBS_FREERUNNING_FLAGS \
+       (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+       PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+       PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+       PERF_SAMPLE_TRANSACTION)
+
+/*
  * A debug store configuration.
  *
  * We only support architectures that use 64bit fields.
@@ -132,10 +147,7 @@ enum intel_excl_state_type {
 };
 
 struct intel_excl_states {
-       enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
        enum intel_excl_state_type state[X86_PMC_IDX_MAX];
-       int  num_alloc_cntrs;/* #counters allocated */
-       int  max_alloc_cntrs;/* max #counters allowed */
        bool sched_started; /* true if scheduling has started */
 };
 
@@ -144,6 +156,11 @@ struct intel_excl_cntrs {
 
        struct intel_excl_states states[2];
 
+       union {
+               u16     has_exclusive[2];
+               u32     exclusive_present;
+       };
+
        int             refcnt;         /* per-core: #HT threads */
        unsigned        core_id;        /* per-core: core id */
 };
@@ -172,7 +189,11 @@ struct cpu_hw_events {
                                             added in the current transaction */
        int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
        u64                     tags[X86_PMC_IDX_MAX];
+
        struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+       struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
+
+       int                     n_excl; /* the number of exclusive events */
 
        unsigned int            group_flag;
        int                     is_fake;
@@ -519,12 +540,10 @@ struct x86_pmu {
        void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                 struct perf_event *event);
 
-       void            (*commit_scheduling)(struct cpu_hw_events *cpuc,
-                                            struct perf_event *event,
-                                            int cntr);
-
        void            (*start_scheduling)(struct cpu_hw_events *cpuc);
 
+       void            (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
+
        void            (*stop_scheduling)(struct cpu_hw_events *cpuc);
 
        struct event_constraint *event_constraints;
@@ -697,6 +716,10 @@ int x86_add_exclusive(unsigned int what);
 
 void x86_del_exclusive(unsigned int what);
 
+int x86_reserve_hardware(void);
+
+void x86_release_hardware(void);
+
 void hw_perf_lbr_event_destroy(struct perf_event *event);
 
 int x86_setup_perfctr(struct perf_event *event);
@@ -717,8 +740,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 
 void x86_pmu_enable_all(int added);
 
-int perf_assign_events(struct perf_event **events, int n,
-                       int wmin, int wmax, int *assign);
+int perf_assign_events(struct event_constraint **constraints, int n,
+                       int wmin, int wmax, int gpmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
 void x86_pmu_stop(struct perf_event *event, int flags);
@@ -860,6 +883,8 @@ void intel_pmu_pebs_enable_all(void);
 
 void intel_pmu_pebs_disable_all(void);
 
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+
 void intel_ds_init(void);
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
@@ -929,4 +954,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
        return NULL;
 }
 
+static inline int is_ht_workaround_enabled(void)
+{
+       return 0;
+}
 #endif /* CONFIG_CPU_SUP_INTEL */
index 219d3fb..b9826a9 100644 (file)
@@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs
  [ C(LL  ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = SLM_DMND_READ|SLM_LLC_MISS,
+               [ C(RESULT_MISS)   ] = 0,
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
@@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids
        [ C(OP_READ) ] = {
                /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
                [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0,
        },
        [ C(OP_WRITE) ] = {
                /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
@@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids
  [ C(ITLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
-               [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */
+               [ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
@@ -1904,9 +1903,8 @@ static void
 intel_start_scheduling(struct cpu_hw_events *cpuc)
 {
        struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xl, *xlo;
+       struct intel_excl_states *xl;
        int tid = cpuc->excl_thread_id;
-       int o_tid = 1 - tid; /* sibling thread */
 
        /*
         * nothing needed if in group validation mode
@@ -1917,35 +1915,52 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
        /*
         * no exclusion needed
         */
-       if (!excl_cntrs)
+       if (WARN_ON_ONCE(!excl_cntrs))
                return;
 
-       xlo = &excl_cntrs->states[o_tid];
        xl = &excl_cntrs->states[tid];
 
        xl->sched_started = true;
-       xl->num_alloc_cntrs = 0;
        /*
         * lock shared state until we are done scheduling
         * in stop_event_scheduling()
         * makes scheduling appear as a transaction
         */
-       WARN_ON_ONCE(!irqs_disabled());
        raw_spin_lock(&excl_cntrs->lock);
+}
 
-       /*
-        * save initial state of sibling thread
-        */
-       memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct event_constraint *c = cpuc->event_constraint[idx];
+       struct intel_excl_states *xl;
+       int tid = cpuc->excl_thread_id;
+
+       if (cpuc->is_fake || !is_ht_workaround_enabled())
+               return;
+
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return;
+
+       if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+               return;
+
+       xl = &excl_cntrs->states[tid];
+
+       lockdep_assert_held(&excl_cntrs->lock);
+
+       if (c->flags & PERF_X86_EVENT_EXCL)
+               xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
+       else
+               xl->state[cntr] = INTEL_EXCL_SHARED;
 }
 
 static void
 intel_stop_scheduling(struct cpu_hw_events *cpuc)
 {
        struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xl, *xlo;
+       struct intel_excl_states *xl;
        int tid = cpuc->excl_thread_id;
-       int o_tid = 1 - tid; /* sibling thread */
 
        /*
         * nothing needed if in group validation mode
@@ -1955,17 +1970,11 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
        /*
         * no exclusion needed
         */
-       if (!excl_cntrs)
+       if (WARN_ON_ONCE(!excl_cntrs))
                return;
 
-       xlo = &excl_cntrs->states[o_tid];
        xl = &excl_cntrs->states[tid];
 
-       /*
-        * make new sibling thread state visible
-        */
-       memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
-
        xl->sched_started = false;
        /*
         * release shared state lock (acquired in intel_start_scheduling())
@@ -1977,12 +1986,10 @@ static struct event_constraint *
 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
                           int idx, struct event_constraint *c)
 {
-       struct event_constraint *cx;
        struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xl, *xlo;
-       int is_excl, i;
+       struct intel_excl_states *xlo;
        int tid = cpuc->excl_thread_id;
-       int o_tid = 1 - tid; /* alternate */
+       int is_excl, i;
 
        /*
         * validating a group does not require
@@ -1994,34 +2001,8 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
        /*
         * no exclusion needed
         */
-       if (!excl_cntrs)
+       if (WARN_ON_ONCE(!excl_cntrs))
                return c;
-       /*
-        * event requires exclusive counter access
-        * across HT threads
-        */
-       is_excl = c->flags & PERF_X86_EVENT_EXCL;
-
-       /*
-        * xl = state of current HT
-        * xlo = state of sibling HT
-        */
-       xl = &excl_cntrs->states[tid];
-       xlo = &excl_cntrs->states[o_tid];
-
-       /*
-        * do not allow scheduling of more than max_alloc_cntrs
-        * which is set to half the available generic counters.
-        * this helps avoid counter starvation of sibling thread
-        * by ensuring at most half the counters cannot be in
-        * exclusive mode. There is not designated counters for the
-        * limits. Any N/2 counters can be used. This helps with
-        * events with specifix counter constraints
-        */
-       if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
-               return &emptyconstraint;
-
-       cx = c;
 
        /*
         * because we modify the constraint, we need
@@ -2032,10 +2013,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
         * been cloned (marked dynamic)
         */
        if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
-
-               /* sanity check */
-               if (idx < 0)
-                       return &emptyconstraint;
+               struct event_constraint *cx;
 
                /*
                 * grab pre-allocated constraint entry
@@ -2046,13 +2024,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
                 * initialize dynamic constraint
                 * with static constraint
                 */
-               memcpy(cx, c, sizeof(*cx));
+               *cx = *c;
 
                /*
                 * mark constraint as dynamic, so we
                 * can free it later on
                 */
                cx->flags |= PERF_X86_EVENT_DYNAMIC;
+               c = cx;
        }
 
        /*
@@ -2063,6 +2042,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
         */
 
        /*
+        * state of sibling HT
+        */
+       xlo = &excl_cntrs->states[tid ^ 1];
+
+       /*
+        * event requires exclusive counter access
+        * across HT threads
+        */
+       is_excl = c->flags & PERF_X86_EVENT_EXCL;
+       if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+               event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+               if (!cpuc->n_excl++)
+                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+       }
+
+       /*
         * Modify static constraint with current dynamic
         * state of thread
         *
@@ -2070,44 +2065,44 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
         * SHARED   : sibling counter measuring non-exclusive event
         * UNUSED   : sibling counter unused
         */
-       for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+       for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
                /*
                 * exclusive event in sibling counter
                 * our corresponding counter cannot be used
                 * regardless of our event
                 */
-               if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
-                       __clear_bit(i, cx->idxmsk);
+               if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+                       __clear_bit(i, c->idxmsk);
                /*
                 * if measuring an exclusive event, sibling
                 * measuring non-exclusive, then counter cannot
                 * be used
                 */
-               if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
-                       __clear_bit(i, cx->idxmsk);
+               if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+                       __clear_bit(i, c->idxmsk);
        }
 
        /*
         * recompute actual bit weight for scheduling algorithm
         */
-       cx->weight = hweight64(cx->idxmsk64);
+       c->weight = hweight64(c->idxmsk64);
 
        /*
         * if we return an empty mask, then switch
         * back to static empty constraint to avoid
         * the cost of freeing later on
         */
-       if (cx->weight == 0)
-               cx = &emptyconstraint;
+       if (c->weight == 0)
+               c = &emptyconstraint;
 
-       return cx;
+       return c;
 }
 
 static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                            struct perf_event *event)
 {
-       struct event_constraint *c1 = event->hw.constraint;
+       struct event_constraint *c1 = cpuc->event_constraint[idx];
        struct event_constraint *c2;
 
        /*
@@ -2133,10 +2128,8 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 {
        struct hw_perf_event *hwc = &event->hw;
        struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xlo, *xl;
-       unsigned long flags = 0; /* keep compiler happy */
        int tid = cpuc->excl_thread_id;
-       int o_tid = 1 - tid;
+       struct intel_excl_states *xl;
 
        /*
         * nothing needed if in group validation mode
@@ -2144,31 +2137,35 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
        if (cpuc->is_fake)
                return;
 
-       WARN_ON_ONCE(!excl_cntrs);
-
-       if (!excl_cntrs)
+       if (WARN_ON_ONCE(!excl_cntrs))
                return;
 
-       xl = &excl_cntrs->states[tid];
-       xlo = &excl_cntrs->states[o_tid];
+       if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+               hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+               if (!--cpuc->n_excl)
+                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+       }
 
        /*
-        * put_constraint may be called from x86_schedule_events()
-        * which already has the lock held so here make locking
-        * conditional
+        * If event was actually assigned, then mark the counter state as
+        * unused now.
         */
-       if (!xl->sched_started)
-               raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+       if (hwc->idx >= 0) {
+               xl = &excl_cntrs->states[tid];
 
-       /*
-        * if event was actually assigned, then mark the
-        * counter state as unused now
-        */
-       if (hwc->idx >= 0)
-               xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+               /*
+                * put_constraint may be called from x86_schedule_events()
+                * which already has the lock held so here make locking
+                * conditional.
+                */
+               if (!xl->sched_started)
+                       raw_spin_lock(&excl_cntrs->lock);
+
+               xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
 
-       if (!xl->sched_started)
-               raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
+               if (!xl->sched_started)
+                       raw_spin_unlock(&excl_cntrs->lock);
+       }
 }
 
 static void
@@ -2189,8 +2186,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
                                        struct perf_event *event)
 {
-       struct event_constraint *c = event->hw.constraint;
-
        intel_put_shared_regs_event_constraints(cpuc, event);
 
        /*
@@ -2198,48 +2193,8 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
         * all events are subject to and must call the
         * put_excl_constraints() routine
         */
-       if (c && cpuc->excl_cntrs)
+       if (cpuc->excl_cntrs)
                intel_put_excl_constraints(cpuc, event);
-
-       /* cleanup dynamic constraint */
-       if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
-               event->hw.constraint = NULL;
-}
-
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
-                                   struct perf_event *event, int cntr)
-{
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct event_constraint *c = event->hw.constraint;
-       struct intel_excl_states *xlo, *xl;
-       int tid = cpuc->excl_thread_id;
-       int o_tid = 1 - tid;
-       int is_excl;
-
-       if (cpuc->is_fake || !c)
-               return;
-
-       is_excl = c->flags & PERF_X86_EVENT_EXCL;
-
-       if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
-               return;
-
-       WARN_ON_ONCE(!excl_cntrs);
-
-       if (!excl_cntrs)
-               return;
-
-       xl = &excl_cntrs->states[tid];
-       xlo = &excl_cntrs->states[o_tid];
-
-       WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
-
-       if (cntr >= 0) {
-               if (is_excl)
-                       xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
-               else
-                       xlo->init_state[cntr] = INTEL_EXCL_SHARED;
-       }
 }
 
 static void intel_pebs_aliases_core2(struct perf_event *event)
@@ -2305,8 +2260,15 @@ static int intel_pmu_hw_config(struct perf_event *event)
        if (ret)
                return ret;
 
-       if (event->attr.precise_ip && x86_pmu.pebs_aliases)
-               x86_pmu.pebs_aliases(event);
+       if (event->attr.precise_ip) {
+               if (!event->attr.freq) {
+                       event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+                       if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+                               event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+               }
+               if (x86_pmu.pebs_aliases)
+                       x86_pmu.pebs_aliases(event);
+       }
 
        if (needs_branch_stack(event)) {
                ret = intel_pmu_setup_lbr_filter(event);
@@ -2533,34 +2495,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
        return x86_event_sysfs_show(page, config, event);
 }
 
-static __initconst const struct x86_pmu core_pmu = {
-       .name                   = "core",
-       .handle_irq             = x86_pmu_handle_irq,
-       .disable_all            = x86_pmu_disable_all,
-       .enable_all             = core_pmu_enable_all,
-       .enable                 = core_pmu_enable_event,
-       .disable                = x86_pmu_disable_event,
-       .hw_config              = x86_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
-       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
-       .event_map              = intel_pmu_event_map,
-       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
-       .apic                   = 1,
-       /*
-        * Intel PMCs cannot be accessed sanely above 32 bit width,
-        * so we install an artificial 1<<31 period regardless of
-        * the generic event period:
-        */
-       .max_period             = (1ULL << 31) - 1,
-       .get_event_constraints  = intel_get_event_constraints,
-       .put_event_constraints  = intel_put_event_constraints,
-       .event_constraints      = intel_core_event_constraints,
-       .guest_get_msrs         = core_guest_get_msrs,
-       .format_attrs           = intel_arch_formats_attr,
-       .events_sysfs_show      = intel_event_sysfs_show,
-};
-
 struct intel_shared_regs *allocate_shared_regs(int cpu)
 {
        struct intel_shared_regs *regs;
@@ -2583,19 +2517,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
 static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
 {
        struct intel_excl_cntrs *c;
-       int i;
 
        c = kzalloc_node(sizeof(struct intel_excl_cntrs),
                         GFP_KERNEL, cpu_to_node(cpu));
        if (c) {
                raw_spin_lock_init(&c->lock);
-               for (i = 0; i < X86_PMC_IDX_MAX; i++) {
-                       c->states[0].state[i] = INTEL_EXCL_UNUSED;
-                       c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
-
-                       c->states[1].state[i] = INTEL_EXCL_UNUSED;
-                       c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
-               }
                c->core_id = -1;
        }
        return c;
@@ -2650,7 +2576,7 @@ static void intel_pmu_cpu_starting(int cpu)
        if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
                void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
 
-               for_each_cpu(i, topology_thread_cpumask(cpu)) {
+               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
                        struct intel_shared_regs *pc;
 
                        pc = per_cpu(cpu_hw_events, i).shared_regs;
@@ -2668,9 +2594,7 @@ static void intel_pmu_cpu_starting(int cpu)
                cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 
        if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-               int h = x86_pmu.num_counters >> 1;
-
-               for_each_cpu(i, topology_thread_cpumask(cpu)) {
+               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
                        struct intel_excl_cntrs *c;
 
                        c = per_cpu(cpu_hw_events, i).excl_cntrs;
@@ -2683,11 +2607,6 @@ static void intel_pmu_cpu_starting(int cpu)
                }
                cpuc->excl_cntrs->core_id = core_id;
                cpuc->excl_cntrs->refcnt++;
-               /*
-                * set hard limit to half the number of generic counters
-                */
-               cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
-               cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
        }
 }
 
@@ -2723,6 +2642,15 @@ static void intel_pmu_cpu_dying(int cpu)
        fini_debug_store_on_cpu(cpu);
 }
 
+static void intel_pmu_sched_task(struct perf_event_context *ctx,
+                                bool sched_in)
+{
+       if (x86_pmu.pebs_active)
+               intel_pmu_pebs_sched_task(ctx, sched_in);
+       if (x86_pmu.lbr_nr)
+               intel_pmu_lbr_sched_task(ctx, sched_in);
+}
+
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -2743,6 +2671,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
        NULL,
 };
 
+static __initconst const struct x86_pmu core_pmu = {
+       .name                   = "core",
+       .handle_irq             = x86_pmu_handle_irq,
+       .disable_all            = x86_pmu_disable_all,
+       .enable_all             = core_pmu_enable_all,
+       .enable                 = core_pmu_enable_event,
+       .disable                = x86_pmu_disable_event,
+       .hw_config              = x86_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
+       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
+       .event_map              = intel_pmu_event_map,
+       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
+       .apic                   = 1,
+       /*
+        * Intel PMCs cannot be accessed sanely above 32-bit width,
+        * so we install an artificial 1<<31 period regardless of
+        * the generic event period:
+        */
+       .max_period             = (1ULL<<31) - 1,
+       .get_event_constraints  = intel_get_event_constraints,
+       .put_event_constraints  = intel_put_event_constraints,
+       .event_constraints      = intel_core_event_constraints,
+       .guest_get_msrs         = core_guest_get_msrs,
+       .format_attrs           = intel_arch_formats_attr,
+       .events_sysfs_show      = intel_event_sysfs_show,
+
+       /*
+        * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
+        * together with PMU version 1 and thus be using core_pmu with
+        * shared_regs. We need following callbacks here to allocate
+        * it properly.
+        */
+       .cpu_prepare            = intel_pmu_cpu_prepare,
+       .cpu_starting           = intel_pmu_cpu_starting,
+       .cpu_dying              = intel_pmu_cpu_dying,
+};
+
 static __initconst const struct x86_pmu intel_pmu = {
        .name                   = "Intel",
        .handle_irq             = intel_pmu_handle_irq,
@@ -2774,7 +2740,7 @@ static __initconst const struct x86_pmu intel_pmu = {
        .cpu_starting           = intel_pmu_cpu_starting,
        .cpu_dying              = intel_pmu_cpu_dying,
        .guest_get_msrs         = intel_guest_get_msrs,
-       .sched_task             = intel_pmu_lbr_sched_task,
+       .sched_task             = intel_pmu_sched_task,
 };
 
 static __init void intel_clovertown_quirk(void)
@@ -2947,8 +2913,8 @@ static __init void intel_ht_bug(void)
 {
        x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
 
-       x86_pmu.commit_scheduling = intel_commit_scheduling;
        x86_pmu.start_scheduling = intel_start_scheduling;
+       x86_pmu.commit_scheduling = intel_commit_scheduling;
        x86_pmu.stop_scheduling = intel_stop_scheduling;
 }
 
@@ -3261,6 +3227,8 @@ __init int intel_pmu_init(void)
 
        case 61: /* 14nm Broadwell Core-M */
        case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -3330,13 +3298,13 @@ __init int intel_pmu_init(void)
                 * counter, so do not extend mask to generic counters
                 */
                for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if (c->cmask != FIXED_EVENT_FLAGS
-                           || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
-                               continue;
+                       if (c->cmask == FIXED_EVENT_FLAGS
+                           && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+                               c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
                        }
-
-                       c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
-                       c->weight += x86_pmu.num_counters;
+                       c->idxmsk64 &=
+                               ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
+                       c->weight = hweight64(c->idxmsk64);
                }
        }
 
@@ -3394,7 +3362,7 @@ static __init int fixup_ht_bug(void)
        if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
                return 0;
 
-       w = cpumask_weight(topology_thread_cpumask(cpu));
+       w = cpumask_weight(topology_sibling_cpumask(cpu));
        if (w > 1) {
                pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
                return 0;
@@ -3404,8 +3372,8 @@ static __init int fixup_ht_bug(void)
 
        x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
 
-       x86_pmu.commit_scheduling = NULL;
        x86_pmu.start_scheduling = NULL;
+       x86_pmu.commit_scheduling = NULL;
        x86_pmu.stop_scheduling = NULL;
 
        watchdog_nmi_enable_all();
index ac1f0c5..7795f3f 100644 (file)
@@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode)
 
 static void bts_event_destroy(struct perf_event *event)
 {
+       x86_release_hardware();
        x86_del_exclusive(x86_lbr_exclusive_bts);
 }
 
 static int bts_event_init(struct perf_event *event)
 {
+       int ret;
+
        if (event->attr.type != bts_pmu.type)
                return -ENOENT;
 
        if (x86_add_exclusive(x86_lbr_exclusive_bts))
                return -EBUSY;
 
+       ret = x86_reserve_hardware();
+       if (ret) {
+               x86_del_exclusive(x86_lbr_exclusive_bts);
+               return ret;
+       }
+
        event->destroy = bts_event_destroy;
 
        return 0;
index e4d1b8b..1880761 100644 (file)
 #define MSR_IA32_QM_CTR                0x0c8e
 #define MSR_IA32_QM_EVTSEL     0x0c8d
 
-static unsigned int cqm_max_rmid = -1;
+static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
-struct intel_cqm_state {
-       raw_spinlock_t          lock;
-       int                     rmid;
-       int                     cnt;
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid:              The cached Resource Monitoring ID
+ * @closid:            The cached Class Of Service ID
+ * @rmid_usecnt:       The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+       u32                     rmid;
+       u32                     closid;
+       int                     rmid_usecnt;
 };
 
-static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Both functions which modify the state
+ * (intel_cqm_event_start and intel_cqm_event_stop) are called with
+ * interrupts disabled, which is sufficient for the protection.
+ */
+static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
@@ -57,7 +76,7 @@ static cpumask_t cqm_cpumask;
  * near-zero occupancy value, i.e. no cachelines are tagged with this
  * RMID, once __intel_cqm_rmid_rotate() returns.
  */
-static unsigned int intel_cqm_rotation_rmid;
+static u32 intel_cqm_rotation_rmid;
 
 #define INVALID_RMID           (-1)
 
@@ -69,7 +88,7 @@ static unsigned int intel_cqm_rotation_rmid;
  * Likewise, an rmid value of -1 is used to indicate "no rmid currently
  * assigned" and is used as part of the rotation code.
  */
-static inline bool __rmid_valid(unsigned int rmid)
+static inline bool __rmid_valid(u32 rmid)
 {
        if (!rmid || rmid == INVALID_RMID)
                return false;
@@ -77,7 +96,7 @@ static inline bool __rmid_valid(unsigned int rmid)
        return true;
 }
 
-static u64 __rmid_read(unsigned int rmid)
+static u64 __rmid_read(u32 rmid)
 {
        u64 val;
 
@@ -102,7 +121,7 @@ enum rmid_recycle_state {
 };
 
 struct cqm_rmid_entry {
-       unsigned int rmid;
+       u32 rmid;
        enum rmid_recycle_state state;
        struct list_head list;
        unsigned long queue_time;
@@ -147,7 +166,7 @@ static LIST_HEAD(cqm_rmid_limbo_lru);
  */
 static struct cqm_rmid_entry **cqm_rmid_ptrs;
 
-static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
+static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
 {
        struct cqm_rmid_entry *entry;
 
@@ -162,7 +181,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
  *
  * We expect to be called with cache_mutex held.
  */
-static int __get_rmid(void)
+static u32 __get_rmid(void)
 {
        struct cqm_rmid_entry *entry;
 
@@ -177,7 +196,7 @@ static int __get_rmid(void)
        return entry->rmid;
 }
 
-static void __put_rmid(unsigned int rmid)
+static void __put_rmid(u32 rmid)
 {
        struct cqm_rmid_entry *entry;
 
@@ -372,7 +391,7 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b)
 }
 
 struct rmid_read {
-       unsigned int rmid;
+       u32 rmid;
        atomic64_t value;
 };
 
@@ -381,12 +400,11 @@ static void __intel_cqm_event_count(void *info);
 /*
  * Exchange the RMID of a group of events.
  */
-static unsigned int
-intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid)
+static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
 {
        struct perf_event *event;
-       unsigned int old_rmid = group->hw.cqm_rmid;
        struct list_head *head = &group->hw.cqm_group_entry;
+       u32 old_rmid = group->hw.cqm_rmid;
 
        lockdep_assert_held(&cache_mutex);
 
@@ -451,7 +469,7 @@ static void intel_cqm_stable(void *arg)
  * If we have group events waiting for an RMID that don't conflict with
  * events already running, assign @rmid.
  */
-static bool intel_cqm_sched_in_event(unsigned int rmid)
+static bool intel_cqm_sched_in_event(u32 rmid)
 {
        struct perf_event *leader, *event;
 
@@ -598,7 +616,7 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available)
 static void __intel_cqm_pick_and_rotate(struct perf_event *next)
 {
        struct perf_event *rotor;
-       unsigned int rmid;
+       u32 rmid;
 
        lockdep_assert_held(&cache_mutex);
 
@@ -626,7 +644,7 @@ static void __intel_cqm_pick_and_rotate(struct perf_event *next)
 static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
 {
        struct perf_event *group, *g;
-       unsigned int rmid;
+       u32 rmid;
 
        lockdep_assert_held(&cache_mutex);
 
@@ -828,8 +846,8 @@ static void intel_cqm_setup_event(struct perf_event *event,
                                  struct perf_event **group)
 {
        struct perf_event *iter;
-       unsigned int rmid;
        bool conflict = false;
+       u32 rmid;
 
        list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
                rmid = iter->hw.cqm_rmid;
@@ -860,7 +878,7 @@ static void intel_cqm_setup_event(struct perf_event *event,
 static void intel_cqm_event_read(struct perf_event *event)
 {
        unsigned long flags;
-       unsigned int rmid;
+       u32 rmid;
        u64 val;
 
        /*
@@ -961,55 +979,48 @@ out:
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-       struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
-       unsigned int rmid = event->hw.cqm_rmid;
-       unsigned long flags;
+       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+       u32 rmid = event->hw.cqm_rmid;
 
        if (!(event->hw.cqm_state & PERF_HES_STOPPED))
                return;
 
        event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-       raw_spin_lock_irqsave(&state->lock, flags);
-
-       if (state->cnt++)
-               WARN_ON_ONCE(state->rmid != rmid);
-       else
+       if (state->rmid_usecnt++) {
+               if (!WARN_ON_ONCE(state->rmid != rmid))
+                       return;
+       } else {
                WARN_ON_ONCE(state->rmid);
+       }
 
        state->rmid = rmid;
-       wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
-
-       raw_spin_unlock_irqrestore(&state->lock, flags);
+       wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-       struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
-       unsigned long flags;
+       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 
        if (event->hw.cqm_state & PERF_HES_STOPPED)
                return;
 
        event->hw.cqm_state |= PERF_HES_STOPPED;
 
-       raw_spin_lock_irqsave(&state->lock, flags);
        intel_cqm_event_read(event);
 
-       if (!--state->cnt) {
+       if (!--state->rmid_usecnt) {
                state->rmid = 0;
-               wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+               wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
        } else {
                WARN_ON_ONCE(!state->rmid);
        }
-
-       raw_spin_unlock_irqrestore(&state->lock, flags);
 }
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
 {
        unsigned long flags;
-       unsigned int rmid;
+       u32 rmid;
 
        raw_spin_lock_irqsave(&cache_lock, flags);
 
@@ -1024,11 +1035,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
        return 0;
 }
 
-static void intel_cqm_event_del(struct perf_event *event, int mode)
-{
-       intel_cqm_event_stop(event, mode);
-}
-
 static void intel_cqm_event_destroy(struct perf_event *event)
 {
        struct perf_event *group_other = NULL;
@@ -1057,7 +1063,7 @@ static void intel_cqm_event_destroy(struct perf_event *event)
                        list_replace(&event->hw.cqm_groups_entry,
                                     &group_other->hw.cqm_groups_entry);
                } else {
-                       unsigned int rmid = event->hw.cqm_rmid;
+                       u32 rmid = event->hw.cqm_rmid;
 
                        if (__rmid_valid(rmid))
                                __put_rmid(rmid);
@@ -1221,7 +1227,7 @@ static struct pmu intel_cqm_pmu = {
        .task_ctx_nr         = perf_sw_context,
        .event_init          = intel_cqm_event_init,
        .add                 = intel_cqm_event_add,
-       .del                 = intel_cqm_event_del,
+       .del                 = intel_cqm_event_stop,
        .start               = intel_cqm_event_start,
        .stop                = intel_cqm_event_stop,
        .read                = intel_cqm_event_read,
@@ -1243,12 +1249,12 @@ static inline void cqm_pick_event_reader(int cpu)
 
 static void intel_cqm_cpu_prepare(unsigned int cpu)
 {
-       struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+       struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
        struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-       raw_spin_lock_init(&state->lock);
        state->rmid = 0;
-       state->cnt  = 0;
+       state->closid = 0;
+       state->rmid_usecnt = 0;
 
        WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
        WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
index 813f75d..71fc402 100644 (file)
@@ -11,7 +11,7 @@
 #define BTS_RECORD_SIZE                24
 
 #define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE       PAGE_SIZE
+#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE                PAGE_SIZE
 
 /*
@@ -250,7 +250,7 @@ static int alloc_pebs_buffer(int cpu)
 {
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
        int node = cpu_to_node(cpu);
-       int max, thresh = 1; /* always use a single PEBS record */
+       int max;
        void *buffer, *ibuffer;
 
        if (!x86_pmu.pebs)
@@ -280,9 +280,6 @@ static int alloc_pebs_buffer(int cpu)
        ds->pebs_absolute_maximum = ds->pebs_buffer_base +
                max * x86_pmu.pebs_record_size;
 
-       ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
-               thresh * x86_pmu.pebs_record_size;
-
        return 0;
 }
 
@@ -549,6 +546,19 @@ int intel_pmu_drain_bts_buffer(void)
        return 1;
 }
 
+static inline void intel_pmu_drain_pebs_buffer(void)
+{
+       struct pt_regs regs;
+
+       x86_pmu.drain_pebs(&regs);
+}
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       if (!sched_in)
+               intel_pmu_drain_pebs_buffer();
+}
+
 /*
  * PEBS
  */
@@ -684,33 +694,81 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
        return &emptyconstraint;
 }
 
+static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+{
+       return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+}
+
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
+       struct debug_store *ds = cpuc->ds;
+       bool first_pebs;
+       u64 threshold;
 
        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
+       first_pebs = !pebs_is_enabled(cpuc);
        cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
        if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled |= 1ULL << 63;
+
+       /*
+        * When the event is constrained enough we can use a larger
+        * threshold and run the event with less frequent PMI.
+        */
+       if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
+               threshold = ds->pebs_absolute_maximum -
+                       x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+
+               if (first_pebs)
+                       perf_sched_cb_inc(event->ctx->pmu);
+       } else {
+               threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+               /*
+                * If not all events can use larger buffer,
+                * roll back to threshold = 1
+                */
+               if (!first_pebs &&
+                   (ds->pebs_interrupt_threshold > threshold))
+                       perf_sched_cb_dec(event->ctx->pmu);
+       }
+
+       /* Use auto-reload if possible to save a MSR write in the PMI */
+       if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+               ds->pebs_event_reset[hwc->idx] =
+                       (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+       }
+
+       if (first_pebs || ds->pebs_interrupt_threshold > threshold)
+               ds->pebs_interrupt_threshold = threshold;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
+       struct debug_store *ds = cpuc->ds;
 
        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 
-       if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+       if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
                cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
-       else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled &= ~(1ULL << 63);
 
+       if (ds->pebs_interrupt_threshold >
+           ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
+               intel_pmu_drain_pebs_buffer();
+               if (!pebs_is_enabled(cpuc))
+                       perf_sched_cb_dec(event->ctx->pmu);
+       }
+
        if (cpuc->enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
@@ -846,8 +904,10 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
        return txn;
 }
 
-static void __intel_pmu_pebs_event(struct perf_event *event,
-                                  struct pt_regs *iregs, void *__pebs)
+static void setup_pebs_sample_data(struct perf_event *event,
+                                  struct pt_regs *iregs, void *__pebs,
+                                  struct perf_sample_data *data,
+                                  struct pt_regs *regs)
 {
 #define PERF_X86_EVENT_PEBS_HSW_PREC \
                (PERF_X86_EVENT_PEBS_ST_HSW | \
@@ -859,13 +919,11 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
         */
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct pebs_record_hsw *pebs = __pebs;
-       struct perf_sample_data data;
-       struct pt_regs regs;
        u64 sample_type;
        int fll, fst, dsrc;
        int fl = event->hw.flags;
 
-       if (!intel_pmu_save_and_restart(event))
+       if (pebs == NULL)
                return;
 
        sample_type = event->attr.sample_type;
@@ -874,15 +932,15 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
        fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
        fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
 
-       perf_sample_data_init(&data, 0, event->hw.last_period);
+       perf_sample_data_init(data, 0, event->hw.last_period);
 
-       data.period = event->hw.last_period;
+       data->period = event->hw.last_period;
 
        /*
         * Use latency for weight (only avail with PEBS-LL)
         */
        if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
-               data.weight = pebs->lat;
+               data->weight = pebs->lat;
 
        /*
         * data.data_src encodes the data source
@@ -895,7 +953,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
                        val = precise_datala_hsw(event, pebs->dse);
                else if (fst)
                        val = precise_store_data(pebs->dse);
-               data.data_src.val = val;
+               data->data_src.val = val;
        }
 
        /*
@@ -908,61 +966,123 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
         * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
         * A possible PERF_SAMPLE_REGS will have to transfer all regs.
         */
-       regs = *iregs;
-       regs.flags = pebs->flags;
-       set_linear_ip(&regs, pebs->ip);
-       regs.bp = pebs->bp;
-       regs.sp = pebs->sp;
+       *regs = *iregs;
+       regs->flags = pebs->flags;
+       set_linear_ip(regs, pebs->ip);
+       regs->bp = pebs->bp;
+       regs->sp = pebs->sp;
 
        if (sample_type & PERF_SAMPLE_REGS_INTR) {
-               regs.ax = pebs->ax;
-               regs.bx = pebs->bx;
-               regs.cx = pebs->cx;
-               regs.dx = pebs->dx;
-               regs.si = pebs->si;
-               regs.di = pebs->di;
-               regs.bp = pebs->bp;
-               regs.sp = pebs->sp;
-
-               regs.flags = pebs->flags;
+               regs->ax = pebs->ax;
+               regs->bx = pebs->bx;
+               regs->cx = pebs->cx;
+               regs->dx = pebs->dx;
+               regs->si = pebs->si;
+               regs->di = pebs->di;
+               regs->bp = pebs->bp;
+               regs->sp = pebs->sp;
+
+               regs->flags = pebs->flags;
 #ifndef CONFIG_X86_32
-               regs.r8 = pebs->r8;
-               regs.r9 = pebs->r9;
-               regs.r10 = pebs->r10;
-               regs.r11 = pebs->r11;
-               regs.r12 = pebs->r12;
-               regs.r13 = pebs->r13;
-               regs.r14 = pebs->r14;
-               regs.r15 = pebs->r15;
+               regs->r8 = pebs->r8;
+               regs->r9 = pebs->r9;
+               regs->r10 = pebs->r10;
+               regs->r11 = pebs->r11;
+               regs->r12 = pebs->r12;
+               regs->r13 = pebs->r13;
+               regs->r14 = pebs->r14;
+               regs->r15 = pebs->r15;
 #endif
        }
 
        if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-               regs.ip = pebs->real_ip;
-               regs.flags |= PERF_EFLAGS_EXACT;
-       } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
-               regs.flags |= PERF_EFLAGS_EXACT;
+               regs->ip = pebs->real_ip;
+               regs->flags |= PERF_EFLAGS_EXACT;
+       } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
+               regs->flags |= PERF_EFLAGS_EXACT;
        else
-               regs.flags &= ~PERF_EFLAGS_EXACT;
+               regs->flags &= ~PERF_EFLAGS_EXACT;
 
        if ((sample_type & PERF_SAMPLE_ADDR) &&
            x86_pmu.intel_cap.pebs_format >= 1)
-               data.addr = pebs->dla;
+               data->addr = pebs->dla;
 
        if (x86_pmu.intel_cap.pebs_format >= 2) {
                /* Only set the TSX weight when no memory weight. */
                if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
-                       data.weight = intel_hsw_weight(pebs);
+                       data->weight = intel_hsw_weight(pebs);
 
                if (sample_type & PERF_SAMPLE_TRANSACTION)
-                       data.txn = intel_hsw_transaction(pebs);
+                       data->txn = intel_hsw_transaction(pebs);
        }
 
        if (has_branch_stack(event))
-               data.br_stack = &cpuc->lbr_stack;
+               data->br_stack = &cpuc->lbr_stack;
+}
+
+static inline void *
+get_next_pebs_record_by_bit(void *base, void *top, int bit)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       void *at;
+       u64 pebs_status;
+
+       if (base == NULL)
+               return NULL;
+
+       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+               struct pebs_record_nhm *p = at;
 
-       if (perf_event_overflow(event, &data, &regs))
+               if (test_bit(bit, (unsigned long *)&p->status)) {
+                       /* PEBS v3 has accurate status bits */
+                       if (x86_pmu.intel_cap.pebs_format >= 3)
+                               return at;
+
+                       if (p->status == (1 << bit))
+                               return at;
+
+                       /* clear non-PEBS bit and re-check */
+                       pebs_status = p->status & cpuc->pebs_enabled;
+                       pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+                       if (pebs_status == (1 << bit))
+                               return at;
+               }
+       }
+       return NULL;
+}
+
+static void __intel_pmu_pebs_event(struct perf_event *event,
+                                  struct pt_regs *iregs,
+                                  void *base, void *top,
+                                  int bit, int count)
+{
+       struct perf_sample_data data;
+       struct pt_regs regs;
+       void *at = get_next_pebs_record_by_bit(base, top, bit);
+
+       if (!intel_pmu_save_and_restart(event) &&
+           !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+               return;
+
+       while (count > 1) {
+               setup_pebs_sample_data(event, iregs, at, &data, &regs);
+               perf_event_output(event, &data, &regs);
+               at += x86_pmu.pebs_record_size;
+               at = get_next_pebs_record_by_bit(at, top, bit);
+               count--;
+       }
+
+       setup_pebs_sample_data(event, iregs, at, &data, &regs);
+
+       /*
+        * All but the last records are processed.
+        * The last one is left to be able to call the overflow handler.
+        */
+       if (perf_event_overflow(event, &data, &regs)) {
                x86_pmu_stop(event, 0);
+               return;
+       }
+
 }
 
 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
@@ -992,72 +1112,99 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
        if (!event->attr.precise_ip)
                return;
 
-       n = top - at;
+       n = (top - at) / x86_pmu.pebs_record_size;
        if (n <= 0)
                return;
 
-       /*
-        * Should not happen, we program the threshold at 1 and do not
-        * set a reset value.
-        */
-       WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
-       at += n - 1;
-
-       __intel_pmu_pebs_event(event, iregs, at);
+       __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
 }
 
 static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
-       struct perf_event *event = NULL;
-       void *at, *top;
-       u64 status = 0;
-       int bit;
+       struct perf_event *event;
+       void *base, *at, *top;
+       short counts[MAX_PEBS_EVENTS] = {};
+       short error[MAX_PEBS_EVENTS] = {};
+       int bit, i;
 
        if (!x86_pmu.pebs_active)
                return;
 
-       at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+       base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
 
        ds->pebs_index = ds->pebs_buffer_base;
 
-       if (unlikely(at > top))
+       if (unlikely(base >= top))
                return;
 
-       /*
-        * Should not happen, we program the threshold at 1 and do not
-        * set a reset value.
-        */
-       WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
-                 "Unexpected number of pebs records %ld\n",
-                 (long)(top - at) / x86_pmu.pebs_record_size);
-
-       for (; at < top; at += x86_pmu.pebs_record_size) {
+       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;
 
-               for_each_set_bit(bit, (unsigned long *)&p->status,
-                                x86_pmu.max_pebs_events) {
-                       event = cpuc->events[bit];
-                       if (!test_bit(bit, cpuc->active_mask))
-                               continue;
-
-                       WARN_ON_ONCE(!event);
+               /* PEBS v3 has accurate status bits */
+               if (x86_pmu.intel_cap.pebs_format >= 3) {
+                       for_each_set_bit(bit, (unsigned long *)&p->status,
+                                        MAX_PEBS_EVENTS)
+                               counts[bit]++;
 
-                       if (!event->attr.precise_ip)
-                               continue;
+                       continue;
+               }
 
-                       if (__test_and_set_bit(bit, (unsigned long *)&status))
+               bit = find_first_bit((unsigned long *)&p->status,
+                                       x86_pmu.max_pebs_events);
+               if (bit >= x86_pmu.max_pebs_events)
+                       continue;
+               if (!test_bit(bit, cpuc->active_mask))
+                       continue;
+               /*
+                * The PEBS hardware does not deal well with the situation
+                * when events happen near to each other and multiple bits
+                * are set. But it should happen rarely.
+                *
+                * If these events include one PEBS and multiple non-PEBS
+                * events, it doesn't impact PEBS record. The record will
+                * be handled normally. (slow path)
+                *
+                * If these events include two or more PEBS events, the
+                * records for the events can be collapsed into a single
+                * one, and it's not possible to reconstruct all events
+                * that caused the PEBS record. It's called collision.
+                * If collision happened, the record will be dropped.
+                *
+                */
+               if (p->status != (1 << bit)) {
+                       u64 pebs_status;
+
+                       /* slow path */
+                       pebs_status = p->status & cpuc->pebs_enabled;
+                       pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+                       if (pebs_status != (1 << bit)) {
+                               for_each_set_bit(i, (unsigned long *)&pebs_status,
+                                                MAX_PEBS_EVENTS)
+                                       error[i]++;
                                continue;
-
-                       break;
+                       }
                }
+               counts[bit]++;
+       }
 
-               if (!event || bit >= x86_pmu.max_pebs_events)
+       for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+               if ((counts[bit] == 0) && (error[bit] == 0))
                        continue;
+               event = cpuc->events[bit];
+               WARN_ON_ONCE(!event);
+               WARN_ON_ONCE(!event->attr.precise_ip);
 
-               __intel_pmu_pebs_event(event, iregs, at);
+               /* log dropped samples number */
+               if (error[bit])
+                       perf_log_lost_samples(event, error[bit]);
+
+               if (counts[bit]) {
+                       __intel_pmu_pebs_event(event, iregs, base,
+                                              top, bit, counts[bit]);
+               }
        }
 }
 
index 94e5b50..452a7bd 100644 (file)
@@ -96,6 +96,7 @@ enum {
        X86_BR_NO_TX            = 1 << 14,/* not in transaction */
        X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
        X86_BR_CALL_STACK       = 1 << 16,/* call stack */
+       X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -113,6 +114,7 @@ enum {
         X86_BR_IRQ      |\
         X86_BR_ABORT    |\
         X86_BR_IND_CALL |\
+        X86_BR_IND_JMP  |\
         X86_BR_ZERO_CALL)
 
 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -262,9 +264,6 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct x86_perf_task_context *task_ctx;
 
-       if (!x86_pmu.lbr_nr)
-               return;
-
        /*
         * If LBR callstack feature is enabled and the stack was saved when
         * the task was scheduled out, restore the stack. Otherwise flush
@@ -523,6 +522,9 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
                        X86_BR_CALL_STACK;
        }
 
+       if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+               mask |= X86_BR_IND_JMP;
+
        /*
         * stash actual user request into reg, it may
         * be used by fixup code for some CPU
@@ -736,7 +738,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
                        break;
                case 4:
                case 5:
-                       ret = X86_BR_JMP;
+                       ret = X86_BR_IND_JMP;
                        break;
                }
                break;
@@ -844,6 +846,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
         */
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
 };
 
 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -856,6 +859,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
                                                | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
 };
 
 static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -870,6 +874,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
                                                | LBR_RETURN | LBR_CALL_STACK,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
 };
 
 /* core */
index ffe666c..159887c 100644 (file)
@@ -151,7 +151,7 @@ static int __init pt_pmu_hw_init(void)
 
                de_attr->attr.attr.name = pt_caps[i].name;
 
-               sysfs_attr_init(&de_attrs->attr.attr);
+               sysfs_attr_init(&de_attr->attr.attr);
 
                de_attr->attr.attr.mode         = S_IRUGO;
                de_attr->attr.show              = pt_cap_show;
@@ -187,15 +187,6 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
-static bool pt_is_running(void)
-{
-       u64 ctl;
-
-       rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-
-       return !!(ctl & RTIT_CTL_TRACEEN);
-}
-
 static void pt_config(struct perf_event *event)
 {
        u64 reg;
@@ -609,16 +600,19 @@ static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
  * @handle:    Current output handle.
  *
  * Place INT and STOP marks to prevent overwriting old data that the consumer
- * hasn't yet collected.
+ * hasn't yet collected and waking up the consumer after a certain fraction of
+ * the buffer has filled up. Only needed and sensible for non-snapshot counters.
+ *
+ * This obviously relies on buf::head to figure out buffer markers, so it has
+ * to be called after pt_buffer_reset_offsets() and before the hardware tracing
+ * is enabled.
  */
 static int pt_buffer_reset_markers(struct pt_buffer *buf,
                                   struct perf_output_handle *handle)
 
 {
-       unsigned long idx, npages, end;
-
-       if (buf->snapshot)
-               return 0;
+       unsigned long head = local64_read(&buf->head);
+       unsigned long idx, npages, wakeup;
 
        /* can't stop in the middle of an output region */
        if (buf->output_off + handle->size + 1 <
@@ -634,17 +628,26 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
        buf->topa_index[buf->stop_pos]->stop = 0;
        buf->topa_index[buf->intr_pos]->intr = 0;
 
-       if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
-               npages = (handle->size + 1) >> PAGE_SHIFT;
-               end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages;
-               /*if (end > handle->wakeup >> PAGE_SHIFT)
-                 end = handle->wakeup >> PAGE_SHIFT;*/
-               idx = end & (buf->nr_pages - 1);
-               buf->stop_pos = idx;
-               idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1;
-               idx &= buf->nr_pages - 1;
-               buf->intr_pos = idx;
-       }
+       /* how many pages till the STOP marker */
+       npages = handle->size >> PAGE_SHIFT;
+
+       /* if it's on a page boundary, fill up one more page */
+       if (!offset_in_page(head + handle->size + 1))
+               npages++;
+
+       idx = (head >> PAGE_SHIFT) + npages;
+       idx &= buf->nr_pages - 1;
+       buf->stop_pos = idx;
+
+       wakeup = handle->wakeup >> PAGE_SHIFT;
+
+       /* in the worst case, wake up the consumer one page before hard stop */
+       idx = (head >> PAGE_SHIFT) + npages - 1;
+       if (idx > wakeup)
+               idx = wakeup;
+
+       idx &= buf->nr_pages - 1;
+       buf->intr_pos = idx;
 
        buf->topa_index[buf->stop_pos]->stop = 1;
        buf->topa_index[buf->intr_pos]->intr = 1;
@@ -664,7 +667,7 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
        struct topa *cur = buf->first, *prev = buf->last;
        struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
                *te_prev = TOPA_ENTRY(prev, prev->last - 1);
-       int pg = 0, idx = 0, ntopa = 0;
+       int pg = 0, idx = 0;
 
        while (pg < buf->nr_pages) {
                int tidx;
@@ -679,9 +682,9 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
                        /* advance to next topa table */
                        idx = 0;
                        cur = list_entry(cur->list.next, struct topa, list);
-                       ntopa++;
-               } else
+               } else {
                        idx++;
+               }
                te_cur = TOPA_ENTRY(cur, idx);
        }
 
@@ -693,7 +696,14 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
  * @head:      Write pointer (aux_head) from AUX buffer.
  *
  * Find the ToPA table and entry corresponding to given @head and set buffer's
- * "current" pointers accordingly.
+ * "current" pointers accordingly. This is done after we have obtained the
+ * current aux_head position from a successful call to perf_aux_output_begin()
+ * to make sure the hardware is writing to the right place.
+ *
+ * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
+ * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
+ * which are used to determine INT and STOP markers' locations by a subsequent
+ * call to pt_buffer_reset_markers().
  */
 static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
 {
@@ -891,6 +901,7 @@ void intel_pt_interrupt(void)
                }
 
                pt_buffer_reset_offsets(buf, pt->handle.head);
+               /* snapshot counters don't use PMI, so it's safe */
                ret = pt_buffer_reset_markers(buf, &pt->handle);
                if (ret) {
                        perf_aux_output_end(&pt->handle, 0, true);
@@ -913,7 +924,7 @@ static void pt_event_start(struct perf_event *event, int mode)
        struct pt *pt = this_cpu_ptr(&pt_ctx);
        struct pt_buffer *buf = perf_get_aux(&pt->handle);
 
-       if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) {
+       if (!buf || pt_buffer_is_full(buf, pt)) {
                event->hw.state = PERF_HES_STOPPED;
                return;
        }
@@ -944,7 +955,6 @@ static void pt_event_stop(struct perf_event *event, int mode)
        event->hw.state = PERF_HES_STOPPED;
 
        if (mode & PERF_EF_UPDATE) {
-               struct pt *pt = this_cpu_ptr(&pt_ctx);
                struct pt_buffer *buf = perf_get_aux(&pt->handle);
 
                if (!buf)
index 999289b..358c54a 100644 (file)
@@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void)
                break;
        case 60: /* Haswell */
        case 69: /* Haswell-Celeron */
+       case 61: /* Broadwell */
                rapl_cntr_mask = RAPL_IDX_HSW;
                rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
                break;
index c635b8b..7c1de16 100644 (file)
@@ -365,9 +365,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
        bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
 
        for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-               hwc = &box->event_list[i]->hw;
                c = uncore_get_event_constraint(box, box->event_list[i]);
-               hwc->constraint = c;
+               box->event_constraint[i] = c;
                wmin = min(wmin, c->weight);
                wmax = max(wmax, c->weight);
        }
@@ -375,7 +374,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
        /* fastpath, try to reuse previous register */
        for (i = 0; i < n; i++) {
                hwc = &box->event_list[i]->hw;
-               c = hwc->constraint;
+               c = box->event_constraint[i];
 
                /* never assigned */
                if (hwc->idx == -1)
@@ -395,8 +394,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
        }
        /* slow path */
        if (i != n)
-               ret = perf_assign_events(box->event_list, n,
-                                        wmin, wmax, assign);
+               ret = perf_assign_events(box->event_constraint, n,
+                                        wmin, wmax, n, assign);
 
        if (!assign || ret) {
                for (i = 0; i < n; i++)
@@ -840,6 +839,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
        box->phys_id = phys_id;
        box->pci_dev = pdev;
        box->pmu = pmu;
+       uncore_box_init(box);
        pci_set_drvdata(pdev, box);
 
        raw_spin_lock(&uncore_box_lock);
@@ -922,6 +922,9 @@ static int __init uncore_pci_init(void)
        case 69: /* Haswell Celeron */
                ret = hsw_uncore_pci_init();
                break;
+       case 61: /* Broadwell */
+               ret = bdw_uncore_pci_init();
+               break;
        default:
                return 0;
        }
@@ -1003,8 +1006,10 @@ static int uncore_cpu_starting(int cpu)
                        pmu = &type->pmus[j];
                        box = *per_cpu_ptr(pmu->box, cpu);
                        /* called by uncore_cpu_init? */
-                       if (box && box->phys_id >= 0)
+                       if (box && box->phys_id >= 0) {
+                               uncore_box_init(box);
                                continue;
+                       }
 
                        for_each_online_cpu(k) {
                                exist = *per_cpu_ptr(pmu->box, k);
@@ -1020,8 +1025,10 @@ static int uncore_cpu_starting(int cpu)
                                }
                        }
 
-                       if (box)
+                       if (box) {
                                box->phys_id = phys_id;
+                               uncore_box_init(box);
+                       }
                }
        }
        return 0;
index 6c8c1e7..0f77f0a 100644 (file)
@@ -97,6 +97,7 @@ struct intel_uncore_box {
        atomic_t refcnt;
        struct perf_event *events[UNCORE_PMC_IDX_MAX];
        struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+       struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
        unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
        u64 tags[UNCORE_PMC_IDX_MAX];
        struct pci_dev *pci_dev;
@@ -257,14 +258,6 @@ static inline int uncore_num_counters(struct intel_uncore_box *box)
        return box->pmu->type->num_counters;
 }
 
-static inline void uncore_box_init(struct intel_uncore_box *box)
-{
-       if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
-               if (box->pmu->type->ops->init_box)
-                       box->pmu->type->ops->init_box(box);
-       }
-}
-
 static inline void uncore_disable_box(struct intel_uncore_box *box)
 {
        if (box->pmu->type->ops->disable_box)
@@ -273,8 +266,6 @@ static inline void uncore_disable_box(struct intel_uncore_box *box)
 
 static inline void uncore_enable_box(struct intel_uncore_box *box)
 {
-       uncore_box_init(box);
-
        if (box->pmu->type->ops->enable_box)
                box->pmu->type->ops->enable_box(box);
 }
@@ -297,6 +288,14 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box,
        return box->pmu->type->ops->read_counter(box, event);
 }
 
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+       if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+               if (box->pmu->type->ops->init_box)
+                       box->pmu->type->ops->init_box(box);
+       }
+}
+
 static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
 {
        return (box->phys_id < 0);
@@ -326,6 +325,7 @@ extern struct event_constraint uncore_constraint_empty;
 int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
+int bdw_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 
index 3001015..b005a78 100644 (file)
@@ -1,6 +1,14 @@
 /* Nehalem/SandBridge/Haswell uncore support */
 #include "perf_event_intel_uncore.h"
 
+/* Uncore IMC PCI IDs */
+#define PCI_DEVICE_ID_INTEL_SNB_IMC    0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC    0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
+
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
 #define SNB_UNC_CTL_UMASK_MASK                 0x0000ff00
@@ -472,6 +480,18 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
                .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
        },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id bdw_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
        { /* end: all zeroes */ },
 };
 
@@ -490,6 +510,11 @@ static struct pci_driver hsw_uncore_pci_driver = {
        .id_table       = hsw_uncore_pci_ids,
 };
 
+static struct pci_driver bdw_uncore_pci_driver = {
+       .name           = "bdw_uncore",
+       .id_table       = bdw_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
        __u32 pci_id;
        struct pci_driver *driver;
@@ -502,6 +527,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
        IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
        IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
        IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
+       IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
+       IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
        {  /* end marker */ }
 };
 
@@ -549,6 +576,11 @@ int hsw_uncore_pci_init(void)
        return imc_uncore_pci_init();
 }
 
+int bdw_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */
index 12d9548..6d6e85d 100644 (file)
                                ((1ULL << (n)) - 1)))
 
 /* Haswell-EP Ubox */
-#define HSWEP_U_MSR_PMON_CTR0                  0x705
-#define HSWEP_U_MSR_PMON_CTL0                  0x709
+#define HSWEP_U_MSR_PMON_CTR0                  0x709
+#define HSWEP_U_MSR_PMON_CTL0                  0x705
 #define HSWEP_U_MSR_PMON_FILTER                        0x707
 
 #define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL                0x703
@@ -1914,7 +1914,7 @@ static struct intel_uncore_type hswep_uncore_cbox = {
        .name                   = "cbox",
        .num_counters           = 4,
        .num_boxes              = 18,
-       .perf_ctr_bits          = 44,
+       .perf_ctr_bits          = 48,
        .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
        .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
        .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
index e7d8c76..18ca99f 100644 (file)
@@ -12,7 +12,8 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
 {
 #ifdef CONFIG_SMP
        seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
-       seq_printf(m, "siblings\t: %d\n", cpumask_weight(cpu_core_mask(cpu)));
+       seq_printf(m, "siblings\t: %d\n",
+                  cpumask_weight(topology_core_cpumask(cpu)));
        seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
        seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
        seq_printf(m, "apicid\t\t: %d\n", c->apicid);
index 2b55ee6..5a46681 100644 (file)
@@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
        clear_bss();
 
        for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
-               set_intr_gate(i, early_idt_handlers[i]);
+               set_intr_gate(i, early_idt_handler_array[i]);
        load_idt((const struct desc_ptr *)&idt_descr);
 
        copy_bootdata(__va(real_mode_data));
index d031bad..53eeb22 100644 (file)
@@ -478,21 +478,22 @@ is486:
 __INIT
 setup_once:
        /*
-        * Set up a idt with 256 entries pointing to ignore_int,
-        * interrupt gates. It doesn't actually load idt - that needs
-        * to be done on each CPU. Interrupts are enabled elsewhere,
-        * when we can be relatively sure everything is ok.
+        * Set up a idt with 256 interrupt gates that push zero if there
+        * is no error code and then jump to early_idt_handler_common.
+        * It doesn't actually load the idt - that needs to be done on
+        * each CPU. Interrupts are enabled elsewhere, when we can be
+        * relatively sure everything is ok.
         */
 
        movl $idt_table,%edi
-       movl $early_idt_handlers,%eax
+       movl $early_idt_handler_array,%eax
        movl $NUM_EXCEPTION_VECTORS,%ecx
 1:
        movl %eax,(%edi)
        movl %eax,4(%edi)
        /* interrupt gate, dpl=0, present */
        movl $(0x8E000000 + __KERNEL_CS),2(%edi)
-       addl $9,%eax
+       addl $EARLY_IDT_HANDLER_SIZE,%eax
        addl $8,%edi
        loop 1b
 
@@ -524,26 +525,28 @@ setup_once:
        andl $0,setup_once_ref  /* Once is enough, thanks */
        ret
 
-ENTRY(early_idt_handlers)
+ENTRY(early_idt_handler_array)
        # 36(%esp) %eflags
        # 32(%esp) %cs
        # 28(%esp) %eip
        # 24(%rsp) error code
        i = 0
        .rept NUM_EXCEPTION_VECTORS
-       .if (EXCEPTION_ERRCODE_MASK >> i) & 1
-       ASM_NOP2
-       .else
+       .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
        pushl $0                # Dummy error code, to make stack frame uniform
        .endif
        pushl $i                # 20(%esp) Vector number
-       jmp early_idt_handler
+       jmp early_idt_handler_common
        i = i + 1
+       .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
        .endr
-ENDPROC(early_idt_handlers)
+ENDPROC(early_idt_handler_array)
        
-       /* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+       /*
+        * The stack is the hardware frame, an error code or zero, and the
+        * vector number.
+        */
        cld
 
        cmpl $2,(%esp)          # X86_TRAP_NMI
@@ -603,7 +606,7 @@ ex_entry:
 is_nmi:
        addl $8,%esp            /* drop vector number and error code */
        iret
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
 
 /* This is the default interrupt "handler" :-) */
        ALIGN
index ae6588b..df7e780 100644 (file)
@@ -321,26 +321,28 @@ bad_address:
        jmp bad_address
 
        __INIT
-       .globl early_idt_handlers
-early_idt_handlers:
+ENTRY(early_idt_handler_array)
        # 104(%rsp) %rflags
        #  96(%rsp) %cs
        #  88(%rsp) %rip
        #  80(%rsp) error code
        i = 0
        .rept NUM_EXCEPTION_VECTORS
-       .if (EXCEPTION_ERRCODE_MASK >> i) & 1
-       ASM_NOP2
-       .else
+       .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
        pushq $0                # Dummy error code, to make stack frame uniform
        .endif
        pushq $i                # 72(%rsp) Vector number
-       jmp early_idt_handler
+       jmp early_idt_handler_common
        i = i + 1
+       .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
        .endr
+ENDPROC(early_idt_handler_array)
 
-/* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+       /*
+        * The stack is the hardware frame, an error code or zero, and the
+        * vector number.
+        */
        cld
 
        cmpl $2,(%rsp)          # X86_TRAP_NMI
@@ -412,7 +414,7 @@ ENTRY(early_idt_handler)
 is_nmi:
        addq $16,%rsp           # drop vector number and error code
        INTERRUPT_RETURN
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
 
        __INITDATA
 
index 05fd74f..64341aa 100644 (file)
@@ -40,7 +40,5 @@ EXPORT_SYMBOL(empty_zero_page);
 
 #ifdef CONFIG_PREEMPT
 EXPORT_SYMBOL(___preempt_schedule);
-#ifdef CONFIG_CONTEXT_TRACKING
-EXPORT_SYMBOL(___preempt_schedule_context);
-#endif
+EXPORT_SYMBOL(___preempt_schedule_notrace);
 #endif
index 0091832..6185d31 100644 (file)
@@ -173,6 +173,21 @@ static void init_thread_xstate(void)
                xstate_size = sizeof(struct i387_fxsave_struct);
        else
                xstate_size = sizeof(struct i387_fsave_struct);
+
+       /*
+        * Quirk: we don't yet handle the XSAVES* instructions
+        * correctly, as we don't correctly convert between
+        * standard and compacted format when interfacing
+        * with user-space - so disable it for now.
+        *
+        * The difference is small: with recent CPUs the
+        * compacted format is only marginally smaller than
+        * the standard FPU state format.
+        *
+        * ( This is easy to backport while we are fixing
+        *   XSAVES* support. )
+        */
+       setup_clear_cpu_cap(X86_FEATURE_XSAVES);
 }
 
 /*
index 9435620..1681504 100644 (file)
@@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu)
        kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
 }
 
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+#include <asm/qspinlock.h>
+
+static void kvm_wait(u8 *ptr, u8 val)
+{
+       unsigned long flags;
+
+       if (in_nmi())
+               return;
+
+       local_irq_save(flags);
+
+       if (READ_ONCE(*ptr) != val)
+               goto out;
+
+       /*
+        * halt until it's our turn and kicked. Note that we do safe halt
+        * for irq enabled case to avoid hang when lock info is overwritten
+        * in irq spinlock slowpath and no spurious interrupt occur to save us.
+        */
+       if (arch_irqs_disabled_flags(flags))
+               halt();
+       else
+               safe_halt();
+
+out:
+       local_irq_restore(flags);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
 enum kvm_contention_stat {
        TAKEN_SLOW,
        TAKEN_SLOW_PICKUP,
@@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
        }
 }
 
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+
 /*
  * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
  */
@@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void)
        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
                return;
 
+#ifdef CONFIG_QUEUED_SPINLOCKS
+       __pv_init_lock_hash();
+       pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+       pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+       pv_lock_ops.wait = kvm_wait;
+       pv_lock_ops.kick = kvm_kick_cpu;
+#else /* !CONFIG_QUEUED_SPINLOCKS */
        pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
        pv_lock_ops.unlock_kick = kvm_unlock_kick;
+#endif
 }
 
 static __init int kvm_spinlock_init_jump(void)
index bbb6c73..33ee3e0 100644 (file)
@@ -8,11 +8,33 @@
 
 #include <asm/paravirt.h>
 
+#ifdef CONFIG_QUEUED_SPINLOCKS
+__visible void __native_queued_spin_unlock(struct qspinlock *lock)
+{
+       native_queued_spin_unlock(lock);
+}
+
+PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
+
+bool pv_is_native_spin_unlock(void)
+{
+       return pv_lock_ops.queued_spin_unlock.func ==
+               __raw_callee_save___native_queued_spin_unlock;
+}
+#endif
+
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
+#ifdef CONFIG_QUEUED_SPINLOCKS
+       .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
+       .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
+       .wait = paravirt_nop,
+       .kick = paravirt_nop,
+#else /* !CONFIG_QUEUED_SPINLOCKS */
        .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
        .unlock_kick = paravirt_nop,
-#endif
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+#endif /* SMP */
 };
 EXPORT_SYMBOL(pv_lock_ops);
 
index d9f32e6..e1b0136 100644 (file)
@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
 
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
+#endif
+
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
 {
        /* arg in %eax, return in %eax */
@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
        return 0;
 }
 
+extern bool pv_is_native_spin_unlock(void);
+
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                      unsigned long addr, unsigned len)
 {
@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_mmu_ops, write_cr3);
                PATCH_SITE(pv_cpu_ops, clts);
                PATCH_SITE(pv_cpu_ops, read_tsc);
-
-       patch_site:
-               ret = paravirt_patch_insns(ibuf, len, start, end);
-               break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+               case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+                       if (pv_is_native_spin_unlock()) {
+                               start = start_pv_lock_ops_queued_spin_unlock;
+                               end   = end_pv_lock_ops_queued_spin_unlock;
+                               goto patch_site;
+                       }
+#endif
 
        default:
                ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
                break;
+
+patch_site:
+               ret = paravirt_patch_insns(ibuf, len, start, end);
+               break;
        }
 #undef PATCH_SITE
        return ret;
index a1da673..a1fa867 100644 (file)
@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 DEF_NATIVE(, mov32, "mov %edi, %eax");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
+#endif
+
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
 {
        return paravirt_patch_insns(insnbuf, len,
@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
                                    start__mov64, end__mov64);
 }
 
+extern bool pv_is_native_spin_unlock(void);
+
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                      unsigned long addr, unsigned len)
 {
@@ -59,14 +65,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_cpu_ops, clts);
                PATCH_SITE(pv_mmu_ops, flush_tlb_single);
                PATCH_SITE(pv_cpu_ops, wbinvd);
-
-       patch_site:
-               ret = paravirt_patch_insns(ibuf, len, start, end);
-               break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+               case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+                       if (pv_is_native_spin_unlock()) {
+                               start = start_pv_lock_ops_queued_spin_unlock;
+                               end   = end_pv_lock_ops_queued_spin_unlock;
+                               goto patch_site;
+                       }
+#endif
 
        default:
                ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
                break;
+
+patch_site:
+               ret = paravirt_patch_insns(ibuf, len, start, end);
+               break;
        }
 #undef PATCH_SITE
        return ret;
index a25e202..353972c 100644 (file)
@@ -140,6 +140,51 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
                free_pages((unsigned long)vaddr, get_order(size));
 }
 
+void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+                     gfp_t gfp, struct dma_attrs *attrs)
+{
+       struct dma_map_ops *ops = get_dma_ops(dev);
+       void *memory;
+
+       gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+
+       if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
+               return memory;
+
+       if (!dev)
+               dev = &x86_dma_fallback_dev;
+
+       if (!is_device_dma_capable(dev))
+               return NULL;
+
+       if (!ops->alloc)
+               return NULL;
+
+       memory = ops->alloc(dev, size, dma_handle,
+                           dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
+       debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
+
+       return memory;
+}
+EXPORT_SYMBOL(dma_alloc_attrs);
+
+void dma_free_attrs(struct device *dev, size_t size,
+                   void *vaddr, dma_addr_t bus,
+                   struct dma_attrs *attrs)
+{
+       struct dma_map_ops *ops = get_dma_ops(dev);
+
+       WARN_ON(irqs_disabled());       /* for portability */
+
+       if (dma_release_from_coherent(dev, get_order(size), vaddr))
+               return;
+
+       debug_dma_free_coherent(dev, size, vaddr, bus);
+       if (ops->free)
+               ops->free(dev, size, vaddr, bus, attrs);
+}
+EXPORT_SYMBOL(dma_free_attrs);
+
 /*
  * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
  * parameter documentation.
index 8213da6..c648139 100644 (file)
@@ -57,7 +57,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
        .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
 };
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss);
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -156,11 +156,13 @@ void flush_thread(void)
                /* FPU state will be reallocated lazily at the first use. */
                drop_fpu(tsk);
                free_thread_xstate(tsk);
-       } else if (!used_math()) {
-               /* kthread execs. TODO: cleanup this horror. */
-               if (WARN_ON(init_fpu(tsk)))
-                       force_sig(SIGKILL, tsk);
-               user_fpu_begin();
+       } else {
+               if (!tsk_used_math(tsk)) {
+                       /* kthread execs. TODO: cleanup this horror. */
+                       if (WARN_ON(init_fpu(tsk)))
+                               force_sig(SIGKILL, tsk);
+                       user_fpu_begin();
+               }
                restore_init_xstate();
        }
 }
@@ -443,11 +445,10 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
 }
 
 /*
- * MONITOR/MWAIT with no hints, used for default default C1 state.
- * This invokes MWAIT with interrutps enabled and no flags,
- * which is backwards compatible with the original MWAIT implementation.
+ * MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT
+ * with interrupts enabled and no flags, which is backwards compatible with the
+ * original MWAIT implementation.
  */
-
 static void mwait_idle(void)
 {
        if (!current_set_polling_and_test()) {
index 50e547e..0e82096 100644 (file)
@@ -314,10 +314,10 @@ topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
                cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
 }
 
-#define link_mask(_m, c1, c2)                                          \
+#define link_mask(mfunc, c1, c2)                                       \
 do {                                                                   \
-       cpumask_set_cpu((c1), cpu_##_m##_mask(c2));                     \
-       cpumask_set_cpu((c2), cpu_##_m##_mask(c1));                     \
+       cpumask_set_cpu((c1), mfunc(c2));                               \
+       cpumask_set_cpu((c2), mfunc(c1));                               \
 } while (0)
 
 static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
@@ -398,9 +398,9 @@ void set_cpu_sibling_map(int cpu)
        cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
 
        if (!has_mp) {
-               cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+               cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
                cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
-               cpumask_set_cpu(cpu, cpu_core_mask(cpu));
+               cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
                c->booted_cores = 1;
                return;
        }
@@ -409,32 +409,34 @@ void set_cpu_sibling_map(int cpu)
                o = &cpu_data(i);
 
                if ((i == cpu) || (has_smt && match_smt(c, o)))
-                       link_mask(sibling, cpu, i);
+                       link_mask(topology_sibling_cpumask, cpu, i);
 
                if ((i == cpu) || (has_mp && match_llc(c, o)))
-                       link_mask(llc_shared, cpu, i);
+                       link_mask(cpu_llc_shared_mask, cpu, i);
 
        }
 
        /*
         * This needs a separate iteration over the cpus because we rely on all
-        * cpu_sibling_mask links to be set-up.
+        * topology_sibling_cpumask links to be set-up.
         */
        for_each_cpu(i, cpu_sibling_setup_mask) {
                o = &cpu_data(i);
 
                if ((i == cpu) || (has_mp && match_die(c, o))) {
-                       link_mask(core, cpu, i);
+                       link_mask(topology_core_cpumask, cpu, i);
 
                        /*
                         *  Does this new cpu bringup a new core?
                         */
-                       if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) {
+                       if (cpumask_weight(
+                           topology_sibling_cpumask(cpu)) == 1) {
                                /*
                                 * for each core in package, increment
                                 * the booted_cores for this new cpu
                                 */
-                               if (cpumask_first(cpu_sibling_mask(i)) == i)
+                               if (cpumask_first(
+                                   topology_sibling_cpumask(i)) == i)
                                        c->booted_cores++;
                                /*
                                 * increment the core count for all
@@ -1009,8 +1011,8 @@ static __init void disable_smp(void)
                physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
        else
                physid_set_mask_of_physid(0, &phys_cpu_present_map);
-       cpumask_set_cpu(0, cpu_sibling_mask(0));
-       cpumask_set_cpu(0, cpu_core_mask(0));
+       cpumask_set_cpu(0, topology_sibling_cpumask(0));
+       cpumask_set_cpu(0, topology_core_cpumask(0));
 }
 
 enum {
@@ -1293,22 +1295,22 @@ static void remove_siblinginfo(int cpu)
        int sibling;
        struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-       for_each_cpu(sibling, cpu_core_mask(cpu)) {
-               cpumask_clear_cpu(cpu, cpu_core_mask(sibling));
+       for_each_cpu(sibling, topology_core_cpumask(cpu)) {
+               cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
                /*/
                 * last thread sibling in this cpu core going down
                 */
-               if (cpumask_weight(cpu_sibling_mask(cpu)) == 1)
+               if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1)
                        cpu_data(sibling).booted_cores--;
        }
 
-       for_each_cpu(sibling, cpu_sibling_mask(cpu))
-               cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+       for_each_cpu(sibling, topology_sibling_cpumask(cpu))
+               cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
        for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
                cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
        cpumask_clear(cpu_llc_shared_mask(cpu));
-       cpumask_clear(cpu_sibling_mask(cpu));
-       cpumask_clear(cpu_core_mask(cpu));
+       cpumask_clear(topology_sibling_cpumask(cpu));
+       cpumask_clear(topology_core_cpumask(cpu));
        c->phys_proc_id = 0;
        c->cpu_core_id = 0;
        cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
index 2648848..dd8d079 100644 (file)
@@ -113,7 +113,7 @@ static void check_tsc_warp(unsigned int timeout)
  */
 static inline unsigned int loop_timeout(int cpu)
 {
-       return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
+       return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20;
 }
 
 /*
index 37d8fa4..a0695be 100644 (file)
@@ -75,7 +75,5 @@ EXPORT_SYMBOL(native_load_gs_index);
 
 #ifdef CONFIG_PREEMPT
 EXPORT_SYMBOL(___preempt_schedule);
-#ifdef CONFIG_CONTEXT_TRACKING
-EXPORT_SYMBOL(___preempt_schedule_context);
-#endif
+EXPORT_SYMBOL(___preempt_schedule_notrace);
 #endif
index 59b69f6..1d08ad3 100644 (file)
@@ -16,6 +16,8 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
+#include <asm/i387.h> /* For use_eager_fpu.  Ugh! */
+#include <asm/fpu-internal.h> /* For use_eager_fpu.  Ugh! */
 #include <asm/user.h>
 #include <asm/xsave.h>
 #include "cpuid.h"
@@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
        if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
                best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
+       vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
+
        /*
         * The existing code assumes virtual address is 48-bit in the canonical
         * address checks; exit if it is ever changed.
index c3b1ad9..496b369 100644 (file)
@@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
        best = kvm_find_cpuid_entry(vcpu, 7, 0);
        return best && (best->ebx & bit(X86_FEATURE_RTM));
 }
+
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best;
+
+       best = kvm_find_cpuid_entry(vcpu, 7, 0);
+       return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
 #endif
index 629af0f..4c7deb4 100644 (file)
@@ -1090,6 +1090,17 @@ static void update_divide_count(struct kvm_lapic *apic)
                                   apic->divide_count);
 }
 
+static void apic_update_lvtt(struct kvm_lapic *apic)
+{
+       u32 timer_mode = kvm_apic_get_reg(apic, APIC_LVTT) &
+                       apic->lapic_timer.timer_mode_mask;
+
+       if (apic->lapic_timer.timer_mode != timer_mode) {
+               apic->lapic_timer.timer_mode = timer_mode;
+               hrtimer_cancel(&apic->lapic_timer.timer);
+       }
+}
+
 static void apic_timer_expired(struct kvm_lapic *apic)
 {
        struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1298,6 +1309,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
                                apic_set_reg(apic, APIC_LVTT + 0x10 * i,
                                             lvt_val | APIC_LVT_MASKED);
                        }
+                       apic_update_lvtt(apic);
                        atomic_set(&apic->lapic_timer.pending, 0);
 
                }
@@ -1330,20 +1342,13 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
                break;
 
-       case APIC_LVTT: {
-               u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
-
-               if (apic->lapic_timer.timer_mode != timer_mode) {
-                       apic->lapic_timer.timer_mode = timer_mode;
-                       hrtimer_cancel(&apic->lapic_timer.timer);
-               }
-
+       case APIC_LVTT:
                if (!kvm_apic_sw_enabled(apic))
                        val |= APIC_LVT_MASKED;
                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
                apic_set_reg(apic, APIC_LVTT, val);
+               apic_update_lvtt(apic);
                break;
-       }
 
        case APIC_TMICT:
                if (apic_lvtt_tscdeadline(apic))
@@ -1576,7 +1581,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 
        for (i = 0; i < APIC_LVT_NUM; i++)
                apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
-       apic->lapic_timer.timer_mode = 0;
+       apic_update_lvtt(apic);
        apic_set_reg(apic, APIC_LVT0,
                     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 
@@ -1802,6 +1807,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 
        apic_update_ppr(apic);
        hrtimer_cancel(&apic->lapic_timer.timer);
+       apic_update_lvtt(apic);
        update_divide_count(apic);
        start_apic_timer(apic);
        apic->irr_pending = true;
index d43867c..b733376 100644 (file)
@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
        }
 }
 
-void update_permission_bitmask(struct kvm_vcpu *vcpu,
-               struct kvm_mmu *mmu, bool ept)
+static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+                                     struct kvm_mmu *mmu, bool ept)
 {
        unsigned bit, byte, pfec;
        u8 map;
@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 {
        bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+       bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
        struct kvm_mmu *context = &vcpu->arch.mmu;
 
        MMU_WARN_ON(VALID_PAGE(context->root_hpa));
@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
        context->base_role.cr0_wp  = is_write_protection(vcpu);
        context->base_role.smep_andnot_wp
                = smep && !is_write_protection(vcpu);
+       context->base_role.smap_andnot_wp
+               = smap && !is_write_protection(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                       const u8 *new, int bytes)
 {
        gfn_t gfn = gpa >> PAGE_SHIFT;
-       union kvm_mmu_page_role mask = { .word = 0 };
        struct kvm_mmu_page *sp;
        LIST_HEAD(invalid_list);
        u64 entry, gentry, *spte;
        int npte;
        bool remote_flush, local_flush, zap_page;
+       union kvm_mmu_page_role mask = { };
+
+       mask.cr0_wp = 1;
+       mask.cr4_pae = 1;
+       mask.nxe = 1;
+       mask.smep_andnot_wp = 1;
+       mask.smap_andnot_wp = 1;
 
        /*
         * If we don't have indirect shadow pages, it means no page is
@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        ++vcpu->kvm->stat.mmu_pte_write;
        kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
-       mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
        for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
                if (detect_write_misaligned(sp, gpa, bytes) ||
                      detect_write_flooding(sp)) {
index c7d6563..0ada65e 100644 (file)
@@ -71,8 +71,6 @@ enum {
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
-void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-               bool ept);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
@@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
        int index = (pfec >> 1) +
                    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
 
+       WARN_ON(pfec & PFERR_RSVD_MASK);
+
        return (mmu->permissions[index] >> pte_access) & 1;
 }
 
index fd49c86..6e6d115 100644 (file)
@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
                                              mmu_is_nested(vcpu));
                if (likely(r != RET_MMIO_PF_INVALID))
                        return r;
+
+               /*
+                * page fault with PFEC.RSVD  = 1 is caused by shadow
+                * page fault, should not be used to walk guest page
+                * table.
+                */
+               error_code &= ~PFERR_RSVD_MASK;
        };
 
        r = mmu_topup_memory_caches(vcpu);
index ce741b8..9afa233 100644 (file)
@@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = {
        .cache_reg = svm_cache_reg,
        .get_rflags = svm_get_rflags,
        .set_rflags = svm_set_rflags,
+       .fpu_activate = svm_fpu_activate,
        .fpu_deactivate = svm_fpu_deactivate,
 
        .tlb_flush = svm_flush_tlb,
index f7b6168..2d73807 100644 (file)
@@ -10185,6 +10185,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .cache_reg = vmx_cache_reg,
        .get_rflags = vmx_get_rflags,
        .set_rflags = vmx_set_rflags,
+       .fpu_activate = vmx_fpu_activate,
        .fpu_deactivate = vmx_fpu_deactivate,
 
        .tlb_flush = vmx_flush_tlb,
index c73efcd..ea306ad 100644 (file)
@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        unsigned long old_cr4 = kvm_read_cr4(vcpu);
-       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
-                                  X86_CR4_PAE | X86_CR4_SMEP;
+       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+                                  X86_CR4_SMEP | X86_CR4_SMAP;
+
        if (cr4 & CR4_RESERVED_BITS)
                return 1;
 
@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
            (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
                kvm_mmu_reset_context(vcpu);
 
-       if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
-               update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
-
        if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
                kvm_update_cpuid(vcpu);
 
@@ -6197,6 +6195,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
                return;
 
        page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+       if (is_error_page(page))
+               return;
        kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
 
        /*
@@ -7060,7 +7060,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
        fpu_save_init(&vcpu->arch.guest_fpu);
        __kernel_fpu_end();
        ++vcpu->stat.fpu_reload;
-       kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+       if (!vcpu->arch.eager_fpu)
+               kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+
        trace_kvm_fpu(0);
 }
 
@@ -7076,11 +7078,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                                                unsigned int id)
 {
+       struct kvm_vcpu *vcpu;
+
        if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
                printk_once(KERN_WARNING
                "kvm: SMP vm created on host with unstable TSC; "
                "guest TSC will not be reliable\n");
-       return kvm_x86_ops->vcpu_create(kvm, id);
+
+       vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+
+       /*
+        * Activate fpu unconditionally in case the guest needs eager FPU.  It will be
+        * deactivated soon if it doesn't.
+        */
+       kvm_x86_ops->fpu_activate(vcpu);
+       return vcpu;
 }
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
index 5eb7150..e407941 100644 (file)
@@ -38,8 +38,6 @@
 
 #ifdef CONFIG_PREEMPT
        THUNK ___preempt_schedule, preempt_schedule
-#ifdef CONFIG_CONTEXT_TRACKING
-       THUNK ___preempt_schedule_context, preempt_schedule_context
-#endif
+       THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
 #endif
 
index f89ba4e..2198902 100644 (file)
@@ -49,9 +49,7 @@
 
 #ifdef CONFIG_PREEMPT
        THUNK ___preempt_schedule, preempt_schedule
-#ifdef CONFIG_CONTEXT_TRACKING
-       THUNK ___preempt_schedule_context, preempt_schedule_context
-#endif
+       THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
 #endif
 
 #if defined(CONFIG_TRACE_IRQFLAGS) \
index e2f5e21..91d93b9 100644 (file)
@@ -647,7 +647,8 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
  * @from: Source address, in kernel space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -668,7 +669,8 @@ EXPORT_SYMBOL(_copy_to_user);
  * @from: Source address, in user space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.
  *
index 181c53b..9dc9098 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/hugetlb.h>             /* hstate_index_to_shift        */
 #include <linux/prefetch.h>            /* prefetchw                    */
 #include <linux/context_tracking.h>    /* exception_enter(), ...       */
+#include <linux/uaccess.h>             /* faulthandler_disabled()      */
 
 #include <asm/traps.h>                 /* dotraplinkage, ...           */
 #include <asm/pgalloc.h>               /* pgd_*(), ...                 */
@@ -1126,9 +1127,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 
        /*
         * If we're in an interrupt, have no user context or are running
-        * in an atomic region then we must not take the fault:
+        * in a region with pagefaults disabled then we must not take the fault
         */
-       if (unlikely(in_atomic() || !mm)) {
+       if (unlikely(faulthandler_disabled() || !mm)) {
                bad_area_nosemaphore(regs, error_code, address);
                return;
        }
index 4500142..eecb207 100644 (file)
@@ -35,7 +35,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
        unsigned long vaddr;
        int idx, type;
 
-       /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+       preempt_disable();
        pagefault_disable();
 
        if (!PageHighMem(page))
@@ -100,6 +100,7 @@ void __kunmap_atomic(void *kvaddr)
 #endif
 
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
 
index 9ca35fc..2b7ece0 100644 (file)
@@ -59,6 +59,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
        unsigned long vaddr;
        int idx, type;
 
+       preempt_disable();
        pagefault_disable();
 
        type = kmap_atomic_idx_push();
@@ -117,5 +118,6 @@ iounmap_atomic(void __iomem *kvaddr)
        }
 
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL_GPL(iounmap_atomic);
index 5ead4d6..27ff212 100644 (file)
@@ -351,18 +351,20 @@ int arch_ioremap_pmd_supported(void)
  */
 void *xlate_dev_mem_ptr(phys_addr_t phys)
 {
-       void *addr;
-       unsigned long start = phys & PAGE_MASK;
+       unsigned long start  = phys &  PAGE_MASK;
+       unsigned long offset = phys & ~PAGE_MASK;
+       void *vaddr;
 
        /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
        if (page_is_ram(start >> PAGE_SHIFT))
                return __va(phys);
 
-       addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
-       if (addr)
-               addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
+       vaddr = ioremap_cache(start, PAGE_SIZE);
+       /* Only add the offset on success and return NULL if the ioremap() failed: */
+       if (vaddr)
+               vaddr += offset;
 
-       return addr;
+       return vaddr;
 }
 
 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
@@ -371,7 +373,6 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
                return;
 
        iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
-       return;
 }
 
 static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
index 9875143..ddeff48 100644 (file)
@@ -559,6 +559,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                                if (is_ereg(dst_reg))
                                        EMIT1(0x41);
                                EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
+
+                               /* emit 'movzwl eax, ax' */
+                               if (is_ereg(dst_reg))
+                                       EMIT3(0x45, 0x0F, 0xB7);
+                               else
+                                       EMIT2(0x0F, 0xB7);
+                               EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
                                break;
                        case 32:
                                /* emit 'bswap eax' to swap lower 4 bytes */
@@ -577,6 +584,27 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        break;
 
                case BPF_ALU | BPF_END | BPF_FROM_LE:
+                       switch (imm32) {
+                       case 16:
+                               /* emit 'movzwl eax, ax' to zero extend 16-bit
+                                * into 64 bit
+                                */
+                               if (is_ereg(dst_reg))
+                                       EMIT3(0x45, 0x0F, 0xB7);
+                               else
+                                       EMIT2(0x0F, 0xB7);
+                               EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
+                               break;
+                       case 32:
+                               /* emit 'mov eax, eax' to clear upper 32-bits */
+                               if (is_ereg(dst_reg))
+                                       EMIT1(0x45);
+                               EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
+                               break;
+                       case 64:
+                               /* nop */
+                               break;
+                       }
                        break;
 
                        /* ST: *(u8*)(dst_reg + off) = imm */
@@ -938,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
        }
        ctx.cleanup_addr = proglen;
 
-       for (pass = 0; pass < 10; pass++) {
+       /* JITed image shrinks with every pass and the loop iterates
+        * until the image stops shrinking. Very large bpf programs
+        * may converge on the last pass. In such case do one more
+        * pass to emit the final image
+        */
+       for (pass = 0; pass < 10 || image; pass++) {
                proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
                if (proglen <= 0) {
                        image = NULL;
index e469598..14a63ed 100644 (file)
@@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge)
        kfree(info);
 }
 
+/*
+ * An IO port or MMIO resource assigned to a PCI host bridge may be
+ * consumed by the host bridge itself or available to its child
+ * bus/devices. The ACPI specification defines a bit (Producer/Consumer)
+ * to tell whether the resource is consumed by the host bridge itself,
+ * but firmware hasn't used that bit consistently, so we can't rely on it.
+ *
+ * On x86 and IA64 platforms, all IO port and MMIO resources are assumed
+ * to be available to child bus/devices except one special case:
+ *     IO port [0xCF8-0xCFF] is consumed by the host bridge itself
+ *     to access PCI configuration space.
+ *
+ * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF].
+ */
+static bool resource_is_pcicfg_ioport(struct resource *res)
+{
+       return (res->flags & IORESOURCE_IO) &&
+               res->start == 0xCF8 && res->end == 0xCFF;
+}
+
 static void probe_pci_root_info(struct pci_root_info *info,
                                struct acpi_device *device,
                                int busnum, int domain,
@@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info,
                        "no IO and memory resources present in _CRS\n");
        else
                resource_list_for_each_entry_safe(entry, tmp, list) {
-                       if ((entry->res->flags & IORESOURCE_WINDOW) == 0 ||
-                           (entry->res->flags & IORESOURCE_DISABLED))
+                       if ((entry->res->flags & IORESOURCE_DISABLED) ||
+                           resource_is_pcicfg_ioport(entry->res))
                                resource_list_destroy_entry(entry);
                        else
                                entry->res->name = info->name;
@@ -462,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-       struct pci_sysdata *sd = bridge->bus->sysdata;
-
-       ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+       /*
+        * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+        * here, pci_create_root_bus() has been called by someone else and
+        * sysdata is likely to be different from what we expect.  Let it go in
+        * that case.
+        */
+       if (!bridge->dev.parent) {
+               struct pci_sysdata *sd = bridge->bus->sysdata;
+               ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+       }
        return 0;
 }
 
index 7e8a1a6..b9531d3 100644 (file)
@@ -39,7 +39,8 @@
 #define smp_mb()       barrier()
 #define smp_rmb()      barrier()
 #define smp_wmb()      barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
 
 #define read_barrier_depends()         do { } while (0)
 #define smp_read_barrier_depends()     do { } while (0)
index 275a3a8..e970320 100644 (file)
@@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
        $(call if_changed,vdso)
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
 hostprogs-y                    += vdso2c
 
 quiet_cmd_vdso2c = VDSO2C  $@
index 94578ef..46957ea 100644 (file)
@@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = {
 
 static void __init xen_hvm_guest_init(void)
 {
+       if (xen_pv_domain())
+               return;
+
        init_hvm_pv_info();
 
        xen_hvm_init_shared_info();
@@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void)
        xen_hvm_init_time_ops();
        xen_hvm_init_mmu_ops();
 }
+#endif
 
 static bool xen_nopv = false;
 static __init int xen_parse_nopv(char *arg)
@@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg)
 }
 early_param("xen_nopv", xen_parse_nopv);
 
-static uint32_t __init xen_hvm_platform(void)
+static uint32_t __init xen_platform(void)
 {
        if (xen_nopv)
                return 0;
 
-       if (xen_pv_domain())
-               return 0;
-
        return xen_cpuid_base();
 }
 
@@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void)
 }
 EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
-const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
-       .name                   = "Xen HVM",
-       .detect                 = xen_hvm_platform,
+static void xen_set_cpu_features(struct cpuinfo_x86 *c)
+{
+       if (xen_pv_domain())
+               clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+}
+
+const struct hypervisor_x86 x86_hyper_xen = {
+       .name                   = "Xen",
+       .detect                 = xen_platform,
+#ifdef CONFIG_XEN_PVHVM
        .init_platform          = xen_hvm_guest_init,
+#endif
        .x2apic_available       = xen_x2apic_para_available,
+       .set_cpu_features       = xen_set_cpu_features,
 };
-EXPORT_SYMBOL(x86_hyper_xen_hvm);
-#endif
+EXPORT_SYMBOL(x86_hyper_xen);
index 956374c..9e2ba5c 100644 (file)
 #include "xen-ops.h"
 #include "debugfs.h"
 
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(char *, irq_name);
+static bool xen_pvspin = true;
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+#include <asm/qspinlock.h>
+
+static void xen_qlock_kick(int cpu)
+{
+       xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+}
+
+/*
+ * Halt the current CPU & release it back to the host
+ */
+static void xen_qlock_wait(u8 *byte, u8 val)
+{
+       int irq = __this_cpu_read(lock_kicker_irq);
+
+       /* If kicker interrupts not initialized yet, just spin */
+       if (irq == -1)
+               return;
+
+       /* clear pending */
+       xen_clear_irq_pending(irq);
+       barrier();
+
+       /*
+        * We check the byte value after clearing pending IRQ to make sure
+        * that we won't miss a wakeup event because of the clearing.
+        *
+        * The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
+        * So it is effectively a memory barrier for x86.
+        */
+       if (READ_ONCE(*byte) != val)
+               return;
+
+       /*
+        * If an interrupt happens here, it will leave the wakeup irq
+        * pending, which will cause xen_poll_irq() to return
+        * immediately.
+        */
+
+       /* Block until irq becomes pending (or perhaps a spurious wakeup) */
+       xen_poll_irq(irq);
+}
+
+#else /* CONFIG_QUEUED_SPINLOCKS */
+
 enum xen_contention_stat {
        TAKEN_SLOW,
        TAKEN_SLOW_PICKUP,
@@ -100,12 +150,9 @@ struct xen_lock_waiting {
        __ticket_t want;
 };
 
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(char *, irq_name);
 static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
 static cpumask_t waiting_cpus;
 
-static bool xen_pvspin = true;
 __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
        int irq = __this_cpu_read(lock_kicker_irq);
@@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
                }
        }
 }
+#endif /* CONFIG_QUEUED_SPINLOCKS */
 
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
@@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void)
                return;
        }
        printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
+#ifdef CONFIG_QUEUED_SPINLOCKS
+       __pv_init_lock_hash();
+       pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+       pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+       pv_lock_ops.wait = xen_qlock_wait;
+       pv_lock_ops.kick = xen_qlock_kick;
+#else
        pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
        pv_lock_ops.unlock_kick = xen_unlock_kick;
+#endif
 }
 
 /*
@@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg)
 }
 early_param("xen_nopvspin", xen_parse_nopvspin);
 
-#ifdef CONFIG_XEN_DEBUG_FS
+#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
 
 static struct dentry *d_spin_debug;
 
index d949769..53b4c08 100644 (file)
@@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data)
        tick_resume_local();
 }
 
+static void xen_vcpu_notify_suspend(void *data)
+{
+       tick_suspend_local();
+}
+
 void xen_arch_resume(void)
 {
        on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
 }
+
+void xen_arch_suspend(void)
+{
+       on_each_cpu(xen_vcpu_notify_suspend, NULL, 1);
+}
index 172a02a..ba78ccf 100644 (file)
@@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt,
        return -EINVAL;
 }
 
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+                                   dma_addr_t *dma_handle, gfp_t flag,
+                                   struct dma_attrs *attrs)
+{
+       return NULL;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+                                 void *vaddr, dma_addr_t dma_handle,
+                                 struct dma_attrs *attrs)
+{
+}
+
 #endif /* _XTENSA_DMA_MAPPING_H */
index 9e3571a..83a44a3 100644 (file)
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/hardirq.h>
-#include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
 DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
        /* If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
-       if (in_atomic() || !mm) {
+       if (faulthandler_disabled() || !mm) {
                bad_page_fault(regs, address, SIGSEGV);
                return;
        }
index 8cfb71e..184cead 100644 (file)
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
        enum fixed_addresses idx;
        unsigned long vaddr;
 
+       preempt_disable();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
@@ -79,6 +80,7 @@ void __kunmap_atomic(void *kvaddr)
        }
 
        pagefault_enable();
+       preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
 
index fd154b9..03b5f8d 100644 (file)
@@ -552,6 +552,8 @@ void blk_cleanup_queue(struct request_queue *q)
                q->queue_lock = &q->__queue_lock;
        spin_unlock_irq(lock);
 
+       bdi_destroy(&q->backing_dev_info);
+
        /* @q is and will stay empty, shutdown and put */
        blk_put_queue(q);
 }
@@ -732,6 +734,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
+static void blk_queue_bio(struct request_queue *q, struct bio *bio);
+
 struct request_queue *
 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
                         spinlock_t *lock)
@@ -1576,7 +1580,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
        blk_rq_bio_prep(req->q, req, bio);
 }
 
-void blk_queue_bio(struct request_queue *q, struct bio *bio)
+static void blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
        const bool sync = !!(bio->bi_rw & REQ_SYNC);
        struct blk_plug *plug;
@@ -1684,7 +1688,6 @@ out_unlock:
                spin_unlock_irq(q->queue_lock);
        }
 }
-EXPORT_SYMBOL_GPL(blk_queue_bio);      /* for device mapper only */
 
 /*
  * If bio->bi_dev is a partition, remap the location
index 5f13f4d..1e28ddb 100644 (file)
@@ -24,7 +24,7 @@ static int get_first_sibling(unsigned int cpu)
 {
        unsigned int ret;
 
-       ret = cpumask_first(topology_thread_cpumask(cpu));
+       ret = cpumask_first(topology_sibling_cpumask(cpu));
        if (ret < nr_cpu_ids)
                return ret;
 
index ade8a2d..594eea0 100644 (file)
@@ -677,8 +677,11 @@ static void blk_mq_rq_timer(unsigned long priv)
                data.next = blk_rq_timeout(round_jiffies_up(data.next));
                mod_timer(&q->timeout, data.next);
        } else {
-               queue_for_each_hw_ctx(q, hctx, i)
-                       blk_mq_tag_idle(hctx);
+               queue_for_each_hw_ctx(q, hctx, i) {
+                       /* the hctx may be unmapped, so check it here */
+                       if (blk_mq_hw_queue_mapped(hctx))
+                               blk_mq_tag_idle(hctx);
+               }
        }
 }
 
@@ -855,6 +858,16 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
                spin_lock(&hctx->lock);
                list_splice(&rq_list, &hctx->dispatch);
                spin_unlock(&hctx->lock);
+               /*
+                * the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but
+                * it's possible the queue is stopped and restarted again
+                * before this. Queue restart will dispatch requests. And since
+                * requests in rq_list aren't added into hctx->dispatch yet,
+                * the requests in rq_list might get lost.
+                *
+                * blk_mq_run_hw_queue() already checks the STOPPED bit
+                **/
+               blk_mq_run_hw_queue(hctx, true);
        }
 }
 
@@ -1571,22 +1584,6 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
        return NOTIFY_OK;
 }
 
-static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu)
-{
-       struct request_queue *q = hctx->queue;
-       struct blk_mq_tag_set *set = q->tag_set;
-
-       if (set->tags[hctx->queue_num])
-               return NOTIFY_OK;
-
-       set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num);
-       if (!set->tags[hctx->queue_num])
-               return NOTIFY_STOP;
-
-       hctx->tags = set->tags[hctx->queue_num];
-       return NOTIFY_OK;
-}
-
 static int blk_mq_hctx_notify(void *data, unsigned long action,
                              unsigned int cpu)
 {
@@ -1594,12 +1591,16 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
 
        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
                return blk_mq_hctx_cpu_offline(hctx, cpu);
-       else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-               return blk_mq_hctx_cpu_online(hctx, cpu);
+
+       /*
+        * In case of CPU online, tags may be reallocated
+        * in blk_mq_map_swqueue() after mapping is updated.
+        */
 
        return NOTIFY_OK;
 }
 
+/* hctx->ctxs will be freed in queue's release handler */
 static void blk_mq_exit_hctx(struct request_queue *q,
                struct blk_mq_tag_set *set,
                struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
@@ -1618,7 +1619,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 
        blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
        blk_free_flush_queue(hctx->fq);
-       kfree(hctx->ctxs);
        blk_mq_free_bitmap(&hctx->ctx_map);
 }
 
@@ -1775,6 +1775,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
        unsigned int i;
        struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx;
+       struct blk_mq_tag_set *set = q->tag_set;
 
        queue_for_each_hw_ctx(q, hctx, i) {
                cpumask_clear(hctx->cpumask);
@@ -1803,16 +1804,20 @@ static void blk_mq_map_swqueue(struct request_queue *q)
                 * disable it and free the request entries.
                 */
                if (!hctx->nr_ctx) {
-                       struct blk_mq_tag_set *set = q->tag_set;
-
                        if (set->tags[i]) {
                                blk_mq_free_rq_map(set, set->tags[i], i);
                                set->tags[i] = NULL;
-                               hctx->tags = NULL;
                        }
+                       hctx->tags = NULL;
                        continue;
                }
 
+               /* unmapped hw queue can be remapped after CPU topo changed */
+               if (!set->tags[i])
+                       set->tags[i] = blk_mq_init_rq_map(set, i);
+               hctx->tags = set->tags[i];
+               WARN_ON(!hctx->tags);
+
                /*
                 * Set the map size to the number of mapped software queues.
                 * This is more accurate and more efficient than looping
@@ -1886,8 +1891,12 @@ void blk_mq_release(struct request_queue *q)
        unsigned int i;
 
        /* hctx kobj stays in hctx */
-       queue_for_each_hw_ctx(q, hctx, i)
+       queue_for_each_hw_ctx(q, hctx, i) {
+               if (!hctx)
+                       continue;
+               kfree(hctx->ctxs);
                kfree(hctx);
+       }
 
        kfree(q->queue_hw_ctx);
 
@@ -2090,9 +2099,16 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
         */
        list_for_each_entry(q, &all_q_list, all_q_node)
                blk_mq_freeze_queue_start(q);
-       list_for_each_entry(q, &all_q_list, all_q_node)
+       list_for_each_entry(q, &all_q_list, all_q_node) {
                blk_mq_freeze_queue_wait(q);
 
+               /*
+                * timeout handler can't touch hw queue during the
+                * reinitialization
+                */
+               del_timer_sync(&q->timeout);
+       }
+
        list_for_each_entry(q, &all_q_list, all_q_node)
                blk_mq_queue_reinit(q);
 
index faaf36a..2b8fd30 100644 (file)
@@ -522,8 +522,6 @@ static void blk_release_queue(struct kobject *kobj)
 
        blk_trace_shutdown(q);
 
-       bdi_destroy(&q->backing_dev_info);
-
        ida_simple_remove(&blk_queue_ida, q->id);
        call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
index ab21ba2..ed9dd80 100644 (file)
@@ -221,8 +221,8 @@ bounce:
                if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
                        continue;
 
-               inc_zone_page_state(to->bv_page, NR_BOUNCE);
                to->bv_page = mempool_alloc(pool, q->bounce_gfp);
+               inc_zone_page_state(to->bv_page, NR_BOUNCE);
 
                if (rw == WRITE) {
                        char *vto, *vfrom;
index 59794d0..8985038 100644 (file)
@@ -157,7 +157,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
 
        eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
        if (unlikely(!eq))
-               goto err;
+               return NULL;
 
        eq->type = e;
        kobject_init(&eq->kobj, &elv_ktype);
@@ -165,10 +165,6 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
        hash_init(eq->hash);
 
        return eq;
-err:
-       kfree(eq);
-       elevator_put(e);
-       return NULL;
 }
 EXPORT_SYMBOL(elevator_alloc);
 
index 0a536dc..ea982ea 100644 (file)
@@ -422,9 +422,9 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
        /* allocate ext devt */
        idr_preload(GFP_KERNEL);
 
-       spin_lock(&ext_devt_lock);
+       spin_lock_bh(&ext_devt_lock);
        idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
-       spin_unlock(&ext_devt_lock);
+       spin_unlock_bh(&ext_devt_lock);
 
        idr_preload_end();
        if (idx < 0)
@@ -449,9 +449,9 @@ void blk_free_devt(dev_t devt)
                return;
 
        if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
-               spin_lock(&ext_devt_lock);
+               spin_lock_bh(&ext_devt_lock);
                idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
-               spin_unlock(&ext_devt_lock);
+               spin_unlock_bh(&ext_devt_lock);
        }
 }
 
@@ -653,7 +653,6 @@ void del_gendisk(struct gendisk *disk)
        disk->flags &= ~GENHD_FL_UP;
 
        sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
-       bdi_unregister(&disk->queue->backing_dev_info);
        blk_unregister_queue(disk);
        blk_unregister_region(disk_devt(disk), disk->minors);
 
@@ -691,13 +690,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
        } else {
                struct hd_struct *part;
 
-               spin_lock(&ext_devt_lock);
+               spin_lock_bh(&ext_devt_lock);
                part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
                if (part && get_disk(part_to_disk(part))) {
                        *partno = part->partno;
                        disk = part_to_disk(part);
                }
-               spin_unlock(&ext_devt_lock);
+               spin_unlock_bh(&ext_devt_lock);
        }
 
        return disk;
index 8aaf298..362905e 100644 (file)
@@ -1512,15 +1512,6 @@ config CRYPTO_USER_API_RNG
          This option enables the user-spaces interface for random
          number generator algorithms.
 
-config CRYPTO_USER_API_AEAD
-       tristate "User-space interface for AEAD cipher algorithms"
-       depends on NET
-       select CRYPTO_AEAD
-       select CRYPTO_USER_API
-       help
-         This option enables the user-spaces interface for AEAD
-         cipher algorithms.
-
 config CRYPTO_HASH_INFO
        bool
 
index 00a6fe1..69abada 100644 (file)
@@ -33,7 +33,7 @@ struct aead_ctx {
        /*
         * RSGL_MAX_ENTRIES is an artificial limit where user space at maximum
         * can cause the kernel to allocate RSGL_MAX_ENTRIES * ALG_MAX_PAGES
-        * bytes
+        * pages
         */
 #define RSGL_MAX_ENTRIES ALG_MAX_PAGES
        struct af_alg_sgl rsgl[RSGL_MAX_ENTRIES];
@@ -435,11 +435,10 @@ static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored,
                if (err < 0)
                        goto unlock;
                usedpages += err;
-               /* chain the new scatterlist with initial list */
+               /* chain the new scatterlist with previous one */
                if (cnt)
-                       scatterwalk_crypto_chain(ctx->rsgl[0].sg,
-                                       ctx->rsgl[cnt].sg, 1,
-                                       sg_nents(ctx->rsgl[cnt-1].sg));
+                       af_alg_link_sg(&ctx->rsgl[cnt-1], &ctx->rsgl[cnt]);
+
                /* we do not need more iovecs as we have sufficient memory */
                if (outlen <= usedpages)
                        break;
index 6bc9cbc..00b3980 100644 (file)
@@ -105,7 +105,7 @@ static void round_robin_cpu(unsigned int tsk_index)
        mutex_lock(&round_robin_lock);
        cpumask_clear(tmp);
        for_each_cpu(cpu, pad_busy_cpus)
-               cpumask_or(tmp, tmp, topology_thread_cpumask(cpu));
+               cpumask_or(tmp, tmp, topology_sibling_cpumask(cpu));
        cpumask_andnot(tmp, cpu_online_mask, tmp);
        /* avoid HT sibilings if possible */
        if (cpumask_empty(tmp))
index b193f84..ff6d8ad 100644 (file)
@@ -304,6 +304,8 @@ static const struct acpi_device_id acpi_pnp_device_ids[] = {
        {"PNPb006"},
        /* cs423x-pnpbios */
        {"CSC0100"},
+       {"CSC0103"},
+       {"CSC0110"},
        {"CSC0000"},
        {"GIM0100"},            /* Guillemot Turtlebeach something appears to be cs4232 compatible */
        /* es18xx-pnpbios */
index a72685c..5e8df91 100644 (file)
@@ -102,19 +102,12 @@ const struct acpi_predefined_names acpi_gbl_pre_defined_names[] = {
        {"_SB_", ACPI_TYPE_DEVICE, NULL},
        {"_SI_", ACPI_TYPE_LOCAL_SCOPE, NULL},
        {"_TZ_", ACPI_TYPE_DEVICE, NULL},
-       /*
-        * March, 2015:
-        * The _REV object is in the process of being deprecated, because
-        * other ACPI implementations permanently return 2. Thus, it
-        * has little or no value. Return 2 for compatibility with
-        * other ACPI implementations.
-        */
-       {"_REV", ACPI_TYPE_INTEGER, ACPI_CAST_PTR(char, 2)},
+       {"_REV", ACPI_TYPE_INTEGER, (char *)ACPI_CA_SUPPORT_LEVEL},
        {"_OS_", ACPI_TYPE_STRING, ACPI_OS_NAME},
-       {"_GL_", ACPI_TYPE_MUTEX, ACPI_CAST_PTR(char, 1)},
+       {"_GL_", ACPI_TYPE_MUTEX, (char *)1},
 
 #if !defined (ACPI_NO_METHOD_EXECUTION) || defined (ACPI_CONSTANT_EVAL_ONLY)
-       {"_OSI", ACPI_TYPE_METHOD, ACPI_CAST_PTR(char, 1)},
+       {"_OSI", ACPI_TYPE_METHOD, (char *)1},
 #endif
 
        /* Table terminator */
index 39748bb..7ccba39 100644 (file)
@@ -182,7 +182,7 @@ static void __init acpi_request_region (struct acpi_generic_address *gas,
                request_mem_region(addr, length, desc);
 }
 
-static int __init acpi_reserve_resources(void)
+static void __init acpi_reserve_resources(void)
 {
        acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length,
                "ACPI PM1a_EVT_BLK");
@@ -211,10 +211,7 @@ static int __init acpi_reserve_resources(void)
        if (!(acpi_gbl_FADT.gpe1_block_length & 0x1))
                acpi_request_region(&acpi_gbl_FADT.xgpe1_block,
                               acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK");
-
-       return 0;
 }
-device_initcall(acpi_reserve_resources);
 
 void acpi_os_printf(const char *fmt, ...)
 {
@@ -1845,6 +1842,7 @@ acpi_status __init acpi_os_initialize(void)
 
 acpi_status __init acpi_os_initialize1(void)
 {
+       acpi_reserve_resources();
        kacpid_wq = alloc_workqueue("kacpid", 0, 1);
        kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1);
        kacpi_hotplug_wq = alloc_ordered_workqueue("kacpi_hotplug", 0);
index 5589a6e..8244f01 100644 (file)
@@ -573,7 +573,7 @@ EXPORT_SYMBOL_GPL(acpi_dev_get_resources);
  * @ares: Input ACPI resource object.
  * @types: Valid resource types of IORESOURCE_XXX
  *
- * This is a hepler function to support acpi_dev_get_resources(), which filters
+ * This is a helper function to support acpi_dev_get_resources(), which filters
  * ACPI resource objects according to resource types.
  */
 int acpi_dev_filter_resource_type(struct acpi_resource *ares,
index 26e5b50..bf034f8 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/dmi.h>
 #include "sbshc.h"
 
 #define PREFIX "ACPI: "
@@ -87,6 +88,8 @@ enum acpi_smb_offset {
        ACPI_SMB_ALARM_DATA = 0x26,     /* 2 bytes alarm data */
 };
 
+static bool macbook;
+
 static inline int smb_hc_read(struct acpi_smb_hc *hc, u8 address, u8 *data)
 {
        return ec_read(hc->offset + address, data);
@@ -132,6 +135,8 @@ static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol,
        }
 
        mutex_lock(&hc->lock);
+       if (macbook)
+               udelay(5);
        if (smb_hc_read(hc, ACPI_SMB_PROTOCOL, &temp))
                goto end;
        if (temp) {
@@ -257,12 +262,29 @@ extern int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
                              acpi_handle handle, acpi_ec_query_func func,
                              void *data);
 
+static int macbook_dmi_match(const struct dmi_system_id *d)
+{
+       pr_debug("Detected MacBook, enabling workaround\n");
+       macbook = true;
+       return 0;
+}
+
+static struct dmi_system_id acpi_smbus_dmi_table[] = {
+       { macbook_dmi_match, "Apple MacBook", {
+         DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+         DMI_MATCH(DMI_PRODUCT_NAME, "MacBook") },
+       },
+       { },
+};
+
 static int acpi_smbus_hc_add(struct acpi_device *device)
 {
        int status;
        unsigned long long val;
        struct acpi_smb_hc *hc;
 
+       dmi_check_system(acpi_smbus_dmi_table);
+
        if (!device)
                return -EINVAL;
 
index 5f60155..9dca4b9 100644 (file)
@@ -270,6 +270,7 @@ config ATA_PIIX
 config SATA_DWC
        tristate "DesignWare Cores SATA support"
        depends on 460EX
+       select DW_DMAC
        help
          This option enables support for the on-chip SATA controller of the
          AppliedMicro processor 460EX.
@@ -729,15 +730,6 @@ config PATA_SC1200
 
          If unsure, say N.
 
-config PATA_SCC
-       tristate "Toshiba's Cell Reference Set IDE support"
-       depends on PCI && PPC_CELLEB
-       help
-         This option enables support for the built-in IDE controller on
-         Toshiba Cell Reference Board.
-
-         If unsure, say N.
-
 config PATA_SCH
        tristate "Intel SCH PATA support"
        depends on PCI
index b67e995..40f7865 100644 (file)
@@ -75,7 +75,6 @@ obj-$(CONFIG_PATA_PDC_OLD)    += pata_pdc202xx_old.o
 obj-$(CONFIG_PATA_RADISYS)     += pata_radisys.o
 obj-$(CONFIG_PATA_RDC)         += pata_rdc.o
 obj-$(CONFIG_PATA_SC1200)      += pata_sc1200.o
-obj-$(CONFIG_PATA_SCC)         += pata_scc.o
 obj-$(CONFIG_PATA_SCH)         += pata_sch.o
 obj-$(CONFIG_PATA_SERVERWORKS) += pata_serverworks.o
 obj-$(CONFIG_PATA_SIL680)      += pata_sil680.o
index c7a92a7..65ee944 100644 (file)
@@ -66,6 +66,7 @@ enum board_ids {
        board_ahci_yes_fbs,
 
        /* board IDs for specific chipsets in alphabetical order */
+       board_ahci_avn,
        board_ahci_mcp65,
        board_ahci_mcp77,
        board_ahci_mcp89,
@@ -84,6 +85,8 @@ enum board_ids {
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
                                 unsigned long deadline);
+static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
+                             unsigned long deadline);
 static void ahci_mcp89_apple_enable(struct pci_dev *pdev);
 static bool is_mcp89_apple(struct pci_dev *pdev);
 static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class,
@@ -107,6 +110,11 @@ static struct ata_port_operations ahci_p5wdh_ops = {
        .hardreset              = ahci_p5wdh_hardreset,
 };
 
+static struct ata_port_operations ahci_avn_ops = {
+       .inherits               = &ahci_ops,
+       .hardreset              = ahci_avn_hardreset,
+};
+
 static const struct ata_port_info ahci_port_info[] = {
        /* by features */
        [board_ahci] = {
@@ -151,6 +159,12 @@ static const struct ata_port_info ahci_port_info[] = {
                .port_ops       = &ahci_ops,
        },
        /* by chipsets */
+       [board_ahci_avn] = {
+               .flags          = AHCI_FLAG_COMMON,
+               .pio_mask       = ATA_PIO4,
+               .udma_mask      = ATA_UDMA6,
+               .port_ops       = &ahci_avn_ops,
+       },
        [board_ahci_mcp65] = {
                AHCI_HFLAGS     (AHCI_HFLAG_NO_FPDMA_AA | AHCI_HFLAG_NO_PMP |
                                 AHCI_HFLAG_YES_NCQ),
@@ -290,14 +304,14 @@ static const struct pci_device_id ahci_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x1f27), board_ahci }, /* Avoton RAID */
        { PCI_VDEVICE(INTEL, 0x1f2e), board_ahci }, /* Avoton RAID */
        { PCI_VDEVICE(INTEL, 0x1f2f), board_ahci }, /* Avoton RAID */
-       { PCI_VDEVICE(INTEL, 0x1f32), board_ahci }, /* Avoton AHCI */
-       { PCI_VDEVICE(INTEL, 0x1f33), board_ahci }, /* Avoton AHCI */
-       { PCI_VDEVICE(INTEL, 0x1f34), board_ahci }, /* Avoton RAID */
-       { PCI_VDEVICE(INTEL, 0x1f35), board_ahci }, /* Avoton RAID */
-       { PCI_VDEVICE(INTEL, 0x1f36), board_ahci }, /* Avoton RAID */
-       { PCI_VDEVICE(INTEL, 0x1f37), board_ahci }, /* Avoton RAID */
-       { PCI_VDEVICE(INTEL, 0x1f3e), board_ahci }, /* Avoton RAID */
-       { PCI_VDEVICE(INTEL, 0x1f3f), board_ahci }, /* Avoton RAID */
+       { PCI_VDEVICE(INTEL, 0x1f32), board_ahci_avn }, /* Avoton AHCI */
+       { PCI_VDEVICE(INTEL, 0x1f33), board_ahci_avn }, /* Avoton AHCI */
+       { PCI_VDEVICE(INTEL, 0x1f34), board_ahci_avn }, /* Avoton RAID */
+       { PCI_VDEVICE(INTEL, 0x1f35), board_ahci_avn }, /* Avoton RAID */
+       { PCI_VDEVICE(INTEL, 0x1f36), board_ahci_avn }, /* Avoton RAID */
+       { PCI_VDEVICE(INTEL, 0x1f37), board_ahci_avn }, /* Avoton RAID */
+       { PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */
+       { PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */
        { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg RAID */
        { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg RAID */
        { PCI_VDEVICE(INTEL, 0x8d02), board_ahci }, /* Wellsburg AHCI */
@@ -670,6 +684,79 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class,
        return rc;
 }
 
+/*
+ * ahci_avn_hardreset - attempt more aggressive recovery of Avoton ports.
+ *
+ * It has been observed with some SSDs that the timing of events in the
+ * link synchronization phase can leave the port in a state that can not
+ * be recovered by a SATA-hard-reset alone.  The failing signature is
+ * SStatus.DET stuck at 1 ("Device presence detected but Phy
+ * communication not established").  It was found that unloading and
+ * reloading the driver when this problem occurs allows the drive
+ * connection to be recovered (DET advanced to 0x3).  The critical
+ * component of reloading the driver is that the port state machines are
+ * reset by bouncing "port enable" in the AHCI PCS configuration
+ * register.  So, reproduce that effect by bouncing a port whenever we
+ * see DET==1 after a reset.
+ */
+static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
+                             unsigned long deadline)
+{
+       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       struct ata_port *ap = link->ap;
+       struct ahci_port_priv *pp = ap->private_data;
+       struct ahci_host_priv *hpriv = ap->host->private_data;
+       u8 *d2h_fis = pp->rx_fis + RX_FIS_D2H_REG;
+       unsigned long tmo = deadline - jiffies;
+       struct ata_taskfile tf;
+       bool online;
+       int rc, i;
+
+       DPRINTK("ENTER\n");
+
+       ahci_stop_engine(ap);
+
+       for (i = 0; i < 2; i++) {
+               u16 val;
+               u32 sstatus;
+               int port = ap->port_no;
+               struct ata_host *host = ap->host;
+               struct pci_dev *pdev = to_pci_dev(host->dev);
+
+               /* clear D2H reception area to properly wait for D2H FIS */
+               ata_tf_init(link->device, &tf);
+               tf.command = ATA_BUSY;
+               ata_tf_to_fis(&tf, 0, 0, d2h_fis);
+
+               rc = sata_link_hardreset(link, timing, deadline, &online,
+                               ahci_check_ready);
+
+               if (sata_scr_read(link, SCR_STATUS, &sstatus) != 0 ||
+                               (sstatus & 0xf) != 1)
+                       break;
+
+               ata_link_printk(link, KERN_INFO, "avn bounce port%d\n",
+                               port);
+
+               pci_read_config_word(pdev, 0x92, &val);
+               val &= ~(1 << port);
+               pci_write_config_word(pdev, 0x92, val);
+               ata_msleep(ap, 1000);
+               val |= 1 << port;
+               pci_write_config_word(pdev, 0x92, val);
+               deadline += tmo;
+       }
+
+       hpriv->start_engine(ap);
+
+       if (online)
+               *class = ahci_dev_classify(ap);
+
+       DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class);
+       return rc;
+}
+
+
 #ifdef CONFIG_PM
 static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
 {
index 23716dd..5928d07 100644 (file)
@@ -45,7 +45,7 @@ static void ahci_mvebu_mbus_config(struct ahci_host_priv *hpriv,
                writel((cs->mbus_attr << 8) |
                       (dram->mbus_dram_target_id << 4) | 1,
                       hpriv->mmio + AHCI_WINDOW_CTRL(i));
-               writel(cs->base, hpriv->mmio + AHCI_WINDOW_BASE(i));
+               writel(cs->base >> 16, hpriv->mmio + AHCI_WINDOW_BASE(i));
                writel(((cs->size - 1) & 0xffff0000),
                       hpriv->mmio + AHCI_WINDOW_SIZE(i));
        }
index ea0ff00..8ff428f 100644 (file)
@@ -37,7 +37,6 @@ struct st_ahci_drv_data {
        struct reset_control *pwr;
        struct reset_control *sw_rst;
        struct reset_control *pwr_rst;
-       struct ahci_host_priv *hpriv;
 };
 
 static void st_ahci_configure_oob(void __iomem *mmio)
@@ -55,9 +54,10 @@ static void st_ahci_configure_oob(void __iomem *mmio)
        writel(new_val, mmio + ST_AHCI_OOBR);
 }
 
-static int st_ahci_deassert_resets(struct device *dev)
+static int st_ahci_deassert_resets(struct ahci_host_priv *hpriv,
+                               struct device *dev)
 {
-       struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev);
+       struct st_ahci_drv_data *drv_data = hpriv->plat_data;
        int err;
 
        if (drv_data->pwr) {
@@ -90,8 +90,8 @@ static int st_ahci_deassert_resets(struct device *dev)
 static void st_ahci_host_stop(struct ata_host *host)
 {
        struct ahci_host_priv *hpriv = host->private_data;
+       struct st_ahci_drv_data *drv_data = hpriv->plat_data;
        struct device *dev = host->dev;
-       struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev);
        int err;
 
        if (drv_data->pwr) {
@@ -103,29 +103,30 @@ static void st_ahci_host_stop(struct ata_host *host)
        ahci_platform_disable_resources(hpriv);
 }
 
-static int st_ahci_probe_resets(struct platform_device *pdev)
+static int st_ahci_probe_resets(struct ahci_host_priv *hpriv,
+                               struct device *dev)
 {
-       struct st_ahci_drv_data *drv_data = platform_get_drvdata(pdev);
+       struct st_ahci_drv_data *drv_data = hpriv->plat_data;
 
-       drv_data->pwr = devm_reset_control_get(&pdev->dev, "pwr-dwn");
+       drv_data->pwr = devm_reset_control_get(dev, "pwr-dwn");
        if (IS_ERR(drv_data->pwr)) {
-               dev_info(&pdev->dev, "power reset control not defined\n");
+               dev_info(dev, "power reset control not defined\n");
                drv_data->pwr = NULL;
        }
 
-       drv_data->sw_rst = devm_reset_control_get(&pdev->dev, "sw-rst");
+       drv_data->sw_rst = devm_reset_control_get(dev, "sw-rst");
        if (IS_ERR(drv_data->sw_rst)) {
-               dev_info(&pdev->dev, "soft reset control not defined\n");
+               dev_info(dev, "soft reset control not defined\n");
                drv_data->sw_rst = NULL;
        }
 
-       drv_data->pwr_rst = devm_reset_control_get(&pdev->dev, "pwr-rst");
+       drv_data->pwr_rst = devm_reset_control_get(dev, "pwr-rst");
        if (IS_ERR(drv_data->pwr_rst)) {
-               dev_dbg(&pdev->dev, "power soft reset control not defined\n");
+               dev_dbg(dev, "power soft reset control not defined\n");
                drv_data->pwr_rst = NULL;
        }
 
-       return st_ahci_deassert_resets(&pdev->dev);
+       return st_ahci_deassert_resets(hpriv, dev);
 }
 
 static struct ata_port_operations st_ahci_port_ops = {
@@ -154,15 +155,12 @@ static int st_ahci_probe(struct platform_device *pdev)
        if (!drv_data)
                return -ENOMEM;
 
-       platform_set_drvdata(pdev, drv_data);
-
        hpriv = ahci_platform_get_resources(pdev);
        if (IS_ERR(hpriv))
                return PTR_ERR(hpriv);
+       hpriv->plat_data = drv_data;
 
-       drv_data->hpriv = hpriv;
-
-       err = st_ahci_probe_resets(pdev);
+       err = st_ahci_probe_resets(hpriv, &pdev->dev);
        if (err)
                return err;
 
@@ -170,7 +168,7 @@ static int st_ahci_probe(struct platform_device *pdev)
        if (err)
                return err;
 
-       st_ahci_configure_oob(drv_data->hpriv->mmio);
+       st_ahci_configure_oob(hpriv->mmio);
 
        err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info,
                                      &ahci_platform_sht);
@@ -185,8 +183,9 @@ static int st_ahci_probe(struct platform_device *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int st_ahci_suspend(struct device *dev)
 {
-       struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev);
-       struct ahci_host_priv *hpriv = drv_data->hpriv;
+       struct ata_host *host = dev_get_drvdata(dev);
+       struct ahci_host_priv *hpriv = host->private_data;
+       struct st_ahci_drv_data *drv_data = hpriv->plat_data;
        int err;
 
        err = ahci_platform_suspend_host(dev);
@@ -208,21 +207,21 @@ static int st_ahci_suspend(struct device *dev)
 
 static int st_ahci_resume(struct device *dev)
 {
-       struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev);
-       struct ahci_host_priv *hpriv = drv_data->hpriv;
+       struct ata_host *host = dev_get_drvdata(dev);
+       struct ahci_host_priv *hpriv = host->private_data;
        int err;
 
        err = ahci_platform_enable_resources(hpriv);
        if (err)
                return err;
 
-       err = st_ahci_deassert_resets(dev);
+       err = st_ahci_deassert_resets(hpriv, dev);
        if (err) {
                ahci_platform_disable_resources(hpriv);
                return err;
        }
 
-       st_ahci_configure_oob(drv_data->hpriv->mmio);
+       st_ahci_configure_oob(hpriv->mmio);
 
        return ahci_platform_resume_host(dev);
 }
index 61a9c07..287c4ba 100644 (file)
@@ -1707,8 +1707,7 @@ static void ahci_handle_port_interrupt(struct ata_port *ap,
        if (unlikely(resetting))
                status &= ~PORT_IRQ_BAD_PMP;
 
-       /* if LPM is enabled, PHYRDY doesn't mean anything */
-       if (ap->link.lpm_policy > ATA_LPM_MAX_POWER) {
+       if (sata_lpm_ignore_phy_events(&ap->link)) {
                status &= ~PORT_IRQ_PHYRDY;
                ahci_scr_write(&ap->link, SCR_ERROR, SERR_PHYRDY_CHG);
        }
index f6cb1f1..577849c 100644 (file)
@@ -4235,7 +4235,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
        { "Crucial_CT*MX100*",          "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "Samsung SSD 850 PRO*",       NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+       { "Samsung SSD 8*",             NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
 
        /*
@@ -6752,6 +6752,38 @@ u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val,
        return tmp;
 }
 
+/**
+ *     sata_lpm_ignore_phy_events - test if PHY event should be ignored
+ *     @link: Link receiving the event
+ *
+ *     Test whether the received PHY event has to be ignored or not.
+ *
+ *     LOCKING:
+ *     None:
+ *
+ *     RETURNS:
+ *     True if the event has to be ignored.
+ */
+bool sata_lpm_ignore_phy_events(struct ata_link *link)
+{
+       unsigned long lpm_timeout = link->last_lpm_change +
+                                   msecs_to_jiffies(ATA_TMOUT_SPURIOUS_PHY);
+
+       /* if LPM is enabled, PHYRDY doesn't mean anything */
+       if (link->lpm_policy > ATA_LPM_MAX_POWER)
+               return true;
+
+       /* ignore the first PHY event after the LPM policy changed
+        * as it is might be spurious
+        */
+       if ((link->flags & ATA_LFLAG_CHANGED) &&
+           time_before(jiffies, lpm_timeout))
+               return true;
+
+       return false;
+}
+EXPORT_SYMBOL_GPL(sata_lpm_ignore_phy_events);
+
 /*
  * Dummy port_ops
  */
index 07f41be..cf0022e 100644 (file)
@@ -3597,6 +3597,9 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
                }
        }
 
+       link->last_lpm_change = jiffies;
+       link->flags |= ATA_LFLAG_CHANGED;
+
        return 0;
 
 fail:
index 80a8054..2724595 100644 (file)
@@ -1053,7 +1053,7 @@ static struct of_device_id octeon_cf_match[] = {
        },
        {},
 };
-MODULE_DEVICE_TABLE(of, octeon_i2c_match);
+MODULE_DEVICE_TABLE(of, octeon_cf_match);
 
 static struct platform_driver octeon_cf_driver = {
        .probe          = octeon_cf_probe,
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
deleted file mode 100644 (file)
index 5cd60d6..0000000
+++ /dev/null
@@ -1,1110 +0,0 @@
-/*
- * Support for IDE interfaces on Celleb platform
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This code is based on drivers/ata/ata_piix.c:
- *  Copyright 2003-2005 Red Hat Inc
- *  Copyright 2003-2005 Jeff Garzik
- *  Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer
- *  Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2003 Red Hat Inc
- *
- * and drivers/ata/ahci.c:
- *  Copyright 2004-2005 Red Hat, Inc.
- *
- * and drivers/ata/libata-core.c:
- *  Copyright 2003-2004 Red Hat, Inc.  All rights reserved.
- *  Copyright 2003-2004 Jeff Garzik
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/blkdev.h>
-#include <linux/delay.h>
-#include <linux/device.h>
-#include <scsi/scsi_host.h>
-#include <linux/libata.h>
-
-#define DRV_NAME               "pata_scc"
-#define DRV_VERSION            "0.3"
-
-#define PCI_DEVICE_ID_TOSHIBA_SCC_ATA          0x01b4
-
-/* PCI BARs */
-#define SCC_CTRL_BAR           0
-#define SCC_BMID_BAR           1
-
-/* offset of CTRL registers */
-#define SCC_CTL_PIOSHT         0x000
-#define SCC_CTL_PIOCT          0x004
-#define SCC_CTL_MDMACT         0x008
-#define SCC_CTL_MCRCST         0x00C
-#define SCC_CTL_SDMACT         0x010
-#define SCC_CTL_SCRCST         0x014
-#define SCC_CTL_UDENVT         0x018
-#define SCC_CTL_TDVHSEL        0x020
-#define SCC_CTL_MODEREG        0x024
-#define SCC_CTL_ECMODE         0xF00
-#define SCC_CTL_MAEA0          0xF50
-#define SCC_CTL_MAEC0          0xF54
-#define SCC_CTL_CCKCTRL        0xFF0
-
-/* offset of BMID registers */
-#define SCC_DMA_CMD            0x000
-#define SCC_DMA_STATUS         0x004
-#define SCC_DMA_TABLE_OFS      0x008
-#define SCC_DMA_INTMASK        0x010
-#define SCC_DMA_INTST          0x014
-#define SCC_DMA_PTERADD        0x018
-#define SCC_REG_CMD_ADDR       0x020
-#define SCC_REG_DATA           0x000
-#define SCC_REG_ERR            0x004
-#define SCC_REG_FEATURE        0x004
-#define SCC_REG_NSECT          0x008
-#define SCC_REG_LBAL           0x00C
-#define SCC_REG_LBAM           0x010
-#define SCC_REG_LBAH           0x014
-#define SCC_REG_DEVICE         0x018
-#define SCC_REG_STATUS         0x01C
-#define SCC_REG_CMD            0x01C
-#define SCC_REG_ALTSTATUS      0x020
-
-/* register value */
-#define TDVHSEL_MASTER         0x00000001
-#define TDVHSEL_SLAVE          0x00000004
-
-#define MODE_JCUSFEN           0x00000080
-
-#define ECMODE_VALUE           0x01
-
-#define CCKCTRL_ATARESET       0x00040000
-#define CCKCTRL_BUFCNT         0x00020000
-#define CCKCTRL_CRST           0x00010000
-#define CCKCTRL_OCLKEN         0x00000100
-#define CCKCTRL_ATACLKOEN      0x00000002
-#define CCKCTRL_LCLKEN         0x00000001
-
-#define QCHCD_IOS_SS           0x00000001
-
-#define QCHSD_STPDIAG          0x00020000
-
-#define INTMASK_MSK            0xD1000012
-#define INTSTS_SERROR          0x80000000
-#define INTSTS_PRERR           0x40000000
-#define INTSTS_RERR            0x10000000
-#define INTSTS_ICERR           0x01000000
-#define INTSTS_BMSINT          0x00000010
-#define INTSTS_BMHE            0x00000008
-#define INTSTS_IOIRQS          0x00000004
-#define INTSTS_INTRQ           0x00000002
-#define INTSTS_ACTEINT         0x00000001
-
-
-/* PIO transfer mode table */
-/* JCHST */
-static const unsigned long JCHSTtbl[2][7] = {
-       {0x0E, 0x05, 0x02, 0x03, 0x02, 0x00, 0x00},     /* 100MHz */
-       {0x13, 0x07, 0x04, 0x04, 0x03, 0x00, 0x00}      /* 133MHz */
-};
-
-/* JCHHT */
-static const unsigned long JCHHTtbl[2][7] = {
-       {0x0E, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00},     /* 100MHz */
-       {0x13, 0x03, 0x03, 0x03, 0x03, 0x00, 0x00}      /* 133MHz */
-};
-
-/* JCHCT */
-static const unsigned long JCHCTtbl[2][7] = {
-       {0x1D, 0x1D, 0x1C, 0x0B, 0x06, 0x00, 0x00},     /* 100MHz */
-       {0x27, 0x26, 0x26, 0x0E, 0x09, 0x00, 0x00}      /* 133MHz */
-};
-
-/* DMA transfer mode  table */
-/* JCHDCTM/JCHDCTS */
-static const unsigned long JCHDCTxtbl[2][7] = {
-       {0x0A, 0x06, 0x04, 0x03, 0x01, 0x00, 0x00},     /* 100MHz */
-       {0x0E, 0x09, 0x06, 0x04, 0x02, 0x01, 0x00}      /* 133MHz */
-};
-
-/* JCSTWTM/JCSTWTS  */
-static const unsigned long JCSTWTxtbl[2][7] = {
-       {0x06, 0x04, 0x03, 0x02, 0x02, 0x02, 0x00},     /* 100MHz */
-       {0x09, 0x06, 0x04, 0x02, 0x02, 0x02, 0x02}      /* 133MHz */
-};
-
-/* JCTSS */
-static const unsigned long JCTSStbl[2][7] = {
-       {0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x00},     /* 100MHz */
-       {0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05}      /* 133MHz */
-};
-
-/* JCENVT */
-static const unsigned long JCENVTtbl[2][7] = {
-       {0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00},     /* 100MHz */
-       {0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02}      /* 133MHz */
-};
-
-/* JCACTSELS/JCACTSELM */
-static const unsigned long JCACTSELtbl[2][7] = {
-       {0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00},     /* 100MHz */
-       {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}      /* 133MHz */
-};
-
-static const struct pci_device_id scc_pci_tbl[] = {
-       { PCI_VDEVICE(TOSHIBA_2, PCI_DEVICE_ID_TOSHIBA_SCC_ATA), 0},
-       { }     /* terminate list */
-};
-
-/**
- *     scc_set_piomode - Initialize host controller PATA PIO timings
- *     @ap: Port whose timings we are configuring
- *     @adev: um
- *
- *     Set PIO mode for device.
- *
- *     LOCKING:
- *     None (inherited from caller).
- */
-
-static void scc_set_piomode (struct ata_port *ap, struct ata_device *adev)
-{
-       unsigned int pio = adev->pio_mode - XFER_PIO_0;
-       void __iomem *ctrl_base = ap->host->iomap[SCC_CTRL_BAR];
-       void __iomem *cckctrl_port = ctrl_base + SCC_CTL_CCKCTRL;
-       void __iomem *piosht_port = ctrl_base + SCC_CTL_PIOSHT;
-       void __iomem *pioct_port = ctrl_base + SCC_CTL_PIOCT;
-       unsigned long reg;
-       int offset;
-
-       reg = in_be32(cckctrl_port);
-       if (reg & CCKCTRL_ATACLKOEN)
-               offset = 1;     /* 133MHz */
-       else
-               offset = 0;     /* 100MHz */
-
-       reg = JCHSTtbl[offset][pio] << 16 | JCHHTtbl[offset][pio];
-       out_be32(piosht_port, reg);
-       reg = JCHCTtbl[offset][pio];
-       out_be32(pioct_port, reg);
-}
-
-/**
- *     scc_set_dmamode - Initialize host controller PATA DMA timings
- *     @ap: Port whose timings we are configuring
- *     @adev: um
- *
- *     Set UDMA mode for device.
- *
- *     LOCKING:
- *     None (inherited from caller).
- */
-
-static void scc_set_dmamode (struct ata_port *ap, struct ata_device *adev)
-{
-       unsigned int udma = adev->dma_mode;
-       unsigned int is_slave = (adev->devno != 0);
-       u8 speed = udma;
-       void __iomem *ctrl_base = ap->host->iomap[SCC_CTRL_BAR];
-       void __iomem *cckctrl_port = ctrl_base + SCC_CTL_CCKCTRL;
-       void __iomem *mdmact_port = ctrl_base + SCC_CTL_MDMACT;
-       void __iomem *mcrcst_port = ctrl_base + SCC_CTL_MCRCST;
-       void __iomem *sdmact_port = ctrl_base + SCC_CTL_SDMACT;
-       void __iomem *scrcst_port = ctrl_base + SCC_CTL_SCRCST;
-       void __iomem *udenvt_port = ctrl_base + SCC_CTL_UDENVT;
-       void __iomem *tdvhsel_port = ctrl_base + SCC_CTL_TDVHSEL;
-       int offset, idx;
-
-       if (in_be32(cckctrl_port) & CCKCTRL_ATACLKOEN)
-               offset = 1;     /* 133MHz */
-       else
-               offset = 0;     /* 100MHz */
-
-       if (speed >= XFER_UDMA_0)
-               idx = speed - XFER_UDMA_0;
-       else
-               return;
-
-       if (is_slave) {
-               out_be32(sdmact_port, JCHDCTxtbl[offset][idx]);
-               out_be32(scrcst_port, JCSTWTxtbl[offset][idx]);
-               out_be32(tdvhsel_port,
-                        (in_be32(tdvhsel_port) & ~TDVHSEL_SLAVE) | (JCACTSELtbl[offset][idx] << 2));
-       } else {
-               out_be32(mdmact_port, JCHDCTxtbl[offset][idx]);
-               out_be32(mcrcst_port, JCSTWTxtbl[offset][idx]);
-               out_be32(tdvhsel_port,
-                        (in_be32(tdvhsel_port) & ~TDVHSEL_MASTER) | JCACTSELtbl[offset][idx]);
-       }
-       out_be32(udenvt_port,
-                JCTSStbl[offset][idx] << 16 | JCENVTtbl[offset][idx]);
-}
-
-unsigned long scc_mode_filter(struct ata_device *adev, unsigned long mask)
-{
-       /* errata A308 workaround: limit ATAPI UDMA mode to UDMA4 */
-       if (adev->class == ATA_DEV_ATAPI &&
-           (mask & (0xE0 << ATA_SHIFT_UDMA))) {
-               printk(KERN_INFO "%s: limit ATAPI UDMA to UDMA4\n", DRV_NAME);
-               mask &= ~(0xE0 << ATA_SHIFT_UDMA);
-       }
-       return mask;
-}
-
-/**
- *     scc_tf_load - send taskfile registers to host controller
- *     @ap: Port to which output is sent
- *     @tf: ATA taskfile register set
- *
- *     Note: Original code is ata_sff_tf_load().
- */
-
-static void scc_tf_load (struct ata_port *ap, const struct ata_taskfile *tf)
-{
-       struct ata_ioports *ioaddr = &ap->ioaddr;
-       unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR;
-
-       if (tf->ctl != ap->last_ctl) {
-               out_be32(ioaddr->ctl_addr, tf->ctl);
-               ap->last_ctl = tf->ctl;
-               ata_wait_idle(ap);
-       }
-
-       if (is_addr && (tf->flags & ATA_TFLAG_LBA48)) {
-               out_be32(ioaddr->feature_addr, tf->hob_feature);
-               out_be32(ioaddr->nsect_addr, tf->hob_nsect);
-               out_be32(ioaddr->lbal_addr, tf->hob_lbal);
-               out_be32(ioaddr->lbam_addr, tf->hob_lbam);
-               out_be32(ioaddr->lbah_addr, tf->hob_lbah);
-               VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n",
-                       tf->hob_feature,
-                       tf->hob_nsect,
-                       tf->hob_lbal,
-                       tf->hob_lbam,
-                       tf->hob_lbah);
-       }
-
-       if (is_addr) {
-               out_be32(ioaddr->feature_addr, tf->feature);
-               out_be32(ioaddr->nsect_addr, tf->nsect);
-               out_be32(ioaddr->lbal_addr, tf->lbal);
-               out_be32(ioaddr->lbam_addr, tf->lbam);
-               out_be32(ioaddr->lbah_addr, tf->lbah);
-               VPRINTK("feat 0x%X nsect 0x%X lba 0x%X 0x%X 0x%X\n",
-                       tf->feature,
-                       tf->nsect,
-                       tf->lbal,
-                       tf->lbam,
-                       tf->lbah);
-       }
-
-       if (tf->flags & ATA_TFLAG_DEVICE) {
-               out_be32(ioaddr->device_addr, tf->device);
-               VPRINTK("device 0x%X\n", tf->device);
-       }
-
-       ata_wait_idle(ap);
-}
-
-/**
- *     scc_check_status - Read device status reg & clear interrupt
- *     @ap: port where the device is
- *
- *     Note: Original code is ata_check_status().
- */
-
-static u8 scc_check_status (struct ata_port *ap)
-{
-       return in_be32(ap->ioaddr.status_addr);
-}
-
-/**
- *     scc_tf_read - input device's ATA taskfile shadow registers
- *     @ap: Port from which input is read
- *     @tf: ATA taskfile register set for storing input
- *
- *     Note: Original code is ata_sff_tf_read().
- */
-
-static void scc_tf_read (struct ata_port *ap, struct ata_taskfile *tf)
-{
-       struct ata_ioports *ioaddr = &ap->ioaddr;
-
-       tf->command = scc_check_status(ap);
-       tf->feature = in_be32(ioaddr->error_addr);
-       tf->nsect = in_be32(ioaddr->nsect_addr);
-       tf->lbal = in_be32(ioaddr->lbal_addr);
-       tf->lbam = in_be32(ioaddr->lbam_addr);
-       tf->lbah = in_be32(ioaddr->lbah_addr);
-       tf->device = in_be32(ioaddr->device_addr);
-
-       if (tf->flags & ATA_TFLAG_LBA48) {
-               out_be32(ioaddr->ctl_addr, tf->ctl | ATA_HOB);
-               tf->hob_feature = in_be32(ioaddr->error_addr);
-               tf->hob_nsect = in_be32(ioaddr->nsect_addr);
-               tf->hob_lbal = in_be32(ioaddr->lbal_addr);
-               tf->hob_lbam = in_be32(ioaddr->lbam_addr);
-               tf->hob_lbah = in_be32(ioaddr->lbah_addr);
-               out_be32(ioaddr->ctl_addr, tf->ctl);
-               ap->last_ctl = tf->ctl;
-       }
-}
-
-/**
- *     scc_exec_command - issue ATA command to host controller
- *     @ap: port to which command is being issued
- *     @tf: ATA taskfile register set
- *
- *     Note: Original code is ata_sff_exec_command().
- */
-
-static void scc_exec_command (struct ata_port *ap,
-                             const struct ata_taskfile *tf)
-{
-       DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command);
-
-       out_be32(ap->ioaddr.command_addr, tf->command);
-       ata_sff_pause(ap);
-}
-
-/**
- *     scc_check_altstatus - Read device alternate status reg
- *     @ap: port where the device is
- */
-
-static u8 scc_check_altstatus (struct ata_port *ap)
-{
-       return in_be32(ap->ioaddr.altstatus_addr);
-}
-
-/**
- *     scc_dev_select - Select device 0/1 on ATA bus
- *     @ap: ATA channel to manipulate
- *     @device: ATA device (numbered from zero) to select
- *
- *     Note: Original code is ata_sff_dev_select().
- */
-
-static void scc_dev_select (struct ata_port *ap, unsigned int device)
-{
-       u8 tmp;
-
-       if (device == 0)
-               tmp = ATA_DEVICE_OBS;
-       else
-               tmp = ATA_DEVICE_OBS | ATA_DEV1;
-
-       out_be32(ap->ioaddr.device_addr, tmp);
-       ata_sff_pause(ap);
-}
-
-/**
- *     scc_set_devctl - Write device control reg
- *     @ap: port where the device is
- *     @ctl: value to write
- */
-
-static void scc_set_devctl(struct ata_port *ap, u8 ctl)
-{
-       out_be32(ap->ioaddr.ctl_addr, ctl);
-}
-
-/**
- *     scc_bmdma_setup - Set up PCI IDE BMDMA transaction
- *     @qc: Info associated with this ATA transaction.
- *
- *     Note: Original code is ata_bmdma_setup().
- */
-
-static void scc_bmdma_setup (struct ata_queued_cmd *qc)
-{
-       struct ata_port *ap = qc->ap;
-       unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
-       u8 dmactl;
-       void __iomem *mmio = ap->ioaddr.bmdma_addr;
-
-       /* load PRD table addr */
-       out_be32(mmio + SCC_DMA_TABLE_OFS, ap->bmdma_prd_dma);
-
-       /* specify data direction, triple-check start bit is clear */
-       dmactl = in_be32(mmio + SCC_DMA_CMD);
-       dmactl &= ~(ATA_DMA_WR | ATA_DMA_START);
-       if (!rw)
-               dmactl |= ATA_DMA_WR;
-       out_be32(mmio + SCC_DMA_CMD, dmactl);
-
-       /* issue r/w command */
-       ap->ops->sff_exec_command(ap, &qc->tf);
-}
-
-/**
- *     scc_bmdma_start - Start a PCI IDE BMDMA transaction
- *     @qc: Info associated with this ATA transaction.
- *
- *     Note: Original code is ata_bmdma_start().
- */
-
-static void scc_bmdma_start (struct ata_queued_cmd *qc)
-{
-       struct ata_port *ap = qc->ap;
-       u8 dmactl;
-       void __iomem *mmio = ap->ioaddr.bmdma_addr;
-
-       /* start host DMA transaction */
-       dmactl = in_be32(mmio + SCC_DMA_CMD);
-       out_be32(mmio + SCC_DMA_CMD, dmactl | ATA_DMA_START);
-}
-
-/**
- *     scc_devchk - PATA device presence detection
- *     @ap: ATA channel to examine
- *     @device: Device to examine (starting at zero)
- *
- *     Note: Original code is ata_devchk().
- */
-
-static unsigned int scc_devchk (struct ata_port *ap,
-                               unsigned int device)
-{
-       struct ata_ioports *ioaddr = &ap->ioaddr;
-       u8 nsect, lbal;
-
-       ap->ops->sff_dev_select(ap, device);
-
-       out_be32(ioaddr->nsect_addr, 0x55);
-       out_be32(ioaddr->lbal_addr, 0xaa);
-
-       out_be32(ioaddr->nsect_addr, 0xaa);
-       out_be32(ioaddr->lbal_addr, 0x55);
-
-       out_be32(ioaddr->nsect_addr, 0x55);
-       out_be32(ioaddr->lbal_addr, 0xaa);
-
-       nsect = in_be32(ioaddr->nsect_addr);
-       lbal = in_be32(ioaddr->lbal_addr);
-
-       if ((nsect == 0x55) && (lbal == 0xaa))
-               return 1;       /* we found a device */
-
-       return 0;               /* nothing found */
-}
-
-/**
- *     scc_wait_after_reset - wait for devices to become ready after reset
- *
- *     Note: Original code is ata_sff_wait_after_reset
- */
-
-static int scc_wait_after_reset(struct ata_link *link, unsigned int devmask,
-                               unsigned long deadline)
-{
-       struct ata_port *ap = link->ap;
-       struct ata_ioports *ioaddr = &ap->ioaddr;
-       unsigned int dev0 = devmask & (1 << 0);
-       unsigned int dev1 = devmask & (1 << 1);
-       int rc, ret = 0;
-
-       /* Spec mandates ">= 2ms" before checking status.  We wait
-        * 150ms, because that was the magic delay used for ATAPI
-        * devices in Hale Landis's ATADRVR, for the period of time
-        * between when the ATA command register is written, and then
-        * status is checked.  Because waiting for "a while" before
-        * checking status is fine, post SRST, we perform this magic
-        * delay here as well.
-        *
-        * Old drivers/ide uses the 2mS rule and then waits for ready.
-        */
-       ata_msleep(ap, 150);
-
-       /* always check readiness of the master device */
-       rc = ata_sff_wait_ready(link, deadline);
-       /* -ENODEV means the odd clown forgot the D7 pulldown resistor
-        * and TF status is 0xff, bail out on it too.
-        */
-       if (rc)
-               return rc;
-
-       /* if device 1 was found in ata_devchk, wait for register
-        * access briefly, then wait for BSY to clear.
-        */
-       if (dev1) {
-               int i;
-
-               ap->ops->sff_dev_select(ap, 1);
-
-               /* Wait for register access.  Some ATAPI devices fail
-                * to set nsect/lbal after reset, so don't waste too
-                * much time on it.  We're gonna wait for !BSY anyway.
-                */
-               for (i = 0; i < 2; i++) {
-                       u8 nsect, lbal;
-
-                       nsect = in_be32(ioaddr->nsect_addr);
-                       lbal = in_be32(ioaddr->lbal_addr);
-                       if ((nsect == 1) && (lbal == 1))
-                               break;
-                       ata_msleep(ap, 50);     /* give drive a breather */
-               }
-
-               rc = ata_sff_wait_ready(link, deadline);
-               if (rc) {
-                       if (rc != -ENODEV)
-                               return rc;
-                       ret = rc;
-               }
-       }
-
-       /* is all this really necessary? */
-       ap->ops->sff_dev_select(ap, 0);
-       if (dev1)
-               ap->ops->sff_dev_select(ap, 1);
-       if (dev0)
-               ap->ops->sff_dev_select(ap, 0);
-
-       return ret;
-}
-
-/**
- *     scc_bus_softreset - PATA device software reset
- *
- *     Note: Original code is ata_bus_softreset().
- */
-
-static int scc_bus_softreset(struct ata_port *ap, unsigned int devmask,
-                                      unsigned long deadline)
-{
-       struct ata_ioports *ioaddr = &ap->ioaddr;
-
-       DPRINTK("ata%u: bus reset via SRST\n", ap->print_id);
-
-       /* software reset.  causes dev0 to be selected */
-       out_be32(ioaddr->ctl_addr, ap->ctl);
-       udelay(20);
-       out_be32(ioaddr->ctl_addr, ap->ctl | ATA_SRST);
-       udelay(20);
-       out_be32(ioaddr->ctl_addr, ap->ctl);
-
-       return scc_wait_after_reset(&ap->link, devmask, deadline);
-}
-
-/**
- *     scc_softreset - reset host port via ATA SRST
- *     @ap: port to reset
- *     @classes: resulting classes of attached devices
- *     @deadline: deadline jiffies for the operation
- *
- *     Note: Original code is ata_sff_softreset().
- */
-
-static int scc_softreset(struct ata_link *link, unsigned int *classes,
-                        unsigned long deadline)
-{
-       struct ata_port *ap = link->ap;
-       unsigned int slave_possible = ap->flags & ATA_FLAG_SLAVE_POSS;
-       unsigned int devmask = 0;
-       int rc;
-       u8 err;
-
-       DPRINTK("ENTER\n");
-
-       /* determine if device 0/1 are present */
-       if (scc_devchk(ap, 0))
-               devmask |= (1 << 0);
-       if (slave_possible && scc_devchk(ap, 1))
-               devmask |= (1 << 1);
-
-       /* select device 0 again */
-       ap->ops->sff_dev_select(ap, 0);
-
-       /* issue bus reset */
-       DPRINTK("about to softreset, devmask=%x\n", devmask);
-       rc = scc_bus_softreset(ap, devmask, deadline);
-       if (rc) {
-               ata_port_err(ap, "SRST failed (err_mask=0x%x)\n", rc);
-               return -EIO;
-       }
-
-       /* determine by signature whether we have ATA or ATAPI devices */
-       classes[0] = ata_sff_dev_classify(&ap->link.device[0],
-                                         devmask & (1 << 0), &err);
-       if (slave_possible && err != 0x81)
-               classes[1] = ata_sff_dev_classify(&ap->link.device[1],
-                                                 devmask & (1 << 1), &err);
-
-       DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]);
-       return 0;
-}
-
-/**
- *     scc_bmdma_stop - Stop PCI IDE BMDMA transfer
- *     @qc: Command we are ending DMA for
- */
-
-static void scc_bmdma_stop (struct ata_queued_cmd *qc)
-{
-       struct ata_port *ap = qc->ap;
-       void __iomem *ctrl_base = ap->host->iomap[SCC_CTRL_BAR];
-       void __iomem *bmid_base = ap->host->iomap[SCC_BMID_BAR];
-       u32 reg;
-
-       while (1) {
-               reg = in_be32(bmid_base + SCC_DMA_INTST);
-
-               if (reg & INTSTS_SERROR) {
-                       printk(KERN_WARNING "%s: SERROR\n", DRV_NAME);
-                       out_be32(bmid_base + SCC_DMA_INTST, INTSTS_SERROR|INTSTS_BMSINT);
-                       out_be32(bmid_base + SCC_DMA_CMD,
-                                in_be32(bmid_base + SCC_DMA_CMD) & ~ATA_DMA_START);
-                       continue;
-               }
-
-               if (reg & INTSTS_PRERR) {
-                       u32 maea0, maec0;
-                       maea0 = in_be32(ctrl_base + SCC_CTL_MAEA0);
-                       maec0 = in_be32(ctrl_base + SCC_CTL_MAEC0);
-                       printk(KERN_WARNING "%s: PRERR [addr:%x cmd:%x]\n", DRV_NAME, maea0, maec0);
-                       out_be32(bmid_base + SCC_DMA_INTST, INTSTS_PRERR|INTSTS_BMSINT);
-                       out_be32(bmid_base + SCC_DMA_CMD,
-                                in_be32(bmid_base + SCC_DMA_CMD) & ~ATA_DMA_START);
-                       continue;
-               }
-
-               if (reg & INTSTS_RERR) {
-                       printk(KERN_WARNING "%s: Response Error\n", DRV_NAME);
-                       out_be32(bmid_base + SCC_DMA_INTST, INTSTS_RERR|INTSTS_BMSINT);
-                       out_be32(bmid_base + SCC_DMA_CMD,
-                                in_be32(bmid_base + SCC_DMA_CMD) & ~ATA_DMA_START);
-                       continue;
-               }
-
-               if (reg & INTSTS_ICERR) {
-                       out_be32(bmid_base + SCC_DMA_CMD,
-                                in_be32(bmid_base + SCC_DMA_CMD) & ~ATA_DMA_START);
-                       printk(KERN_WARNING "%s: Illegal Configuration\n", DRV_NAME);
-                       out_be32(bmid_base + SCC_DMA_INTST, INTSTS_ICERR|INTSTS_BMSINT);
-                       continue;
-               }
-
-               if (reg & INTSTS_BMSINT) {
-                       unsigned int classes;
-                       unsigned long deadline = ata_deadline(jiffies, ATA_TMOUT_BOOT);
-                       printk(KERN_WARNING "%s: Internal Bus Error\n", DRV_NAME);
-                       out_be32(bmid_base + SCC_DMA_INTST, INTSTS_BMSINT);
-                       /* TBD: SW reset */
-                       scc_softreset(&ap->link, &classes, deadline);
-                       continue;
-               }
-
-               if (reg & INTSTS_BMHE) {
-                       out_be32(bmid_base + SCC_DMA_INTST, INTSTS_BMHE);
-                       continue;
-               }
-
-               if (reg & INTSTS_ACTEINT) {
-                       out_be32(bmid_base + SCC_DMA_INTST, INTSTS_ACTEINT);
-                       continue;
-               }
-
-               if (reg & INTSTS_IOIRQS) {
-                       out_be32(bmid_base + SCC_DMA_INTST, INTSTS_IOIRQS);
-                       continue;
-               }
-               break;
-       }
-
-       /* clear start/stop bit */
-       out_be32(bmid_base + SCC_DMA_CMD,
-                in_be32(bmid_base + SCC_DMA_CMD) & ~ATA_DMA_START);
-
-       /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
-       ata_sff_dma_pause(ap);  /* dummy read */
-}
-
-/**
- *     scc_bmdma_status - Read PCI IDE BMDMA status
- *     @ap: Port associated with this ATA transaction.
- */
-
-static u8 scc_bmdma_status (struct ata_port *ap)
-{
-       void __iomem *mmio = ap->ioaddr.bmdma_addr;
-       u8 host_stat = in_be32(mmio + SCC_DMA_STATUS);
-       u32 int_status = in_be32(mmio + SCC_DMA_INTST);
-       struct ata_queued_cmd *qc = ata_qc_from_tag(ap, ap->link.active_tag);
-       static int retry = 0;
-
-       /* return if IOS_SS is cleared */
-       if (!(in_be32(mmio + SCC_DMA_CMD) & ATA_DMA_START))
-               return host_stat;
-
-       /* errata A252,A308 workaround: Step4 */
-       if ((scc_check_altstatus(ap) & ATA_ERR)
-                                       && (int_status & INTSTS_INTRQ))
-               return (host_stat | ATA_DMA_INTR);
-
-       /* errata A308 workaround Step5 */
-       if (int_status & INTSTS_IOIRQS) {
-               host_stat |= ATA_DMA_INTR;
-
-               /* We don't check ATAPI DMA because it is limited to UDMA4 */
-               if ((qc->tf.protocol == ATA_PROT_DMA &&
-                    qc->dev->xfer_mode > XFER_UDMA_4)) {
-                       if (!(int_status & INTSTS_ACTEINT)) {
-                               printk(KERN_WARNING "ata%u: operation failed (transfer data loss)\n",
-                                      ap->print_id);
-                               host_stat |= ATA_DMA_ERR;
-                               if (retry++)
-                                       ap->udma_mask &= ~(1 << qc->dev->xfer_mode);
-                       } else
-                               retry = 0;
-               }
-       }
-
-       return host_stat;
-}
-
-/**
- *     scc_data_xfer - Transfer data by PIO
- *     @dev: device for this I/O
- *     @buf: data buffer
- *     @buflen: buffer length
- *     @rw: read/write
- *
- *     Note: Original code is ata_sff_data_xfer().
- */
-
-static unsigned int scc_data_xfer (struct ata_device *dev, unsigned char *buf,
-                                  unsigned int buflen, int rw)
-{
-       struct ata_port *ap = dev->link->ap;
-       unsigned int words = buflen >> 1;
-       unsigned int i;
-       __le16 *buf16 = (__le16 *) buf;
-       void __iomem *mmio = ap->ioaddr.data_addr;
-
-       /* Transfer multiple of 2 bytes */
-       if (rw == READ)
-               for (i = 0; i < words; i++)
-                       buf16[i] = cpu_to_le16(in_be32(mmio));
-       else
-               for (i = 0; i < words; i++)
-                       out_be32(mmio, le16_to_cpu(buf16[i]));
-
-       /* Transfer trailing 1 byte, if any. */
-       if (unlikely(buflen & 0x01)) {
-               __le16 align_buf[1] = { 0 };
-               unsigned char *trailing_buf = buf + buflen - 1;
-
-               if (rw == READ) {
-                       align_buf[0] = cpu_to_le16(in_be32(mmio));
-                       memcpy(trailing_buf, align_buf, 1);
-               } else {
-                       memcpy(align_buf, trailing_buf, 1);
-                       out_be32(mmio, le16_to_cpu(align_buf[0]));
-               }
-               words++;
-       }
-
-       return words << 1;
-}
-
-/**
- *     scc_postreset - standard postreset callback
- *     @ap: the target ata_port
- *     @classes: classes of attached devices
- *
- *     Note: Original code is ata_sff_postreset().
- */
-
-static void scc_postreset(struct ata_link *link, unsigned int *classes)
-{
-       struct ata_port *ap = link->ap;
-
-       DPRINTK("ENTER\n");
-
-       /* is double-select really necessary? */
-       if (classes[0] != ATA_DEV_NONE)
-               ap->ops->sff_dev_select(ap, 1);
-       if (classes[1] != ATA_DEV_NONE)
-               ap->ops->sff_dev_select(ap, 0);
-
-       /* bail out if no device is present */
-       if (classes[0] == ATA_DEV_NONE && classes[1] == ATA_DEV_NONE) {
-               DPRINTK("EXIT, no device\n");
-               return;
-       }
-
-       /* set up device control */
-       out_be32(ap->ioaddr.ctl_addr, ap->ctl);
-
-       DPRINTK("EXIT\n");
-}
-
-/**
- *     scc_irq_clear - Clear PCI IDE BMDMA interrupt.
- *     @ap: Port associated with this ATA transaction.
- *
- *     Note: Original code is ata_bmdma_irq_clear().
- */
-
-static void scc_irq_clear (struct ata_port *ap)
-{
-       void __iomem *mmio = ap->ioaddr.bmdma_addr;
-
-       if (!mmio)
-               return;
-
-       out_be32(mmio + SCC_DMA_STATUS, in_be32(mmio + SCC_DMA_STATUS));
-}
-
-/**
- *     scc_port_start - Set port up for dma.
- *     @ap: Port to initialize
- *
- *     Allocate space for PRD table using ata_bmdma_port_start().
- *     Set PRD table address for PTERADD. (PRD Transfer End Read)
- */
-
-static int scc_port_start (struct ata_port *ap)
-{
-       void __iomem *mmio = ap->ioaddr.bmdma_addr;
-       int rc;
-
-       rc = ata_bmdma_port_start(ap);
-       if (rc)
-               return rc;
-
-       out_be32(mmio + SCC_DMA_PTERADD, ap->bmdma_prd_dma);
-       return 0;
-}
-
-/**
- *     scc_port_stop - Undo scc_port_start()
- *     @ap: Port to shut down
- *
- *     Reset PTERADD.
- */
-
-static void scc_port_stop (struct ata_port *ap)
-{
-       void __iomem *mmio = ap->ioaddr.bmdma_addr;
-
-       out_be32(mmio + SCC_DMA_PTERADD, 0);
-}
-
-static struct scsi_host_template scc_sht = {
-       ATA_BMDMA_SHT(DRV_NAME),
-};
-
-static struct ata_port_operations scc_pata_ops = {
-       .inherits               = &ata_bmdma_port_ops,
-
-       .set_piomode            = scc_set_piomode,
-       .set_dmamode            = scc_set_dmamode,
-       .mode_filter            = scc_mode_filter,
-
-       .sff_tf_load            = scc_tf_load,
-       .sff_tf_read            = scc_tf_read,
-       .sff_exec_command       = scc_exec_command,
-       .sff_check_status       = scc_check_status,
-       .sff_check_altstatus    = scc_check_altstatus,
-       .sff_dev_select         = scc_dev_select,
-       .sff_set_devctl         = scc_set_devctl,
-
-       .bmdma_setup            = scc_bmdma_setup,
-       .bmdma_start            = scc_bmdma_start,
-       .bmdma_stop             = scc_bmdma_stop,
-       .bmdma_status           = scc_bmdma_status,
-       .sff_data_xfer          = scc_data_xfer,
-
-       .cable_detect           = ata_cable_80wire,
-       .softreset              = scc_softreset,
-       .postreset              = scc_postreset,
-
-       .sff_irq_clear          = scc_irq_clear,
-
-       .port_start             = scc_port_start,
-       .port_stop              = scc_port_stop,
-};
-
-static struct ata_port_info scc_port_info[] = {
-       {
-               .flags          = ATA_FLAG_SLAVE_POSS,
-               .pio_mask       = ATA_PIO4,
-               /* No MWDMA */
-               .udma_mask      = ATA_UDMA6,
-               .port_ops       = &scc_pata_ops,
-       },
-};
-
-/**
- *     scc_reset_controller - initialize SCC PATA controller.
- */
-
-static int scc_reset_controller(struct ata_host *host)
-{
-       void __iomem *ctrl_base = host->iomap[SCC_CTRL_BAR];
-       void __iomem *bmid_base = host->iomap[SCC_BMID_BAR];
-       void __iomem *cckctrl_port = ctrl_base + SCC_CTL_CCKCTRL;
-       void __iomem *mode_port = ctrl_base + SCC_CTL_MODEREG;
-       void __iomem *ecmode_port = ctrl_base + SCC_CTL_ECMODE;
-       void __iomem *intmask_port = bmid_base + SCC_DMA_INTMASK;
-       void __iomem *dmastatus_port = bmid_base + SCC_DMA_STATUS;
-       u32 reg = 0;
-
-       out_be32(cckctrl_port, reg);
-       reg |= CCKCTRL_ATACLKOEN;
-       out_be32(cckctrl_port, reg);
-       reg |= CCKCTRL_LCLKEN | CCKCTRL_OCLKEN;
-       out_be32(cckctrl_port, reg);
-       reg |= CCKCTRL_CRST;
-       out_be32(cckctrl_port, reg);
-
-       for (;;) {
-               reg = in_be32(cckctrl_port);
-               if (reg & CCKCTRL_CRST)
-                       break;
-               udelay(5000);
-       }
-
-       reg |= CCKCTRL_ATARESET;
-       out_be32(cckctrl_port, reg);
-       out_be32(ecmode_port, ECMODE_VALUE);
-       out_be32(mode_port, MODE_JCUSFEN);
-       out_be32(intmask_port, INTMASK_MSK);
-
-       if (in_be32(dmastatus_port) & QCHSD_STPDIAG) {
-               printk(KERN_WARNING "%s: failed to detect 80c cable. (PDIAG# is high)\n", DRV_NAME);
-               return -EIO;
-       }
-
-       return 0;
-}
-
-/**
- *     scc_setup_ports - initialize ioaddr with SCC PATA port offsets.
- *     @ioaddr: IO address structure to be initialized
- *     @base: base address of BMID region
- */
-
-static void scc_setup_ports (struct ata_ioports *ioaddr, void __iomem *base)
-{
-       ioaddr->cmd_addr = base + SCC_REG_CMD_ADDR;
-       ioaddr->altstatus_addr = ioaddr->cmd_addr + SCC_REG_ALTSTATUS;
-       ioaddr->ctl_addr = ioaddr->cmd_addr + SCC_REG_ALTSTATUS;
-       ioaddr->bmdma_addr = base;
-       ioaddr->data_addr = ioaddr->cmd_addr + SCC_REG_DATA;
-       ioaddr->error_addr = ioaddr->cmd_addr + SCC_REG_ERR;
-       ioaddr->feature_addr = ioaddr->cmd_addr + SCC_REG_FEATURE;
-       ioaddr->nsect_addr = ioaddr->cmd_addr + SCC_REG_NSECT;
-       ioaddr->lbal_addr = ioaddr->cmd_addr + SCC_REG_LBAL;
-       ioaddr->lbam_addr = ioaddr->cmd_addr + SCC_REG_LBAM;
-       ioaddr->lbah_addr = ioaddr->cmd_addr + SCC_REG_LBAH;
-       ioaddr->device_addr = ioaddr->cmd_addr + SCC_REG_DEVICE;
-       ioaddr->status_addr = ioaddr->cmd_addr + SCC_REG_STATUS;
-       ioaddr->command_addr = ioaddr->cmd_addr + SCC_REG_CMD;
-}
-
-static int scc_host_init(struct ata_host *host)
-{
-       struct pci_dev *pdev = to_pci_dev(host->dev);
-       int rc;
-
-       rc = scc_reset_controller(host);
-       if (rc)
-               return rc;
-
-       rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
-       if (rc)
-               return rc;
-       rc = dma_set_coherent_mask(&pdev->dev, ATA_DMA_MASK);
-       if (rc)
-               return rc;
-
-       scc_setup_ports(&host->ports[0]->ioaddr, host->iomap[SCC_BMID_BAR]);
-
-       pci_set_master(pdev);
-
-       return 0;
-}
-
-/**
- *     scc_init_one - Register SCC PATA device with kernel services
- *     @pdev: PCI device to register
- *     @ent: Entry in scc_pci_tbl matching with @pdev
- *
- *     LOCKING:
- *     Inherited from PCI layer (may sleep).
- *
- *     RETURNS:
- *     Zero on success, or -ERRNO value.
- */
-
-static int scc_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-       unsigned int board_idx = (unsigned int) ent->driver_data;
-       const struct ata_port_info *ppi[] = { &scc_port_info[board_idx], NULL };
-       struct ata_host *host;
-       int rc;
-
-       ata_print_version_once(&pdev->dev, DRV_VERSION);
-
-       host = ata_host_alloc_pinfo(&pdev->dev, ppi, 1);
-       if (!host)
-               return -ENOMEM;
-
-       rc = pcim_enable_device(pdev);
-       if (rc)
-               return rc;
-
-       rc = pcim_iomap_regions(pdev, (1 << SCC_CTRL_BAR) | (1 << SCC_BMID_BAR), DRV_NAME);
-       if (rc == -EBUSY)
-               pcim_pin_device(pdev);
-       if (rc)
-               return rc;
-       host->iomap = pcim_iomap_table(pdev);
-
-       ata_port_pbar_desc(host->ports[0], SCC_CTRL_BAR, -1, "ctrl");
-       ata_port_pbar_desc(host->ports[0], SCC_BMID_BAR, -1, "bmid");
-
-       rc = scc_host_init(host);
-       if (rc)
-               return rc;
-
-       return ata_host_activate(host, pdev->irq, ata_bmdma_interrupt,
-                                IRQF_SHARED, &scc_sht);
-}
-
-static struct pci_driver scc_pci_driver = {
-       .name                   = DRV_NAME,
-       .id_table               = scc_pci_tbl,
-       .probe                  = scc_init_one,
-       .remove                 = ata_pci_remove_one,
-#ifdef CONFIG_PM_SLEEP
-       .suspend                = ata_pci_device_suspend,
-       .resume                 = ata_pci_device_resume,
-#endif
-};
-
-module_pci_driver(scc_pci_driver);
-
-MODULE_AUTHOR("Toshiba corp");
-MODULE_DESCRIPTION("SCSI low-level driver for Toshiba SCC PATA controller");
-MODULE_LICENSE("GPL");
-MODULE_DEVICE_TABLE(pci, scc_pci_tbl);
-MODULE_VERSION(DRV_VERSION);
index 9c2ba1c..df0c66c 100644 (file)
@@ -179,7 +179,7 @@ static int detect_cache_attributes(unsigned int cpu)
 {
        int ret;
 
-       if (init_cache_level(cpu))
+       if (init_cache_level(cpu) || !cache_leaves(cpu))
                return -ENOENT;
 
        per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
index da033d3..48c0e22 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/memory.h>
+#include <linux/of.h>
 
 #include "base.h"
 
@@ -34,4 +35,5 @@ void __init driver_init(void)
        cpu_dev_init();
        memory_dev_init();
        container_dev_init();
+       of_core_init();
 }
index 6491f45..8b7d7f8 100644 (file)
@@ -61,7 +61,7 @@ static DEVICE_ATTR_RO(physical_package_id);
 define_id_show_func(core_id);
 static DEVICE_ATTR_RO(core_id);
 
-define_siblings_show_func(thread_siblings, thread_cpumask);
+define_siblings_show_func(thread_siblings, sibling_cpumask);
 static DEVICE_ATTR_RO(thread_siblings);
 static DEVICE_ATTR_RO(thread_siblings_list);
 
index eb1fed5..3ccef9e 100644 (file)
@@ -406,6 +406,7 @@ config BLK_DEV_RAM_DAX
 
 config BLK_DEV_PMEM
        tristate "Persistent memory block device support"
+       depends on HAS_IOMEM
        help
          Saying Y here will allow you to use a contiguous range of reserved
          memory as one or more persistent block devices.
index ae3fcb4..d7173cb 100644 (file)
@@ -1620,8 +1620,8 @@ out:
 
 static void loop_remove(struct loop_device *lo)
 {
-       del_gendisk(lo->lo_disk);
        blk_cleanup_queue(lo->lo_queue);
+       del_gendisk(lo->lo_disk);
        blk_mq_free_tag_set(&lo->tag_set);
        put_disk(lo->lo_disk);
        kfree(lo);
index 85b8036..683dff2 100644 (file)
@@ -1750,6 +1750,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        struct nvme_iod *iod;
        dma_addr_t meta_dma = 0;
        void *meta = NULL;
+       void __user *metadata;
 
        if (copy_from_user(&io, uio, sizeof(io)))
                return -EFAULT;
@@ -1763,6 +1764,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
                meta_len = 0;
        }
 
+       metadata = (void __user *)(unsigned long)io.metadata;
+
        write = io.opcode & 1;
 
        switch (io.opcode) {
@@ -1786,13 +1789,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        if (meta_len) {
                meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
                                                &meta_dma, GFP_KERNEL);
+
                if (!meta) {
                        status = -ENOMEM;
                        goto unmap;
                }
                if (write) {
-                       if (copy_from_user(meta, (void __user *)io.metadata,
-                                                               meta_len)) {
+                       if (copy_from_user(meta, metadata, meta_len)) {
                                status = -EFAULT;
                                goto unmap;
                        }
@@ -1819,8 +1822,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        nvme_free_iod(dev, iod);
        if (meta) {
                if (status == NVME_SC_SUCCESS && !write) {
-                       if (copy_to_user((void __user *)io.metadata, meta,
-                                                               meta_len))
+                       if (copy_to_user(metadata, meta, meta_len))
                                status = -EFAULT;
                }
                dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
index 6b736b0..44f2514 100644 (file)
@@ -944,7 +944,8 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
                                        u8 *inq_response, int alloc_len)
 {
-       __be32 max_sectors = cpu_to_be32(queue_max_hw_sectors(ns->queue));
+       __be32 max_sectors = cpu_to_be32(
+               nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
        __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
        __be32 discard_desc_count = cpu_to_be32(0x100);
 
@@ -2256,7 +2257,8 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        page_code = GET_INQ_PAGE_CODE(cmd);
        alloc_len = GET_INQ_ALLOC_LENGTH(cmd);
 
-       inq_response = kmalloc(alloc_len, GFP_KERNEL);
+       inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
+                               GFP_KERNEL);
        if (inq_response == NULL) {
                res = -ENOMEM;
                goto out_mem;
index bd2b3bb..713fc9f 100644 (file)
@@ -265,17 +265,6 @@ static void put_persistent_gnt(struct xen_blkif *blkif,
        atomic_dec(&blkif->persistent_gnt_in_use);
 }
 
-static void free_persistent_gnts_unmap_callback(int result,
-                                               struct gntab_unmap_queue_data *data)
-{
-       struct completion *c = data->data;
-
-       /* BUG_ON used to reproduce existing behaviour,
-          but is this the best way to deal with this? */
-       BUG_ON(result);
-       complete(c);
-}
-
 static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
                                  unsigned int num)
 {
@@ -285,12 +274,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
        struct rb_node *n;
        int segs_to_unmap = 0;
        struct gntab_unmap_queue_data unmap_data;
-       struct completion unmap_completion;
 
-       init_completion(&unmap_completion);
-
-       unmap_data.data = &unmap_completion;
-       unmap_data.done = &free_persistent_gnts_unmap_callback;
        unmap_data.pages = pages;
        unmap_data.unmap_ops = unmap;
        unmap_data.kunmap_ops = NULL;
@@ -310,8 +294,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
                        !rb_next(&persistent_gnt->node)) {
 
                        unmap_data.count = segs_to_unmap;
-                       gnttab_unmap_refs_async(&unmap_data);
-                       wait_for_completion(&unmap_completion);
+                       BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 
                        put_free_pages(blkif, pages, segs_to_unmap);
                        segs_to_unmap = 0;
@@ -329,8 +312,13 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct persistent_gnt *persistent_gnt;
-       int ret, segs_to_unmap = 0;
+       int segs_to_unmap = 0;
        struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+       struct gntab_unmap_queue_data unmap_data;
+
+       unmap_data.pages = pages;
+       unmap_data.unmap_ops = unmap;
+       unmap_data.kunmap_ops = NULL;
 
        while(!list_empty(&blkif->persistent_purge_list)) {
                persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
@@ -346,17 +334,16 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
                pages[segs_to_unmap] = persistent_gnt->page;
 
                if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
-                       ret = gnttab_unmap_refs(unmap, NULL, pages,
-                               segs_to_unmap);
-                       BUG_ON(ret);
+                       unmap_data.count = segs_to_unmap;
+                       BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
                        put_free_pages(blkif, pages, segs_to_unmap);
                        segs_to_unmap = 0;
                }
                kfree(persistent_gnt);
        }
        if (segs_to_unmap > 0) {
-               ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
-               BUG_ON(ret);
+               unmap_data.count = segs_to_unmap;
+               BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
                put_free_pages(blkif, pages, segs_to_unmap);
        }
 }
index 8dcbced..6e134f4 100644 (file)
@@ -805,7 +805,9 @@ static void zram_reset_device(struct zram *zram)
        memset(&zram->stats, 0, sizeof(zram->stats));
        zram->disksize = 0;
        zram->max_comp_streams = 1;
+
        set_capacity(zram->disk, 0);
+       part_stat_set_all(&zram->disk->part0, 0);
 
        up_write(&zram->init_lock);
        /* I/O operation under all of CPU are done so let's free */
index 288547a..8c81af6 100644 (file)
@@ -88,6 +88,7 @@ static const struct usb_device_id ath3k_table[] = {
        { USB_DEVICE(0x04CA, 0x3007) },
        { USB_DEVICE(0x04CA, 0x3008) },
        { USB_DEVICE(0x04CA, 0x300b) },
+       { USB_DEVICE(0x04CA, 0x300f) },
        { USB_DEVICE(0x04CA, 0x3010) },
        { USB_DEVICE(0x0930, 0x0219) },
        { USB_DEVICE(0x0930, 0x0220) },
@@ -104,6 +105,7 @@ static const struct usb_device_id ath3k_table[] = {
        { USB_DEVICE(0x0cf3, 0xe003) },
        { USB_DEVICE(0x0CF3, 0xE004) },
        { USB_DEVICE(0x0CF3, 0xE005) },
+       { USB_DEVICE(0x0CF3, 0xE006) },
        { USB_DEVICE(0x13d3, 0x3362) },
        { USB_DEVICE(0x13d3, 0x3375) },
        { USB_DEVICE(0x13d3, 0x3393) },
@@ -143,6 +145,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
        { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
@@ -158,6 +161,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
        { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 },
index 4f7e8d4..6de97b3 100644 (file)
@@ -227,7 +227,6 @@ static void bt3c_receive(struct bt3c_info *info)
        iobase = info->p_dev->resource[0]->start;
 
        avail = bt3c_read(iobase, 0x7006);
-       //printk("bt3c_cs: receiving %d bytes\n", avail);
 
        bt3c_address(iobase, 0x7480);
        while (size < avail) {
@@ -250,7 +249,6 @@ static void bt3c_receive(struct bt3c_info *info)
 
                        bt_cb(info->rx_skb)->pkt_type = inb(iobase + DATA_L);
                        inb(iobase + DATA_H);
-                       //printk("bt3c: PACKET_TYPE=%02x\n", bt_cb(info->rx_skb)->pkt_type);
 
                        switch (bt_cb(info->rx_skb)->pkt_type) {
 
@@ -364,7 +362,6 @@ static irqreturn_t bt3c_interrupt(int irq, void *dev_inst)
                        if (stat & 0x0001)
                                bt3c_receive(info);
                        if (stat & 0x0002) {
-                               //BT_ERR("Ack (stat=0x%04x)", stat);
                                clear_bit(XMIT_SENDING, &(info->tx_state));
                                bt3c_write_wakeup(info);
                        }
index d0741f3..4bba866 100644 (file)
@@ -95,6 +95,78 @@ int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
 }
 EXPORT_SYMBOL_GPL(btbcm_set_bdaddr);
 
+int btbcm_patchram(struct hci_dev *hdev, const char *firmware)
+{
+       const struct hci_command_hdr *cmd;
+       const struct firmware *fw;
+       const u8 *fw_ptr;
+       size_t fw_size;
+       struct sk_buff *skb;
+       u16 opcode;
+       int err;
+
+       err = request_firmware(&fw, firmware, &hdev->dev);
+       if (err < 0) {
+               BT_INFO("%s: BCM: Patch %s not found", hdev->name, firmware);
+               return err;
+       }
+
+       /* Start Download */
+       skb = __hci_cmd_sync(hdev, 0xfc2e, 0, NULL, HCI_INIT_TIMEOUT);
+       if (IS_ERR(skb)) {
+               err = PTR_ERR(skb);
+               BT_ERR("%s: BCM: Download Minidrv command failed (%d)",
+                      hdev->name, err);
+               goto done;
+       }
+       kfree_skb(skb);
+
+       /* 50 msec delay after Download Minidrv completes */
+       msleep(50);
+
+       fw_ptr = fw->data;
+       fw_size = fw->size;
+
+       while (fw_size >= sizeof(*cmd)) {
+               const u8 *cmd_param;
+
+               cmd = (struct hci_command_hdr *)fw_ptr;
+               fw_ptr += sizeof(*cmd);
+               fw_size -= sizeof(*cmd);
+
+               if (fw_size < cmd->plen) {
+                       BT_ERR("%s: BCM: Patch %s is corrupted", hdev->name,
+                              firmware);
+                       err = -EINVAL;
+                       goto done;
+               }
+
+               cmd_param = fw_ptr;
+               fw_ptr += cmd->plen;
+               fw_size -= cmd->plen;
+
+               opcode = le16_to_cpu(cmd->opcode);
+
+               skb = __hci_cmd_sync(hdev, opcode, cmd->plen, cmd_param,
+                                    HCI_INIT_TIMEOUT);
+               if (IS_ERR(skb)) {
+                       err = PTR_ERR(skb);
+                       BT_ERR("%s: BCM: Patch command %04x failed (%d)",
+                              hdev->name, opcode, err);
+                       goto done;
+               }
+               kfree_skb(skb);
+       }
+
+       /* 250 msec delay after Launch Ram completes */
+       msleep(250);
+
+done:
+       release_firmware(fw);
+       return err;
+}
+EXPORT_SYMBOL(btbcm_patchram);
+
 static int btbcm_reset(struct hci_dev *hdev)
 {
        struct sk_buff *skb;
@@ -198,12 +270,8 @@ static const struct {
 
 int btbcm_setup_patchram(struct hci_dev *hdev)
 {
-       const struct hci_command_hdr *cmd;
-       const struct firmware *fw;
-       const u8 *fw_ptr;
-       size_t fw_size;
        char fw_name[64];
-       u16 opcode, subver, rev, pid, vid;
+       u16 subver, rev, pid, vid;
        const char *hw_name = NULL;
        struct sk_buff *skb;
        struct hci_rp_read_local_version *ver;
@@ -273,74 +341,19 @@ int btbcm_setup_patchram(struct hci_dev *hdev)
                hw_name ? : "BCM", (subver & 0x7000) >> 13,
                (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff);
 
-       err = request_firmware(&fw, fw_name, &hdev->dev);
-       if (err < 0) {
-               BT_INFO("%s: BCM: patch %s not found", hdev->name, fw_name);
+       err = btbcm_patchram(hdev, fw_name);
+       if (err == -ENOENT)
                return 0;
-       }
-
-       /* Start Download */
-       skb = __hci_cmd_sync(hdev, 0xfc2e, 0, NULL, HCI_INIT_TIMEOUT);
-       if (IS_ERR(skb)) {
-               err = PTR_ERR(skb);
-               BT_ERR("%s: BCM: Download Minidrv command failed (%d)",
-                      hdev->name, err);
-               goto reset;
-       }
-       kfree_skb(skb);
-
-       /* 50 msec delay after Download Minidrv completes */
-       msleep(50);
-
-       fw_ptr = fw->data;
-       fw_size = fw->size;
-
-       while (fw_size >= sizeof(*cmd)) {
-               const u8 *cmd_param;
-
-               cmd = (struct hci_command_hdr *)fw_ptr;
-               fw_ptr += sizeof(*cmd);
-               fw_size -= sizeof(*cmd);
-
-               if (fw_size < cmd->plen) {
-                       BT_ERR("%s: BCM: patch %s is corrupted", hdev->name,
-                              fw_name);
-                       err = -EINVAL;
-                       goto reset;
-               }
 
-               cmd_param = fw_ptr;
-               fw_ptr += cmd->plen;
-               fw_size -= cmd->plen;
-
-               opcode = le16_to_cpu(cmd->opcode);
-
-               skb = __hci_cmd_sync(hdev, opcode, cmd->plen, cmd_param,
-                                    HCI_INIT_TIMEOUT);
-               if (IS_ERR(skb)) {
-                       err = PTR_ERR(skb);
-                       BT_ERR("%s: BCM: patch command %04x failed (%d)",
-                              hdev->name, opcode, err);
-                       goto reset;
-               }
-               kfree_skb(skb);
-       }
-
-       /* 250 msec delay after Launch Ram completes */
-       msleep(250);
-
-reset:
        /* Reset */
        err = btbcm_reset(hdev);
        if (err)
-               goto done;
+               return err;
 
        /* Read Local Version Info */
        skb = btbcm_read_local_version(hdev);
-       if (IS_ERR(skb)) {
-               err = PTR_ERR(skb);
-               goto done;
-       }
+       if (IS_ERR(skb))
+               return PTR_ERR(skb);
 
        ver = (struct hci_rp_read_local_version *)skb->data;
        rev = le16_to_cpu(ver->hci_rev);
@@ -355,10 +368,7 @@ reset:
 
        set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
 
-done:
-       release_firmware(fw);
-
-       return err;
+       return 0;
 }
 EXPORT_SYMBOL_GPL(btbcm_setup_patchram);
 
index 34268ae..eb6ab5f 100644 (file)
@@ -25,6 +25,7 @@
 
 int btbcm_check_bdaddr(struct hci_dev *hdev);
 int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr);
+int btbcm_patchram(struct hci_dev *hdev, const char *firmware);
 
 int btbcm_setup_patchram(struct hci_dev *hdev);
 int btbcm_setup_apple(struct hci_dev *hdev);
@@ -41,6 +42,11 @@ static inline int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
        return -EOPNOTSUPP;
 }
 
+static inline int btbcm_patchram(struct hci_dev *hdev, const char *firmware)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline int btbcm_setup_patchram(struct hci_dev *hdev)
 {
        return 0;
index de7b236..3c10d4d 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/usb.h>
 #include <linux/firmware.h>
+#include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -57,6 +58,7 @@ static struct usb_driver btusb_driver;
 #define BTUSB_AMP              0x4000
 #define BTUSB_QCA_ROME         0x8000
 #define BTUSB_BCM_APPLE                0x10000
+#define BTUSB_REALTEK          0x20000
 
 static const struct usb_device_id btusb_table[] = {
        /* Generic Bluetooth USB device */
@@ -184,6 +186,7 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
@@ -200,6 +203,7 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 },
@@ -216,6 +220,7 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 },
 
        /* QCA ROME chipset */
+       { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME },
 
@@ -288,6 +293,28 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01),
          .driver_info = BTUSB_IGNORE },
 
+       /* Realtek Bluetooth devices */
+       { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01),
+         .driver_info = BTUSB_REALTEK },
+
+       /* Additional Realtek 8723AE Bluetooth devices */
+       { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
+       { USB_DEVICE(0x13d3, 0x3394), .driver_info = BTUSB_REALTEK },
+
+       /* Additional Realtek 8723BE Bluetooth devices */
+       { USB_DEVICE(0x0489, 0xe085), .driver_info = BTUSB_REALTEK },
+       { USB_DEVICE(0x0489, 0xe08b), .driver_info = BTUSB_REALTEK },
+       { USB_DEVICE(0x13d3, 0x3410), .driver_info = BTUSB_REALTEK },
+       { USB_DEVICE(0x13d3, 0x3416), .driver_info = BTUSB_REALTEK },
+       { USB_DEVICE(0x13d3, 0x3459), .driver_info = BTUSB_REALTEK },
+
+       /* Additional Realtek 8821AE Bluetooth devices */
+       { USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK },
+       { USB_DEVICE(0x13d3, 0x3414), .driver_info = BTUSB_REALTEK },
+       { USB_DEVICE(0x13d3, 0x3458), .driver_info = BTUSB_REALTEK },
+       { USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
+       { USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },
+
        { }     /* Terminating entry */
 };
 
@@ -892,7 +919,7 @@ static int btusb_open(struct hci_dev *hdev)
         */
        if (data->setup_on_usb) {
                err = data->setup_on_usb(hdev);
-               if (err <0)
+               if (err < 0)
                        return err;
        }
 
@@ -1345,6 +1372,378 @@ static int btusb_setup_csr(struct hci_dev *hdev)
        return ret;
 }
 
+#define RTL_FRAG_LEN 252
+
+struct rtl_download_cmd {
+       __u8 index;
+       __u8 data[RTL_FRAG_LEN];
+} __packed;
+
+struct rtl_download_response {
+       __u8 status;
+       __u8 index;
+} __packed;
+
+struct rtl_rom_version_evt {
+       __u8 status;
+       __u8 version;
+} __packed;
+
+struct rtl_epatch_header {
+       __u8 signature[8];
+       __le32 fw_version;
+       __le16 num_patches;
+} __packed;
+
+#define RTL_EPATCH_SIGNATURE   "Realtech"
+#define RTL_ROM_LMP_3499       0x3499
+#define RTL_ROM_LMP_8723A      0x1200
+#define RTL_ROM_LMP_8723B      0x8723
+#define RTL_ROM_LMP_8821A      0x8821
+#define RTL_ROM_LMP_8761A      0x8761
+
+static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
+{
+       struct rtl_rom_version_evt *rom_version;
+       struct sk_buff *skb;
+       int ret;
+
+       /* Read RTL ROM version command */
+       skb = __hci_cmd_sync(hdev, 0xfc6d, 0, NULL, HCI_INIT_TIMEOUT);
+       if (IS_ERR(skb)) {
+               BT_ERR("%s: Read ROM version failed (%ld)",
+                      hdev->name, PTR_ERR(skb));
+               return PTR_ERR(skb);
+       }
+
+       if (skb->len != sizeof(*rom_version)) {
+               BT_ERR("%s: RTL version event length mismatch", hdev->name);
+               kfree_skb(skb);
+               return -EIO;
+       }
+
+       rom_version = (struct rtl_rom_version_evt *)skb->data;
+       BT_INFO("%s: rom_version status=%x version=%x",
+               hdev->name, rom_version->status, rom_version->version);
+
+       ret = rom_version->status;
+       if (ret == 0)
+               *version = rom_version->version;
+
+       kfree_skb(skb);
+       return ret;
+}
+
+static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
+                                  const struct firmware *fw,
+                                  unsigned char **_buf)
+{
+       const u8 extension_sig[] = { 0x51, 0x04, 0xfd, 0x77 };
+       struct rtl_epatch_header *epatch_info;
+       unsigned char *buf;
+       int i, ret, len;
+       size_t min_size;
+       u8 opcode, length, data, rom_version = 0;
+       int project_id = -1;
+       const unsigned char *fwptr, *chip_id_base;
+       const unsigned char *patch_length_base, *patch_offset_base;
+       u32 patch_offset = 0;
+       u16 patch_length, num_patches;
+       const u16 project_id_to_lmp_subver[] = {
+               RTL_ROM_LMP_8723A,
+               RTL_ROM_LMP_8723B,
+               RTL_ROM_LMP_8821A,
+               RTL_ROM_LMP_8761A
+       };
+
+       ret = rtl_read_rom_version(hdev, &rom_version);
+       if (ret)
+               return -bt_to_errno(ret);
+
+       min_size = sizeof(struct rtl_epatch_header) + sizeof(extension_sig) + 3;
+       if (fw->size < min_size)
+               return -EINVAL;
+
+       fwptr = fw->data + fw->size - sizeof(extension_sig);
+       if (memcmp(fwptr, extension_sig, sizeof(extension_sig)) != 0) {
+               BT_ERR("%s: extension section signature mismatch", hdev->name);
+               return -EINVAL;
+       }
+
+       /* Loop from the end of the firmware parsing instructions, until
+        * we find an instruction that identifies the "project ID" for the
+        * hardware supported by this firwmare file.
+        * Once we have that, we double-check that that project_id is suitable
+        * for the hardware we are working with.
+        */
+       while (fwptr >= fw->data + (sizeof(struct rtl_epatch_header) + 3)) {
+               opcode = *--fwptr;
+               length = *--fwptr;
+               data = *--fwptr;
+
+               BT_DBG("check op=%x len=%x data=%x", opcode, length, data);
+
+               if (opcode == 0xff) /* EOF */
+                       break;
+
+               if (length == 0) {
+                       BT_ERR("%s: found instruction with length 0",
+                              hdev->name);
+                       return -EINVAL;
+               }
+
+               if (opcode == 0 && length == 1) {
+                       project_id = data;
+                       break;
+               }
+
+               fwptr -= length;
+       }
+
+       if (project_id < 0) {
+               BT_ERR("%s: failed to find version instruction", hdev->name);
+               return -EINVAL;
+       }
+
+       if (project_id >= ARRAY_SIZE(project_id_to_lmp_subver)) {
+               BT_ERR("%s: unknown project id %d", hdev->name, project_id);
+               return -EINVAL;
+       }
+
+       if (lmp_subver != project_id_to_lmp_subver[project_id]) {
+               BT_ERR("%s: firmware is for %x but this is a %x", hdev->name,
+                      project_id_to_lmp_subver[project_id], lmp_subver);
+               return -EINVAL;
+       }
+
+       epatch_info = (struct rtl_epatch_header *)fw->data;
+       if (memcmp(epatch_info->signature, RTL_EPATCH_SIGNATURE, 8) != 0) {
+               BT_ERR("%s: bad EPATCH signature", hdev->name);
+               return -EINVAL;
+       }
+
+       num_patches = le16_to_cpu(epatch_info->num_patches);
+       BT_DBG("fw_version=%x, num_patches=%d",
+              le32_to_cpu(epatch_info->fw_version), num_patches);
+
+       /* After the rtl_epatch_header there is a funky patch metadata section.
+        * Assuming 2 patches, the layout is:
+        * ChipID1 ChipID2 PatchLength1 PatchLength2 PatchOffset1 PatchOffset2
+        *
+        * Find the right patch for this chip.
+        */
+       min_size += 8 * num_patches;
+       if (fw->size < min_size)
+               return -EINVAL;
+
+       chip_id_base = fw->data + sizeof(struct rtl_epatch_header);
+       patch_length_base = chip_id_base + (sizeof(u16) * num_patches);
+       patch_offset_base = patch_length_base + (sizeof(u16) * num_patches);
+       for (i = 0; i < num_patches; i++) {
+               u16 chip_id = get_unaligned_le16(chip_id_base +
+                                                (i * sizeof(u16)));
+               if (chip_id == rom_version + 1) {
+                       patch_length = get_unaligned_le16(patch_length_base +
+                                                         (i * sizeof(u16)));
+                       patch_offset = get_unaligned_le32(patch_offset_base +
+                                                         (i * sizeof(u32)));
+                       break;
+               }
+       }
+
+       if (!patch_offset) {
+               BT_ERR("%s: didn't find patch for chip id %d",
+                      hdev->name, rom_version);
+               return -EINVAL;
+       }
+
+       BT_DBG("length=%x offset=%x index %d", patch_length, patch_offset, i);
+       min_size = patch_offset + patch_length;
+       if (fw->size < min_size)
+               return -EINVAL;
+
+       /* Copy the firmware into a new buffer and write the version at
+        * the end.
+        */
+       len = patch_length;
+       buf = kmemdup(fw->data + patch_offset, patch_length, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       memcpy(buf + patch_length - 4, &epatch_info->fw_version, 4);
+
+       *_buf = buf;
+       return len;
+}
+
+static int rtl_download_firmware(struct hci_dev *hdev,
+                                const unsigned char *data, int fw_len)
+{
+       struct rtl_download_cmd *dl_cmd;
+       int frag_num = fw_len / RTL_FRAG_LEN + 1;
+       int frag_len = RTL_FRAG_LEN;
+       int ret = 0;
+       int i;
+
+       dl_cmd = kmalloc(sizeof(struct rtl_download_cmd), GFP_KERNEL);
+       if (!dl_cmd)
+               return -ENOMEM;
+
+       for (i = 0; i < frag_num; i++) {
+               struct rtl_download_response *dl_resp;
+               struct sk_buff *skb;
+
+               BT_DBG("download fw (%d/%d)", i, frag_num);
+
+               dl_cmd->index = i;
+               if (i == (frag_num - 1)) {
+                       dl_cmd->index |= 0x80; /* data end */
+                       frag_len = fw_len % RTL_FRAG_LEN;
+               }
+               memcpy(dl_cmd->data, data, frag_len);
+
+               /* Send download command */
+               skb = __hci_cmd_sync(hdev, 0xfc20, frag_len + 1, dl_cmd,
+                                    HCI_INIT_TIMEOUT);
+               if (IS_ERR(skb)) {
+                       BT_ERR("%s: download fw command failed (%ld)",
+                              hdev->name, PTR_ERR(skb));
+                       ret = -PTR_ERR(skb);
+                       goto out;
+               }
+
+               if (skb->len != sizeof(*dl_resp)) {
+                       BT_ERR("%s: download fw event length mismatch",
+                              hdev->name);
+                       kfree_skb(skb);
+                       ret = -EIO;
+                       goto out;
+               }
+
+               dl_resp = (struct rtl_download_response *)skb->data;
+               if (dl_resp->status != 0) {
+                       kfree_skb(skb);
+                       ret = bt_to_errno(dl_resp->status);
+                       goto out;
+               }
+
+               kfree_skb(skb);
+               data += RTL_FRAG_LEN;
+       }
+
+out:
+       kfree(dl_cmd);
+       return ret;
+}
+
+static int btusb_setup_rtl8723a(struct hci_dev *hdev)
+{
+       struct btusb_data *data = dev_get_drvdata(&hdev->dev);
+       struct usb_device *udev = interface_to_usbdev(data->intf);
+       const struct firmware *fw;
+       int ret;
+
+       BT_INFO("%s: rtl: loading rtl_bt/rtl8723a_fw.bin", hdev->name);
+       ret = request_firmware(&fw, "rtl_bt/rtl8723a_fw.bin", &udev->dev);
+       if (ret < 0) {
+               BT_ERR("%s: Failed to load rtl_bt/rtl8723a_fw.bin", hdev->name);
+               return ret;
+       }
+
+       if (fw->size < 8) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       /* Check that the firmware doesn't have the epatch signature
+        * (which is only for RTL8723B and newer).
+        */
+       if (!memcmp(fw->data, RTL_EPATCH_SIGNATURE, 8)) {
+               BT_ERR("%s: unexpected EPATCH signature!", hdev->name);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = rtl_download_firmware(hdev, fw->data, fw->size);
+
+out:
+       release_firmware(fw);
+       return ret;
+}
+
+static int btusb_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
+                               const char *fw_name)
+{
+       struct btusb_data *data = dev_get_drvdata(&hdev->dev);
+       struct usb_device *udev = interface_to_usbdev(data->intf);
+       unsigned char *fw_data = NULL;
+       const struct firmware *fw;
+       int ret;
+
+       BT_INFO("%s: rtl: loading %s", hdev->name, fw_name);
+       ret = request_firmware(&fw, fw_name, &udev->dev);
+       if (ret < 0) {
+               BT_ERR("%s: Failed to load %s", hdev->name, fw_name);
+               return ret;
+       }
+
+       ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data);
+       if (ret < 0)
+               goto out;
+
+       ret = rtl_download_firmware(hdev, fw_data, ret);
+       kfree(fw_data);
+       if (ret < 0)
+               goto out;
+
+out:
+       release_firmware(fw);
+       return ret;
+}
+
+static int btusb_setup_realtek(struct hci_dev *hdev)
+{
+       struct sk_buff *skb;
+       struct hci_rp_read_local_version *resp;
+       u16 lmp_subver;
+
+       skb = btusb_read_local_version(hdev);
+       if (IS_ERR(skb))
+               return -PTR_ERR(skb);
+
+       resp = (struct hci_rp_read_local_version *)skb->data;
+       BT_INFO("%s: rtl: examining hci_ver=%02x hci_rev=%04x lmp_ver=%02x "
+               "lmp_subver=%04x", hdev->name, resp->hci_ver, resp->hci_rev,
+               resp->lmp_ver, resp->lmp_subver);
+
+       lmp_subver = le16_to_cpu(resp->lmp_subver);
+       kfree_skb(skb);
+
+       /* Match a set of subver values that correspond to stock firmware,
+        * which is not compatible with standard btusb.
+        * If matched, upload an alternative firmware that does conform to
+        * standard btusb. Once that firmware is uploaded, the subver changes
+        * to a different value.
+        */
+       switch (lmp_subver) {
+       case RTL_ROM_LMP_8723A:
+       case RTL_ROM_LMP_3499:
+               return btusb_setup_rtl8723a(hdev);
+       case RTL_ROM_LMP_8723B:
+               return btusb_setup_rtl8723b(hdev, lmp_subver,
+                                           "rtl_bt/rtl8723b_fw.bin");
+       case RTL_ROM_LMP_8821A:
+               return btusb_setup_rtl8723b(hdev, lmp_subver,
+                                           "rtl_bt/rtl8821a_fw.bin");
+       case RTL_ROM_LMP_8761A:
+               return btusb_setup_rtl8723b(hdev, lmp_subver,
+                                           "rtl_bt/rtl8761a_fw.bin");
+       default:
+               BT_INFO("rtl: assuming no firmware upload needed.");
+               return 0;
+       }
+}
+
 static const struct firmware *btusb_setup_intel_get_fw(struct hci_dev *hdev,
                                                       struct intel_version *ver)
 {
@@ -2577,7 +2976,7 @@ static int btusb_setup_qca(struct hci_dev *hdev)
        int i, err;
 
        err = btusb_qca_send_vendor_req(hdev, QCA_GET_TARGET_VERSION, &ver,
-                                       sizeof(ver));
+                                       sizeof(ver));
        if (err < 0)
                return err;
 
@@ -2776,6 +3175,9 @@ static int btusb_probe(struct usb_interface *intf,
                hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
        }
 
+       if (id->driver_info & BTUSB_REALTEK)
+               hdev->setup = btusb_setup_realtek;
+
        if (id->driver_info & BTUSB_AMP) {
                /* AMP controllers do not support SCO packets */
                data->isoc = NULL;
index 1b3f864..ec8fa0e 100644 (file)
@@ -95,7 +95,6 @@ static void ath_hci_uart_work(struct work_struct *work)
        hci_uart_tx_wakeup(hu);
 }
 
-/* Initialize protocol */
 static int ath_open(struct hci_uart *hu)
 {
        struct ath_struct *ath;
@@ -116,8 +115,7 @@ static int ath_open(struct hci_uart *hu)
        return 0;
 }
 
-/* Flush protocol data */
-static int ath_flush(struct hci_uart *hu)
+static int ath_close(struct hci_uart *hu)
 {
        struct ath_struct *ath = hu->priv;
 
@@ -125,11 +123,17 @@ static int ath_flush(struct hci_uart *hu)
 
        skb_queue_purge(&ath->txq);
 
+       kfree_skb(ath->rx_skb);
+
+       cancel_work_sync(&ath->ctxtsw);
+
+       hu->priv = NULL;
+       kfree(ath);
+
        return 0;
 }
 
-/* Close protocol */
-static int ath_close(struct hci_uart *hu)
+static int ath_flush(struct hci_uart *hu)
 {
        struct ath_struct *ath = hu->priv;
 
@@ -137,19 +141,65 @@ static int ath_close(struct hci_uart *hu)
 
        skb_queue_purge(&ath->txq);
 
-       kfree_skb(ath->rx_skb);
+       return 0;
+}
 
-       cancel_work_sync(&ath->ctxtsw);
+static int ath_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
+{
+       struct sk_buff *skb;
+       u8 buf[10];
+       int err;
+
+       buf[0] = 0x01;
+       buf[1] = 0x01;
+       buf[2] = 0x00;
+       buf[3] = sizeof(bdaddr_t);
+       memcpy(buf + 4, bdaddr, sizeof(bdaddr_t));
+
+       skb = __hci_cmd_sync(hdev, 0xfc0b, sizeof(buf), buf, HCI_INIT_TIMEOUT);
+       if (IS_ERR(skb)) {
+               err = PTR_ERR(skb);
+               BT_ERR("%s: Change address command failed (%d)",
+                      hdev->name, err);
+               return err;
+       }
+       kfree_skb(skb);
 
-       hu->priv = NULL;
-       kfree(ath);
+       return 0;
+}
+
+static int ath_setup(struct hci_uart *hu)
+{
+       BT_DBG("hu %p", hu);
+
+       hu->hdev->set_bdaddr = ath_set_bdaddr;
 
        return 0;
 }
 
+static const struct h4_recv_pkt ath_recv_pkts[] = {
+       { H4_RECV_ACL,   .recv = hci_recv_frame },
+       { H4_RECV_SCO,   .recv = hci_recv_frame },
+       { H4_RECV_EVENT, .recv = hci_recv_frame },
+};
+
+static int ath_recv(struct hci_uart *hu, const void *data, int count)
+{
+       struct ath_struct *ath = hu->priv;
+
+       ath->rx_skb = h4_recv_buf(hu->hdev, ath->rx_skb, data, count,
+                                 ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts));
+       if (IS_ERR(ath->rx_skb)) {
+               int err = PTR_ERR(ath->rx_skb);
+               BT_ERR("%s: Frame reassembly failed (%d)", hu->hdev->name, err);
+               return err;
+       }
+
+       return count;
+}
+
 #define HCI_OP_ATH_SLEEP 0xFC04
 
-/* Enqueue frame for transmittion */
 static int ath_enqueue(struct hci_uart *hu, struct sk_buff *skb)
 {
        struct ath_struct *ath = hu->priv;
@@ -159,8 +209,7 @@ static int ath_enqueue(struct hci_uart *hu, struct sk_buff *skb)
                return 0;
        }
 
-       /*
-        * Update power management enable flag with parameters of
+       /* Update power management enable flag with parameters of
         * HCI sleep enable vendor specific HCI command.
         */
        if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) {
@@ -190,37 +239,16 @@ static struct sk_buff *ath_dequeue(struct hci_uart *hu)
        return skb_dequeue(&ath->txq);
 }
 
-static const struct h4_recv_pkt ath_recv_pkts[] = {
-       { H4_RECV_ACL,   .recv = hci_recv_frame },
-       { H4_RECV_SCO,   .recv = hci_recv_frame },
-       { H4_RECV_EVENT, .recv = hci_recv_frame },
-};
-
-/* Recv data */
-static int ath_recv(struct hci_uart *hu, const void *data, int count)
-{
-       struct ath_struct *ath = hu->priv;
-
-       ath->rx_skb = h4_recv_buf(hu->hdev, ath->rx_skb, data, count,
-                                 ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts));
-       if (IS_ERR(ath->rx_skb)) {
-               int err = PTR_ERR(ath->rx_skb);
-               BT_ERR("%s: Frame reassembly failed (%d)", hu->hdev->name, err);
-               return err;
-       }
-
-       return count;
-}
-
 static const struct hci_uart_proto athp = {
        .id             = HCI_UART_ATH3K,
        .name           = "ATH3K",
        .open           = ath_open,
        .close          = ath_close,
+       .flush          = ath_flush,
+       .setup          = ath_setup,
        .recv           = ath_recv,
        .enqueue        = ath_enqueue,
        .dequeue        = ath_dequeue,
-       .flush          = ath_flush,
 };
 
 int __init ath_init(void)
index b854125..5340604 100644 (file)
@@ -660,7 +660,7 @@ validate_group(struct perf_event *event)
                 * Initialise the fake PMU. We only need to populate the
                 * used_mask for the purposes of validation.
                 */
-               .used_mask = CPU_BITS_NONE,
+               .used_mask = { 0 },
        };
 
        if (!validate_event(event->pmu, &fake_pmu, leader))
index 5bd792c..ab3bde1 100644 (file)
@@ -453,7 +453,7 @@ void __iomem *mips_cdmm_early_probe(unsigned int dev_type)
 
        /* Look for a specific device type */
        for (; drb < bus->drbs; drb += size + 1) {
-               acsr = readl(cdmm + drb * CDMM_DRB_SIZE);
+               acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE);
                type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT;
                if (type == dev_type)
                        return cdmm + drb * CDMM_DRB_SIZE;
@@ -500,7 +500,7 @@ static void mips_cdmm_bus_discover(struct mips_cdmm_bus *bus)
        bus->discovered = true;
        pr_info("cdmm%u discovery (%u blocks)\n", cpu, bus->drbs);
        for (; drb < bus->drbs; drb += size + 1) {
-               acsr = readl(cdmm + drb * CDMM_DRB_SIZE);
+               acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE);
                type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT;
                size = (acsr & CDMM_ACSR_DEVSIZE) >> CDMM_ACSR_DEVSIZE_SHIFT;
                rev  = (acsr & CDMM_ACSR_DEVREV)  >> CDMM_ACSR_DEVREV_SHIFT;
index fb9ec62..6f047dc 100644 (file)
@@ -58,7 +58,6 @@
 #include <linux/debugfs.h>
 #include <linux/log2.h>
 #include <linux/syscore_ops.h>
-#include <linux/memblock.h>
 
 /*
  * DDR target is the same on all platforms.
@@ -70,6 +69,7 @@
  */
 #define WIN_CTRL_OFF           0x0000
 #define   WIN_CTRL_ENABLE       BIT(0)
+/* Only on HW I/O coherency capable platforms */
 #define   WIN_CTRL_SYNCBARRIER  BIT(1)
 #define   WIN_CTRL_TGT_MASK     0xf0
 #define   WIN_CTRL_TGT_SHIFT    4
 
 /* Relative to mbusbridge_base */
 #define MBUS_BRIDGE_CTRL_OFF   0x0
-#define  MBUS_BRIDGE_SIZE_MASK  0xffff0000
 #define MBUS_BRIDGE_BASE_OFF   0x4
-#define  MBUS_BRIDGE_BASE_MASK  0xffff0000
 
 /* Maximum number of windows, for all known platforms */
 #define MBUS_WINS_MAX           20
@@ -323,8 +321,9 @@ static int mvebu_mbus_setup_window(struct mvebu_mbus_state *mbus,
        ctrl = ((size - 1) & WIN_CTRL_SIZE_MASK) |
                (attr << WIN_CTRL_ATTR_SHIFT)    |
                (target << WIN_CTRL_TGT_SHIFT)   |
-               WIN_CTRL_SYNCBARRIER             |
                WIN_CTRL_ENABLE;
+       if (mbus->hw_io_coherency)
+               ctrl |= WIN_CTRL_SYNCBARRIER;
 
        writel(base & WIN_BASE_LOW, addr + WIN_BASE_OFF);
        writel(ctrl, addr + WIN_CTRL_OFF);
@@ -577,106 +576,36 @@ static unsigned int armada_xp_mbus_win_remap_offset(int win)
                return MVEBU_MBUS_NO_REMAP;
 }
 
-/*
- * Use the memblock information to find the MBus bridge hole in the
- * physical address space.
- */
-static void __init
-mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end)
-{
-       struct memblock_region *r;
-       uint64_t s = 0;
-
-       for_each_memblock(memory, r) {
-               /*
-                * This part of the memory is above 4 GB, so we don't
-                * care for the MBus bridge hole.
-                */
-               if (r->base >= 0x100000000)
-                       continue;
-
-               /*
-                * The MBus bridge hole is at the end of the RAM under
-                * the 4 GB limit.
-                */
-               if (r->base + r->size > s)
-                       s = r->base + r->size;
-       }
-
-       *start = s;
-       *end = 0x100000000;
-}
-
 static void __init
 mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus)
 {
        int i;
        int cs;
-       uint64_t mbus_bridge_base, mbus_bridge_end;
 
        mvebu_mbus_dram_info.mbus_dram_target_id = TARGET_DDR;
 
-       mvebu_mbus_find_bridge_hole(&mbus_bridge_base, &mbus_bridge_end);
-
        for (i = 0, cs = 0; i < 4; i++) {
-               u64 base = readl(mbus->sdramwins_base + DDR_BASE_CS_OFF(i));
-               u64 size = readl(mbus->sdramwins_base + DDR_SIZE_CS_OFF(i));
-               u64 end;
-               struct mbus_dram_window *w;
-
-               /* Ignore entries that are not enabled */
-               if (!(size & DDR_SIZE_ENABLED))
-                       continue;
-
-               /*
-                * Ignore entries whose base address is above 2^32,
-                * since devices cannot DMA to such high addresses
-                */
-               if (base & DDR_BASE_CS_HIGH_MASK)
-                       continue;
-
-               base = base & DDR_BASE_CS_LOW_MASK;
-               size = (size | ~DDR_SIZE_MASK) + 1;
-               end = base + size;
-
-               /*
-                * Adjust base/size of the current CS to make sure it
-                * doesn't overlap with the MBus bridge hole. This is
-                * particularly important for devices that do DMA from
-                * DRAM to a SRAM mapped in a MBus window, such as the
-                * CESA cryptographic engine.
-                */
+               u32 base = readl(mbus->sdramwins_base + DDR_BASE_CS_OFF(i));
+               u32 size = readl(mbus->sdramwins_base + DDR_SIZE_CS_OFF(i));
 
                /*
-                * The CS is fully enclosed inside the MBus bridge
-                * area, so ignore it.
+                * We only take care of entries for which the chip
+                * select is enabled, and that don't have high base
+                * address bits set (devices can only access the first
+                * 32 bits of the memory).
                 */
-               if (base >= mbus_bridge_base && end <= mbus_bridge_end)
-                       continue;
+               if ((size & DDR_SIZE_ENABLED) &&
+                   !(base & DDR_BASE_CS_HIGH_MASK)) {
+                       struct mbus_dram_window *w;
 
-               /*
-                * Beginning of CS overlaps with end of MBus, raise CS
-                * base address, and shrink its size.
-                */
-               if (base >= mbus_bridge_base && end > mbus_bridge_end) {
-                       size -= mbus_bridge_end - base;
-                       base = mbus_bridge_end;
+                       w = &mvebu_mbus_dram_info.cs[cs++];
+                       w->cs_index = i;
+                       w->mbus_attr = 0xf & ~(1 << i);
+                       if (mbus->hw_io_coherency)
+                               w->mbus_attr |= ATTR_HW_COHERENCY;
+                       w->base = base & DDR_BASE_CS_LOW_MASK;
+                       w->size = (size | ~DDR_SIZE_MASK) + 1;
                }
-
-               /*
-                * End of CS overlaps with beginning of MBus, shrink
-                * CS size.
-                */
-               if (base < mbus_bridge_base && end > mbus_bridge_base)
-                       size -= end - mbus_bridge_base;
-
-               w = &mvebu_mbus_dram_info.cs[cs++];
-               w->cs_index = i;
-               w->mbus_attr = 0xf & ~(1 << i);
-               if (mbus->hw_io_coherency)
-                       w->mbus_attr |= ATTR_HW_COHERENCY;
-               w->base = base;
-               w->size = size;
        }
        mvebu_mbus_dram_info.num_cs = cs;
 }
index 11f7982..ebee57d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * OMAP L3 Interconnect error handling driver
  *
- * Copyright (C) 2011-2014 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2011-2015 Texas Instruments Incorporated - http://www.ti.com/
  *     Santosh Shilimkar <santosh.shilimkar@ti.com>
  *     Sricharan <r.sricharan@ti.com>
  *
@@ -233,7 +233,8 @@ static irqreturn_t l3_interrupt_handler(int irq, void *_l3)
 }
 
 static const struct of_device_id l3_noc_match[] = {
-       {.compatible = "ti,omap4-l3-noc", .data = &omap_l3_data},
+       {.compatible = "ti,omap4-l3-noc", .data = &omap4_l3_data},
+       {.compatible = "ti,omap5-l3-noc", .data = &omap5_l3_data},
        {.compatible = "ti,dra7-l3-noc", .data = &dra_l3_data},
        {.compatible = "ti,am4372-l3-noc", .data = &am4372_l3_data},
        {},
index 9525458..73431f8 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * OMAP L3 Interconnect  error handling driver header
  *
- * Copyright (C) 2011-2014 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2011-2015 Texas Instruments Incorporated - http://www.ti.com/
  *     Santosh Shilimkar <santosh.shilimkar@ti.com>
  *     sricharan <r.sricharan@ti.com>
  *
@@ -175,16 +175,14 @@ static struct l3_flagmux_data omap_l3_flagmux_clk2 = {
 };
 
 
-static struct l3_target_data omap_l3_target_data_clk3[] = {
-       {0x0100, "EMUSS",},
-       {0x0300, "DEBUG SOURCE",},
-       {0x0,   "HOST CLK3",},
+static struct l3_target_data omap4_l3_target_data_clk3[] = {
+       {0x0100, "DEBUGSS",},
 };
 
-static struct l3_flagmux_data omap_l3_flagmux_clk3 = {
+static struct l3_flagmux_data omap4_l3_flagmux_clk3 = {
        .offset = 0x0200,
-       .l3_targ = omap_l3_target_data_clk3,
-       .num_targ_data = ARRAY_SIZE(omap_l3_target_data_clk3),
+       .l3_targ = omap4_l3_target_data_clk3,
+       .num_targ_data = ARRAY_SIZE(omap4_l3_target_data_clk3),
 };
 
 static struct l3_masters_data omap_l3_masters[] = {
@@ -215,21 +213,49 @@ static struct l3_masters_data omap_l3_masters[] = {
        { 0x32, "USBHOSTFS"}
 };
 
-static struct l3_flagmux_data *omap_l3_flagmux[] = {
+static struct l3_flagmux_data *omap4_l3_flagmux[] = {
        &omap_l3_flagmux_clk1,
        &omap_l3_flagmux_clk2,
-       &omap_l3_flagmux_clk3,
+       &omap4_l3_flagmux_clk3,
 };
 
-static const struct omap_l3 omap_l3_data = {
-       .l3_flagmux = omap_l3_flagmux,
-       .num_modules = ARRAY_SIZE(omap_l3_flagmux),
+static const struct omap_l3 omap4_l3_data = {
+       .l3_flagmux = omap4_l3_flagmux,
+       .num_modules = ARRAY_SIZE(omap4_l3_flagmux),
        .l3_masters = omap_l3_masters,
        .num_masters = ARRAY_SIZE(omap_l3_masters),
        /* The 6 MSBs of register field used to distinguish initiator */
        .mst_addr_mask = 0xFC,
 };
 
+/* OMAP5 data */
+static struct l3_target_data omap5_l3_target_data_clk3[] = {
+       {0x0100, "L3INSTR",},
+       {0x0300, "DEBUGSS",},
+       {0x0,    "HOSTCLK3",},
+};
+
+static struct l3_flagmux_data omap5_l3_flagmux_clk3 = {
+       .offset = 0x0200,
+       .l3_targ = omap5_l3_target_data_clk3,
+       .num_targ_data = ARRAY_SIZE(omap5_l3_target_data_clk3),
+};
+
+static struct l3_flagmux_data *omap5_l3_flagmux[] = {
+       &omap_l3_flagmux_clk1,
+       &omap_l3_flagmux_clk2,
+       &omap5_l3_flagmux_clk3,
+};
+
+static const struct omap_l3 omap5_l3_data = {
+       .l3_flagmux = omap5_l3_flagmux,
+       .num_modules = ARRAY_SIZE(omap5_l3_flagmux),
+       .l3_masters = omap_l3_masters,
+       .num_masters = ARRAY_SIZE(omap_l3_masters),
+       /* The 6 MSBs of register field used to distinguish initiator */
+       .mst_addr_mask = 0x7E0,
+};
+
 /* DRA7 data */
 static struct l3_target_data dra_l3_target_data_clk1[] = {
        {0x2a00, "AES1",},
@@ -274,7 +300,7 @@ static struct l3_flagmux_data dra_l3_flagmux_clk1 = {
 
 static struct l3_target_data dra_l3_target_data_clk2[] = {
        {0x0,   "HOST CLK1",},
-       {0x0,   "HOST CLK2",},
+       {0x800000, "HOST CLK2",},
        {0xdead, L3_TARGET_NOT_SUPPORTED,},
        {0x3400, "SHA2_2",},
        {0x0900, "BB2D",},
index 597fed4..df2c1af 100644 (file)
@@ -29,7 +29,7 @@
 #define PERIPHERAL_RSHIFT_MASK 0x3
 #define PERIPHERAL_RSHIFT(val) (((val) >> 16) & PERIPHERAL_RSHIFT_MASK)
 
-#define PERIPHERAL_MAX_SHIFT   4
+#define PERIPHERAL_MAX_SHIFT   3
 
 struct clk_peripheral {
        struct clk_hw hw;
@@ -242,7 +242,7 @@ static long clk_sam9x5_peripheral_round_rate(struct clk_hw *hw,
                return *parent_rate;
 
        if (periph->range.max) {
-               for (; shift < PERIPHERAL_MAX_SHIFT; shift++) {
+               for (; shift <= PERIPHERAL_MAX_SHIFT; shift++) {
                        cur_rate = *parent_rate >> shift;
                        if (cur_rate <= periph->range.max)
                                break;
@@ -254,7 +254,7 @@ static long clk_sam9x5_peripheral_round_rate(struct clk_hw *hw,
 
        best_diff = cur_rate - rate;
        best_rate = cur_rate;
-       for (; shift < PERIPHERAL_MAX_SHIFT; shift++) {
+       for (; shift <= PERIPHERAL_MAX_SHIFT; shift++) {
                cur_rate = *parent_rate >> shift;
                if (cur_rate < rate)
                        cur_diff = rate - cur_rate;
@@ -289,7 +289,7 @@ static int clk_sam9x5_peripheral_set_rate(struct clk_hw *hw,
        if (periph->range.max && rate > periph->range.max)
                return -EINVAL;
 
-       for (shift = 0; shift < PERIPHERAL_MAX_SHIFT; shift++) {
+       for (shift = 0; shift <= PERIPHERAL_MAX_SHIFT; shift++) {
                if (parent_rate >> shift == rate) {
                        periph->auto_div = false;
                        periph->div = shift;
index 6ec79db..cbbe403 100644 (file)
@@ -173,8 +173,7 @@ static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,
        int i = 0;
 
        /* Check if parent_rate is a valid input rate */
-       if (parent_rate < characteristics->input.min ||
-           parent_rate > characteristics->input.max)
+       if (parent_rate < characteristics->input.min)
                return -ERANGE;
 
        /*
@@ -187,6 +186,15 @@ static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,
        if (!mindiv)
                mindiv = 1;
 
+       if (parent_rate > characteristics->input.max) {
+               tmpdiv = DIV_ROUND_UP(parent_rate, characteristics->input.max);
+               if (tmpdiv > PLL_DIV_MAX)
+                       return -ERANGE;
+
+               if (tmpdiv > mindiv)
+                       mindiv = tmpdiv;
+       }
+
        /*
         * Calculate the maximum divider which is limited by PLL register
         * layout (limited by the MUL or DIV field size).
index 69abb08..eb8e5dc 100644 (file)
@@ -121,7 +121,7 @@ extern void __init of_at91sam9x5_clk_smd_setup(struct device_node *np,
                                               struct at91_pmc *pmc);
 #endif
 
-#if defined(CONFIG_HAVE_AT91_SMD)
+#if defined(CONFIG_HAVE_AT91_H32MX)
 extern void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
                                              struct at91_pmc *pmc);
 #endif
index 44ea107..30335d3 100644 (file)
@@ -1128,13 +1128,6 @@ static int si5351_dt_parse(struct i2c_client *client,
        if (!pdata)
                return -ENOMEM;
 
-       pdata->clk_xtal = of_clk_get(np, 0);
-       if (!IS_ERR(pdata->clk_xtal))
-               clk_put(pdata->clk_xtal);
-       pdata->clk_clkin = of_clk_get(np, 1);
-       if (!IS_ERR(pdata->clk_clkin))
-               clk_put(pdata->clk_clkin);
-
        /*
         * property silabs,pll-source : <num src>, [<..>]
         * allow to selectively set pll source
@@ -1328,8 +1321,22 @@ static int si5351_i2c_probe(struct i2c_client *client,
        i2c_set_clientdata(client, drvdata);
        drvdata->client = client;
        drvdata->variant = variant;
-       drvdata->pxtal = pdata->clk_xtal;
-       drvdata->pclkin = pdata->clk_clkin;
+       drvdata->pxtal = devm_clk_get(&client->dev, "xtal");
+       drvdata->pclkin = devm_clk_get(&client->dev, "clkin");
+
+       if (PTR_ERR(drvdata->pxtal) == -EPROBE_DEFER ||
+           PTR_ERR(drvdata->pclkin) == -EPROBE_DEFER)
+               return -EPROBE_DEFER;
+
+       /*
+        * Check for valid parent clock: VARIANT_A and VARIANT_B need XTAL,
+        *   VARIANT_C can have CLKIN instead.
+        */
+       if (IS_ERR(drvdata->pxtal) &&
+           (drvdata->variant != SI5351_VARIANT_C || IS_ERR(drvdata->pclkin))) {
+               dev_err(&client->dev, "missing parent clock\n");
+               return -EINVAL;
+       }
 
        drvdata->regmap = devm_regmap_init_i2c(client, &si5351_regmap_config);
        if (IS_ERR(drvdata->regmap)) {
@@ -1393,6 +1400,11 @@ static int si5351_i2c_probe(struct i2c_client *client,
                }
        }
 
+       if (!IS_ERR(drvdata->pxtal))
+               clk_prepare_enable(drvdata->pxtal);
+       if (!IS_ERR(drvdata->pclkin))
+               clk_prepare_enable(drvdata->pclkin);
+
        /* register xtal input clock gate */
        memset(&init, 0, sizeof(init));
        init.name = si5351_input_names[0];
@@ -1407,7 +1419,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
        clk = devm_clk_register(&client->dev, &drvdata->xtal);
        if (IS_ERR(clk)) {
                dev_err(&client->dev, "unable to register %s\n", init.name);
-               return PTR_ERR(clk);
+               ret = PTR_ERR(clk);
+               goto err_clk;
        }
 
        /* register clkin input clock gate */
@@ -1425,7 +1438,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
                if (IS_ERR(clk)) {
                        dev_err(&client->dev, "unable to register %s\n",
                                init.name);
-                       return PTR_ERR(clk);
+                       ret = PTR_ERR(clk);
+                       goto err_clk;
                }
        }
 
@@ -1447,7 +1461,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
        clk = devm_clk_register(&client->dev, &drvdata->pll[0].hw);
        if (IS_ERR(clk)) {
                dev_err(&client->dev, "unable to register %s\n", init.name);
-               return -EINVAL;
+               ret = PTR_ERR(clk);
+               goto err_clk;
        }
 
        /* register PLLB or VXCO (Si5351B) */
@@ -1471,7 +1486,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
        clk = devm_clk_register(&client->dev, &drvdata->pll[1].hw);
        if (IS_ERR(clk)) {
                dev_err(&client->dev, "unable to register %s\n", init.name);
-               return -EINVAL;
+               ret = PTR_ERR(clk);
+               goto err_clk;
        }
 
        /* register clk multisync and clk out divider */
@@ -1492,8 +1508,10 @@ static int si5351_i2c_probe(struct i2c_client *client,
                num_clocks * sizeof(*drvdata->onecell.clks), GFP_KERNEL);
 
        if (WARN_ON(!drvdata->msynth || !drvdata->clkout ||
-                   !drvdata->onecell.clks))
-               return -ENOMEM;
+                   !drvdata->onecell.clks)) {
+               ret = -ENOMEM;
+               goto err_clk;
+       }
 
        for (n = 0; n < num_clocks; n++) {
                drvdata->msynth[n].num = n;
@@ -1511,7 +1529,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
                if (IS_ERR(clk)) {
                        dev_err(&client->dev, "unable to register %s\n",
                                init.name);
-                       return -EINVAL;
+                       ret = PTR_ERR(clk);
+                       goto err_clk;
                }
        }
 
@@ -1538,7 +1557,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
                if (IS_ERR(clk)) {
                        dev_err(&client->dev, "unable to register %s\n",
                                init.name);
-                       return -EINVAL;
+                       ret = PTR_ERR(clk);
+                       goto err_clk;
                }
                drvdata->onecell.clks[n] = clk;
 
@@ -1557,10 +1577,17 @@ static int si5351_i2c_probe(struct i2c_client *client,
                                  &drvdata->onecell);
        if (ret) {
                dev_err(&client->dev, "unable to add clk provider\n");
-               return ret;
+               goto err_clk;
        }
 
        return 0;
+
+err_clk:
+       if (!IS_ERR(drvdata->pxtal))
+               clk_disable_unprepare(drvdata->pxtal);
+       if (!IS_ERR(drvdata->pclkin))
+               clk_disable_unprepare(drvdata->pclkin);
+       return ret;
 }
 
 static const struct i2c_device_id si5351_i2c_ids[] = {
index 459ce9d..5b0f418 100644 (file)
@@ -1475,8 +1475,10 @@ static struct clk_core *__clk_set_parent_before(struct clk_core *clk,
         */
        if (clk->prepare_count) {
                clk_core_prepare(parent);
+               flags = clk_enable_lock();
                clk_core_enable(parent);
                clk_core_enable(clk);
+               clk_enable_unlock(flags);
        }
 
        /* update the clk tree topology */
@@ -1491,13 +1493,17 @@ static void __clk_set_parent_after(struct clk_core *core,
                                   struct clk_core *parent,
                                   struct clk_core *old_parent)
 {
+       unsigned long flags;
+
        /*
         * Finish the migration of prepare state and undo the changes done
         * for preventing a race with clk_enable().
         */
        if (core->prepare_count) {
+               flags = clk_enable_lock();
                clk_core_disable(core);
                clk_core_disable(old_parent);
+               clk_enable_unlock(flags);
                clk_core_unprepare(old_parent);
        }
 }
@@ -1525,8 +1531,10 @@ static int __clk_set_parent(struct clk_core *clk, struct clk_core *parent,
                clk_enable_unlock(flags);
 
                if (clk->prepare_count) {
+                       flags = clk_enable_lock();
                        clk_core_disable(clk);
                        clk_core_disable(parent);
+                       clk_enable_unlock(flags);
                        clk_core_unprepare(parent);
                }
                return ret;
index d345847..c66f7bc 100644 (file)
@@ -71,8 +71,8 @@ static const char *gcc_xo_gpll0_bimc[] = {
 static const struct parent_map gcc_xo_gpll0a_gpll1_gpll2a_map[] = {
        { P_XO, 0 },
        { P_GPLL0_AUX, 3 },
-       { P_GPLL2_AUX, 2 },
        { P_GPLL1, 1 },
+       { P_GPLL2_AUX, 2 },
 };
 
 static const char *gcc_xo_gpll0a_gpll1_gpll2a[] = {
@@ -1115,7 +1115,7 @@ static struct clk_rcg2 usb_hs_system_clk_src = {
 static const struct freq_tbl ftbl_gcc_venus0_vcodec0_clk[] = {
        F(100000000, P_GPLL0, 8, 0, 0),
        F(160000000, P_GPLL0, 5, 0, 0),
-       F(228570000, P_GPLL0, 5, 0, 0),
+       F(228570000, P_GPLL0, 3.5, 0, 0),
        { }
 };
 
index 17e9af7..a17683b 100644 (file)
@@ -10,7 +10,7 @@ obj-$(CONFIG_SOC_EXYNOS5250)  += clk-exynos5250.o
 obj-$(CONFIG_SOC_EXYNOS5260)   += clk-exynos5260.o
 obj-$(CONFIG_SOC_EXYNOS5410)   += clk-exynos5410.o
 obj-$(CONFIG_SOC_EXYNOS5420)   += clk-exynos5420.o
-obj-$(CONFIG_ARCH_EXYNOS5433)  += clk-exynos5433.o
+obj-$(CONFIG_ARCH_EXYNOS)      += clk-exynos5433.o
 obj-$(CONFIG_SOC_EXYNOS5440)   += clk-exynos5440.o
 obj-$(CONFIG_ARCH_EXYNOS)      += clk-exynos-audss.o
 obj-$(CONFIG_ARCH_EXYNOS)      += clk-exynos-clkout.o
index 07d666c..bea4a17 100644 (file)
@@ -271,6 +271,7 @@ static const struct samsung_clk_reg_dump exynos5420_set_clksrc[] = {
        { .offset = SRC_MASK_PERIC0,            .value = 0x11111110, },
        { .offset = SRC_MASK_PERIC1,            .value = 0x11111100, },
        { .offset = SRC_MASK_ISP,               .value = 0x11111000, },
+       { .offset = GATE_BUS_TOP,               .value = 0xffffffff, },
        { .offset = GATE_BUS_DISP1,             .value = 0xffffffff, },
        { .offset = GATE_IP_PERIC,              .value = 0xffffffff, },
 };
index 387e3e3..9e04ae2 100644 (file)
@@ -748,7 +748,7 @@ static struct samsung_pll_rate_table exynos5443_pll_rates[] = {
        PLL_35XX_RATE(825000000U,  275, 4,  1),
        PLL_35XX_RATE(800000000U,  400, 6,  1),
        PLL_35XX_RATE(733000000U,  733, 12, 1),
-       PLL_35XX_RATE(700000000U,  360, 6,  1),
+       PLL_35XX_RATE(700000000U,  175, 3,  1),
        PLL_35XX_RATE(667000000U,  222, 4,  1),
        PLL_35XX_RATE(633000000U,  211, 4,  1),
        PLL_35XX_RATE(600000000U,  500, 5,  2),
@@ -760,14 +760,14 @@ static struct samsung_pll_rate_table exynos5443_pll_rates[] = {
        PLL_35XX_RATE(444000000U,  370, 5,  2),
        PLL_35XX_RATE(420000000U,  350, 5,  2),
        PLL_35XX_RATE(400000000U,  400, 6,  2),
-       PLL_35XX_RATE(350000000U,  360, 6,  2),
+       PLL_35XX_RATE(350000000U,  350, 6,  2),
        PLL_35XX_RATE(333000000U,  222, 4,  2),
        PLL_35XX_RATE(300000000U,  500, 5,  3),
        PLL_35XX_RATE(266000000U,  532, 6,  3),
        PLL_35XX_RATE(200000000U,  400, 6,  3),
        PLL_35XX_RATE(166000000U,  332, 6,  3),
        PLL_35XX_RATE(160000000U,  320, 6,  3),
-       PLL_35XX_RATE(133000000U,  552, 6,  4),
+       PLL_35XX_RATE(133000000U,  532, 6,  4),
        PLL_35XX_RATE(100000000U,  400, 6,  4),
        { /* sentinel */ }
 };
@@ -1490,7 +1490,7 @@ static struct samsung_gate_clock mif_gate_clks[] __initdata = {
 
        /* ENABLE_PCLK_MIF_SECURE_MONOTONIC_CNT */
        GATE(CLK_PCLK_MONOTONIC_CNT, "pclk_monotonic_cnt", "div_aclk_mif_133",
-                       ENABLE_PCLK_MIF_SECURE_RTC, 0, 0, 0),
+                       ENABLE_PCLK_MIF_SECURE_MONOTONIC_CNT, 0, 0, 0),
 
        /* ENABLE_PCLK_MIF_SECURE_RTC */
        GATE(CLK_PCLK_RTC, "pclk_rtc", "div_aclk_mif_133",
@@ -3665,7 +3665,7 @@ static struct samsung_gate_clock apollo_gate_clks[] __initdata = {
                        ENABLE_SCLK_APOLLO, 3, CLK_IGNORE_UNUSED, 0),
        GATE(CLK_SCLK_HPM_APOLLO, "sclk_hpm_apollo", "div_sclk_hpm_apollo",
                        ENABLE_SCLK_APOLLO, 1, CLK_IGNORE_UNUSED, 0),
-       GATE(CLK_SCLK_APOLLO, "sclk_apollo", "div_apollo_pll",
+       GATE(CLK_SCLK_APOLLO, "sclk_apollo", "div_apollo2",
                        ENABLE_SCLK_APOLLO, 0, CLK_IGNORE_UNUSED, 0),
 };
 
@@ -3927,7 +3927,7 @@ CLK_OF_DECLARE(exynos5433_cmu_atlas, "samsung,exynos5433-cmu-atlas",
 #define ENABLE_PCLK_MSCL                               0x0900
 #define ENABLE_PCLK_MSCL_SECURE_SMMU_M2MSCALER0                0x0904
 #define ENABLE_PCLK_MSCL_SECURE_SMMU_M2MSCALER1                0x0908
-#define ENABLE_PCLK_MSCL_SECURE_SMMU_JPEG              0x000c
+#define ENABLE_PCLK_MSCL_SECURE_SMMU_JPEG              0x090c
 #define ENABLE_SCLK_MSCL                               0x0a00
 #define ENABLE_IP_MSCL0                                        0x0b00
 #define ENABLE_IP_MSCL1                                        0x0b04
index b0c18ed..0136dfc 100644 (file)
@@ -699,13 +699,14 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
        dmi_check_system(sw_any_bug_dmi_table);
        if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
                policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-               cpumask_copy(policy->cpus, cpu_core_mask(cpu));
+               cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
        }
 
        if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
                cpumask_clear(policy->cpus);
                cpumask_set_cpu(cpu, policy->cpus);
-               cpumask_copy(data->freqdomain_cpus, cpu_sibling_mask(cpu));
+               cpumask_copy(data->freqdomain_cpus,
+                            topology_sibling_cpumask(cpu));
                policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
                pr_info_once(PFX "overriding BIOS provided _PSD data\n");
        }
index 529cfd9..5dd95da 100644 (file)
@@ -172,7 +172,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
        unsigned int i;
 
 #ifdef CONFIG_SMP
-       cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
+       cpumask_copy(policy->cpus, topology_sibling_cpumask(policy->cpu));
 #endif
 
        /* Errata workaround */
index f9ce7e4..5c035d0 100644 (file)
@@ -57,13 +57,6 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
 
 static struct cpufreq_driver cpufreq_amd64_driver;
 
-#ifndef CONFIG_SMP
-static inline const struct cpumask *cpu_core_mask(int cpu)
-{
-       return cpumask_of(0);
-}
-#endif
-
 /* Return a frequency in MHz, given an input fid */
 static u32 find_freq_from_fid(u32 fid)
 {
@@ -620,7 +613,7 @@ static int fill_powernow_table(struct powernow_k8_data *data,
 
        pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
        data->powernow_table = powernow_table;
-       if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
+       if (cpumask_first(topology_core_cpumask(data->cpu)) == data->cpu)
                print_basics(data);
 
        for (j = 0; j < data->numps; j++)
@@ -784,7 +777,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
                CPUFREQ_TABLE_END;
        data->powernow_table = powernow_table;
 
-       if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
+       if (cpumask_first(topology_core_cpumask(data->cpu)) == data->cpu)
                print_basics(data);
 
        /* notify BIOS that we exist */
@@ -1090,7 +1083,7 @@ static int powernowk8_cpu_init(struct cpufreq_policy *pol)
        if (rc != 0)
                goto err_out_exit_acpi;
 
-       cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
+       cpumask_copy(pol->cpus, topology_core_cpumask(pol->cpu));
        data->available_cores = pol->cpus;
 
        /* min/max the cpu is capable of */
index e56d632..37555c6 100644 (file)
@@ -292,7 +292,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 
        /* only run on CPU to be set, or on its sibling */
 #ifdef CONFIG_SMP
-       cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
+       cpumask_copy(policy->cpus, topology_sibling_cpumask(policy->cpu));
 #endif
        policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
 
index ba0532e..332c8ef 100644 (file)
@@ -1544,6 +1544,8 @@ static int ahash_init(struct ahash_request *req)
 
        state->current_buf = 0;
        state->buf_dma = 0;
+       state->buflen_0 = 0;
+       state->buflen_1 = 0;
 
        return 0;
 }
index 26a544b..5095337 100644 (file)
@@ -56,7 +56,7 @@
 
 /* Buffer, its dma address and lock */
 struct buf_data {
-       u8 buf[RN_BUF_SIZE];
+       u8 buf[RN_BUF_SIZE] ____cacheline_aligned;
        dma_addr_t addr;
        struct completion filled;
        u32 hw_desc[DESC_JOB_O_LEN];
index ab300ea..a9064e3 100644 (file)
@@ -78,12 +78,14 @@ static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key,
     int ret;
     struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
+    preempt_disable();
     pagefault_disable();
     enable_kernel_altivec();
     ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
     ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
     pagefault_enable();
-    
+    preempt_enable();
+
     ret += crypto_cipher_setkey(ctx->fallback, key, keylen);
     return ret;
 }
@@ -95,10 +97,12 @@ static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
     if (in_interrupt()) {
         crypto_cipher_encrypt_one(ctx->fallback, dst, src);
     } else {
+       preempt_disable();
         pagefault_disable();
         enable_kernel_altivec();
         aes_p8_encrypt(src, dst, &ctx->enc_key);
         pagefault_enable();
+       preempt_enable();
     }
 }
 
@@ -109,10 +113,12 @@ static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
     if (in_interrupt()) {
         crypto_cipher_decrypt_one(ctx->fallback, dst, src);
     } else {
+       preempt_disable();
         pagefault_disable();
         enable_kernel_altivec();
         aes_p8_decrypt(src, dst, &ctx->dec_key);
         pagefault_enable();
+       preempt_enable();
     }
 }
 
index 1a559b7..477284a 100644 (file)
@@ -79,11 +79,13 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key,
     int ret;
     struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
 
+    preempt_disable();
     pagefault_disable();
     enable_kernel_altivec();
     ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
     ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
     pagefault_enable();
+    preempt_enable();
 
     ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen);
     return ret;
@@ -106,6 +108,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc,
     if (in_interrupt()) {
         ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes);
     } else {
+       preempt_disable();
         pagefault_disable();
         enable_kernel_altivec();
 
@@ -119,6 +122,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc,
        }
 
         pagefault_enable();
+       preempt_enable();
     }
 
     return ret;
@@ -141,6 +145,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc,
     if (in_interrupt()) {
         ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes);
     } else {
+       preempt_disable();
         pagefault_disable();
         enable_kernel_altivec();
 
@@ -154,6 +159,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc,
                }
 
         pagefault_enable();
+       preempt_enable();
     }
 
     return ret;
index d0ffe27..f255ec4 100644 (file)
@@ -114,11 +114,13 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
     if (keylen != GHASH_KEY_LEN)
         return -EINVAL;
 
+    preempt_disable();
     pagefault_disable();
     enable_kernel_altivec();
     enable_kernel_fp();
     gcm_init_p8(ctx->htable, (const u64 *) key);
     pagefault_enable();
+    preempt_enable();
     return crypto_shash_setkey(ctx->fallback, key, keylen);
 }
 
@@ -140,23 +142,27 @@ static int p8_ghash_update(struct shash_desc *desc,
             }
             memcpy(dctx->buffer + dctx->bytes, src,
                     GHASH_DIGEST_SIZE - dctx->bytes);
+           preempt_disable();
             pagefault_disable();
             enable_kernel_altivec();
             enable_kernel_fp();
             gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer,
                     GHASH_DIGEST_SIZE);
             pagefault_enable();
+           preempt_enable();
             src += GHASH_DIGEST_SIZE - dctx->bytes;
             srclen -= GHASH_DIGEST_SIZE - dctx->bytes;
             dctx->bytes = 0;
         }
         len = srclen & ~(GHASH_DIGEST_SIZE - 1);
         if (len) {
+           preempt_disable();
             pagefault_disable();
             enable_kernel_altivec();
             enable_kernel_fp();
             gcm_ghash_p8(dctx->shash, ctx->htable, src, len);
             pagefault_enable();
+           preempt_enable();
             src += len;
             srclen -= len;
         }
@@ -180,12 +186,14 @@ static int p8_ghash_final(struct shash_desc *desc, u8 *out)
         if (dctx->bytes) {
             for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++)
                 dctx->buffer[i] = 0;
+           preempt_disable();
             pagefault_disable();
             enable_kernel_altivec();
             enable_kernel_fp();
             gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer,
                     GHASH_DIGEST_SIZE);
             pagefault_enable();
+           preempt_enable();
             dctx->bytes = 0;
         }
         memcpy(out, dctx->shash, GHASH_DIGEST_SIZE);
index 933e4b3..7992164 100644 (file)
 #define AT_XDMAC_MBR_UBC_NDV3          (0x3 << 27)     /* Next Descriptor View 3 */
 
 #define AT_XDMAC_MAX_CHAN      0x20
+#define AT_XDMAC_MAX_CSIZE     16      /* 16 data */
+#define AT_XDMAC_MAX_DWIDTH    8       /* 64 bits */
 
 #define AT_XDMAC_DMA_BUSWIDTHS\
        (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
@@ -192,20 +194,17 @@ struct at_xdmac_chan {
        struct dma_chan                 chan;
        void __iomem                    *ch_regs;
        u32                             mask;           /* Channel Mask */
-       u32                             cfg[2];         /* Channel Configuration Register */
-       #define AT_XDMAC_DEV_TO_MEM_CFG 0               /* Predifined dev to mem channel conf */
-       #define AT_XDMAC_MEM_TO_DEV_CFG 1               /* Predifined mem to dev channel conf */
+       u32                             cfg;            /* Channel Configuration Register */
        u8                              perid;          /* Peripheral ID */
        u8                              perif;          /* Peripheral Interface */
        u8                              memif;          /* Memory Interface */
-       u32                             per_src_addr;
-       u32                             per_dst_addr;
        u32                             save_cc;
        u32                             save_cim;
        u32                             save_cnda;
        u32                             save_cndc;
        unsigned long                   status;
        struct tasklet_struct           tasklet;
+       struct dma_slave_config         sconfig;
 
        spinlock_t                      lock;
 
@@ -415,8 +414,9 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
        struct at_xdmac_desc    *desc = txd_to_at_desc(tx);
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(tx->chan);
        dma_cookie_t            cookie;
+       unsigned long           irqflags;
 
-       spin_lock_bh(&atchan->lock);
+       spin_lock_irqsave(&atchan->lock, irqflags);
        cookie = dma_cookie_assign(tx);
 
        dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
@@ -425,7 +425,7 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
        if (list_is_singular(&atchan->xfers_list))
                at_xdmac_start_xfer(atchan, desc);
 
-       spin_unlock_bh(&atchan->lock);
+       spin_unlock_irqrestore(&atchan->lock, irqflags);
        return cookie;
 }
 
@@ -494,61 +494,94 @@ static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec,
        return chan;
 }
 
+static int at_xdmac_compute_chan_conf(struct dma_chan *chan,
+                                     enum dma_transfer_direction direction)
+{
+       struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
+       int                     csize, dwidth;
+
+       if (direction == DMA_DEV_TO_MEM) {
+               atchan->cfg =
+                       AT91_XDMAC_DT_PERID(atchan->perid)
+                       | AT_XDMAC_CC_DAM_INCREMENTED_AM
+                       | AT_XDMAC_CC_SAM_FIXED_AM
+                       | AT_XDMAC_CC_DIF(atchan->memif)
+                       | AT_XDMAC_CC_SIF(atchan->perif)
+                       | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+                       | AT_XDMAC_CC_DSYNC_PER2MEM
+                       | AT_XDMAC_CC_MBSIZE_SIXTEEN
+                       | AT_XDMAC_CC_TYPE_PER_TRAN;
+               csize = ffs(atchan->sconfig.src_maxburst) - 1;
+               if (csize < 0) {
+                       dev_err(chan2dev(chan), "invalid src maxburst value\n");
+                       return -EINVAL;
+               }
+               atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
+               dwidth = ffs(atchan->sconfig.src_addr_width) - 1;
+               if (dwidth < 0) {
+                       dev_err(chan2dev(chan), "invalid src addr width value\n");
+                       return -EINVAL;
+               }
+               atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
+       } else if (direction == DMA_MEM_TO_DEV) {
+               atchan->cfg =
+                       AT91_XDMAC_DT_PERID(atchan->perid)
+                       | AT_XDMAC_CC_DAM_FIXED_AM
+                       | AT_XDMAC_CC_SAM_INCREMENTED_AM
+                       | AT_XDMAC_CC_DIF(atchan->perif)
+                       | AT_XDMAC_CC_SIF(atchan->memif)
+                       | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+                       | AT_XDMAC_CC_DSYNC_MEM2PER
+                       | AT_XDMAC_CC_MBSIZE_SIXTEEN
+                       | AT_XDMAC_CC_TYPE_PER_TRAN;
+               csize = ffs(atchan->sconfig.dst_maxburst) - 1;
+               if (csize < 0) {
+                       dev_err(chan2dev(chan), "invalid src maxburst value\n");
+                       return -EINVAL;
+               }
+               atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
+               dwidth = ffs(atchan->sconfig.dst_addr_width) - 1;
+               if (dwidth < 0) {
+                       dev_err(chan2dev(chan), "invalid dst addr width value\n");
+                       return -EINVAL;
+               }
+               atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
+       }
+
+       dev_dbg(chan2dev(chan), "%s: cfg=0x%08x\n", __func__, atchan->cfg);
+
+       return 0;
+}
+
+/*
+ * Only check that maxburst and addr width values are supported by the
+ * the controller but not that the configuration is good to perform the
+ * transfer since we don't know the direction at this stage.
+ */
+static int at_xdmac_check_slave_config(struct dma_slave_config *sconfig)
+{
+       if ((sconfig->src_maxburst > AT_XDMAC_MAX_CSIZE)
+           || (sconfig->dst_maxburst > AT_XDMAC_MAX_CSIZE))
+               return -EINVAL;
+
+       if ((sconfig->src_addr_width > AT_XDMAC_MAX_DWIDTH)
+           || (sconfig->dst_addr_width > AT_XDMAC_MAX_DWIDTH))
+               return -EINVAL;
+
+       return 0;
+}
+
 static int at_xdmac_set_slave_config(struct dma_chan *chan,
                                      struct dma_slave_config *sconfig)
 {
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
-       u8 dwidth;
-       int csize;
 
-       atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] =
-               AT91_XDMAC_DT_PERID(atchan->perid)
-               | AT_XDMAC_CC_DAM_INCREMENTED_AM
-               | AT_XDMAC_CC_SAM_FIXED_AM
-               | AT_XDMAC_CC_DIF(atchan->memif)
-               | AT_XDMAC_CC_SIF(atchan->perif)
-               | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
-               | AT_XDMAC_CC_DSYNC_PER2MEM
-               | AT_XDMAC_CC_MBSIZE_SIXTEEN
-               | AT_XDMAC_CC_TYPE_PER_TRAN;
-       csize = at_xdmac_csize(sconfig->src_maxburst);
-       if (csize < 0) {
-               dev_err(chan2dev(chan), "invalid src maxburst value\n");
+       if (at_xdmac_check_slave_config(sconfig)) {
+               dev_err(chan2dev(chan), "invalid slave configuration\n");
                return -EINVAL;
        }
-       atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] |= AT_XDMAC_CC_CSIZE(csize);
-       dwidth = ffs(sconfig->src_addr_width) - 1;
-       atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] |= AT_XDMAC_CC_DWIDTH(dwidth);
-
-
-       atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] =
-               AT91_XDMAC_DT_PERID(atchan->perid)
-               | AT_XDMAC_CC_DAM_FIXED_AM
-               | AT_XDMAC_CC_SAM_INCREMENTED_AM
-               | AT_XDMAC_CC_DIF(atchan->perif)
-               | AT_XDMAC_CC_SIF(atchan->memif)
-               | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
-               | AT_XDMAC_CC_DSYNC_MEM2PER
-               | AT_XDMAC_CC_MBSIZE_SIXTEEN
-               | AT_XDMAC_CC_TYPE_PER_TRAN;
-       csize = at_xdmac_csize(sconfig->dst_maxburst);
-       if (csize < 0) {
-               dev_err(chan2dev(chan), "invalid src maxburst value\n");
-               return -EINVAL;
-       }
-       atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] |= AT_XDMAC_CC_CSIZE(csize);
-       dwidth = ffs(sconfig->dst_addr_width) - 1;
-       atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] |= AT_XDMAC_CC_DWIDTH(dwidth);
-
-       /* Src and dst addr are needed to configure the link list descriptor. */
-       atchan->per_src_addr = sconfig->src_addr;
-       atchan->per_dst_addr = sconfig->dst_addr;
 
-       dev_dbg(chan2dev(chan),
-               "%s: cfg[dev2mem]=0x%08x, cfg[mem2dev]=0x%08x, per_src_addr=0x%08x, per_dst_addr=0x%08x\n",
-               __func__, atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG],
-               atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG],
-               atchan->per_src_addr, atchan->per_dst_addr);
+       memcpy(&atchan->sconfig, sconfig, sizeof(atchan->sconfig));
 
        return 0;
 }
@@ -563,6 +596,8 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
        struct scatterlist      *sg;
        int                     i;
        unsigned int            xfer_size = 0;
+       unsigned long           irqflags;
+       struct dma_async_tx_descriptor  *ret = NULL;
 
        if (!sgl)
                return NULL;
@@ -578,7 +613,10 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                 flags);
 
        /* Protect dma_sconfig field that can be modified by set_slave_conf. */
-       spin_lock_bh(&atchan->lock);
+       spin_lock_irqsave(&atchan->lock, irqflags);
+
+       if (at_xdmac_compute_chan_conf(chan, direction))
+               goto spin_unlock;
 
        /* Prepare descriptors. */
        for_each_sg(sgl, sg, sg_len, i) {
@@ -589,8 +627,7 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                mem = sg_dma_address(sg);
                if (unlikely(!len)) {
                        dev_err(chan2dev(chan), "sg data length is zero\n");
-                       spin_unlock_bh(&atchan->lock);
-                       return NULL;
+                       goto spin_unlock;
                }
                dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n",
                         __func__, i, len, mem);
@@ -600,20 +637,18 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                        dev_err(chan2dev(chan), "can't get descriptor\n");
                        if (first)
                                list_splice_init(&first->descs_list, &atchan->free_descs_list);
-                       spin_unlock_bh(&atchan->lock);
-                       return NULL;
+                       goto spin_unlock;
                }
 
                /* Linked list descriptor setup. */
                if (direction == DMA_DEV_TO_MEM) {
-                       desc->lld.mbr_sa = atchan->per_src_addr;
+                       desc->lld.mbr_sa = atchan->sconfig.src_addr;
                        desc->lld.mbr_da = mem;
-                       desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
                } else {
                        desc->lld.mbr_sa = mem;
-                       desc->lld.mbr_da = atchan->per_dst_addr;
-                       desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
+                       desc->lld.mbr_da = atchan->sconfig.dst_addr;
                }
+               desc->lld.mbr_cfg = atchan->cfg;
                dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg);
                fixed_dwidth = IS_ALIGNED(len, 1 << dwidth)
                               ? at_xdmac_get_dwidth(desc->lld.mbr_cfg)
@@ -645,13 +680,15 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                xfer_size += len;
        }
 
-       spin_unlock_bh(&atchan->lock);
 
        first->tx_dma_desc.flags = flags;
        first->xfer_size = xfer_size;
        first->direction = direction;
+       ret = &first->tx_dma_desc;
 
-       return &first->tx_dma_desc;
+spin_unlock:
+       spin_unlock_irqrestore(&atchan->lock, irqflags);
+       return ret;
 }
 
 static struct dma_async_tx_descriptor *
@@ -664,6 +701,7 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
        struct at_xdmac_desc    *first = NULL, *prev = NULL;
        unsigned int            periods = buf_len / period_len;
        int                     i;
+       unsigned long           irqflags;
 
        dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n",
                __func__, &buf_addr, buf_len, period_len,
@@ -679,32 +717,34 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
                return NULL;
        }
 
+       if (at_xdmac_compute_chan_conf(chan, direction))
+               return NULL;
+
        for (i = 0; i < periods; i++) {
                struct at_xdmac_desc    *desc = NULL;
 
-               spin_lock_bh(&atchan->lock);
+               spin_lock_irqsave(&atchan->lock, irqflags);
                desc = at_xdmac_get_desc(atchan);
                if (!desc) {
                        dev_err(chan2dev(chan), "can't get descriptor\n");
                        if (first)
                                list_splice_init(&first->descs_list, &atchan->free_descs_list);
-                       spin_unlock_bh(&atchan->lock);
+                       spin_unlock_irqrestore(&atchan->lock, irqflags);
                        return NULL;
                }
-               spin_unlock_bh(&atchan->lock);
+               spin_unlock_irqrestore(&atchan->lock, irqflags);
                dev_dbg(chan2dev(chan),
                        "%s: desc=0x%p, tx_dma_desc.phys=%pad\n",
                        __func__, desc, &desc->tx_dma_desc.phys);
 
                if (direction == DMA_DEV_TO_MEM) {
-                       desc->lld.mbr_sa = atchan->per_src_addr;
+                       desc->lld.mbr_sa = atchan->sconfig.src_addr;
                        desc->lld.mbr_da = buf_addr + i * period_len;
-                       desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
                } else {
                        desc->lld.mbr_sa = buf_addr + i * period_len;
-                       desc->lld.mbr_da = atchan->per_dst_addr;
-                       desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
+                       desc->lld.mbr_da = atchan->sconfig.dst_addr;
                }
+               desc->lld.mbr_cfg = atchan->cfg;
                desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1
                        | AT_XDMAC_MBR_UBC_NDEN
                        | AT_XDMAC_MBR_UBC_NSEN
@@ -766,6 +806,7 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                                        | AT_XDMAC_CC_SIF(0)
                                        | AT_XDMAC_CC_MBSIZE_SIXTEEN
                                        | AT_XDMAC_CC_TYPE_MEM_TRAN;
+       unsigned long           irqflags;
 
        dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n",
                __func__, &src, &dest, len, flags);
@@ -798,9 +839,9 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
                dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size);
 
-               spin_lock_bh(&atchan->lock);
+               spin_lock_irqsave(&atchan->lock, irqflags);
                desc = at_xdmac_get_desc(atchan);
-               spin_unlock_bh(&atchan->lock);
+               spin_unlock_irqrestore(&atchan->lock, irqflags);
                if (!desc) {
                        dev_err(chan2dev(chan), "can't get descriptor\n");
                        if (first)
@@ -886,6 +927,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
        int                     residue;
        u32                     cur_nda, mask, value;
        u8                      dwidth = 0;
+       unsigned long           flags;
 
        ret = dma_cookie_status(chan, cookie, txstate);
        if (ret == DMA_COMPLETE)
@@ -894,7 +936,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
        if (!txstate)
                return ret;
 
-       spin_lock_bh(&atchan->lock);
+       spin_lock_irqsave(&atchan->lock, flags);
 
        desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);
 
@@ -904,8 +946,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
         */
        if (!desc->active_xfer) {
                dma_set_residue(txstate, desc->xfer_size);
-               spin_unlock_bh(&atchan->lock);
-               return ret;
+               goto spin_unlock;
        }
 
        residue = desc->xfer_size;
@@ -936,14 +977,14 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
        }
        residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth;
 
-       spin_unlock_bh(&atchan->lock);
-
        dma_set_residue(txstate, residue);
 
        dev_dbg(chan2dev(chan),
                 "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n",
                 __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue);
 
+spin_unlock:
+       spin_unlock_irqrestore(&atchan->lock, flags);
        return ret;
 }
 
@@ -964,8 +1005,9 @@ static void at_xdmac_remove_xfer(struct at_xdmac_chan *atchan,
 static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
 {
        struct at_xdmac_desc    *desc;
+       unsigned long           flags;
 
-       spin_lock_bh(&atchan->lock);
+       spin_lock_irqsave(&atchan->lock, flags);
 
        /*
         * If channel is enabled, do nothing, advance_work will be triggered
@@ -980,7 +1022,7 @@ static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
                        at_xdmac_start_xfer(atchan, desc);
        }
 
-       spin_unlock_bh(&atchan->lock);
+       spin_unlock_irqrestore(&atchan->lock, flags);
 }
 
 static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
@@ -1116,12 +1158,13 @@ static int at_xdmac_device_config(struct dma_chan *chan,
 {
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        int ret;
+       unsigned long           flags;
 
        dev_dbg(chan2dev(chan), "%s\n", __func__);
 
-       spin_lock_bh(&atchan->lock);
+       spin_lock_irqsave(&atchan->lock, flags);
        ret = at_xdmac_set_slave_config(chan, config);
-       spin_unlock_bh(&atchan->lock);
+       spin_unlock_irqrestore(&atchan->lock, flags);
 
        return ret;
 }
@@ -1130,18 +1173,19 @@ static int at_xdmac_device_pause(struct dma_chan *chan)
 {
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
+       unsigned long           flags;
 
        dev_dbg(chan2dev(chan), "%s\n", __func__);
 
        if (test_and_set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status))
                return 0;
 
-       spin_lock_bh(&atchan->lock);
+       spin_lock_irqsave(&atchan->lock, flags);
        at_xdmac_write(atxdmac, AT_XDMAC_GRWS, atchan->mask);
        while (at_xdmac_chan_read(atchan, AT_XDMAC_CC)
               & (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP))
                cpu_relax();
-       spin_unlock_bh(&atchan->lock);
+       spin_unlock_irqrestore(&atchan->lock, flags);
 
        return 0;
 }
@@ -1150,18 +1194,19 @@ static int at_xdmac_device_resume(struct dma_chan *chan)
 {
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
+       unsigned long           flags;
 
        dev_dbg(chan2dev(chan), "%s\n", __func__);
 
-       spin_lock_bh(&atchan->lock);
+       spin_lock_irqsave(&atchan->lock, flags);
        if (!at_xdmac_chan_is_paused(atchan)) {
-               spin_unlock_bh(&atchan->lock);
+               spin_unlock_irqrestore(&atchan->lock, flags);
                return 0;
        }
 
        at_xdmac_write(atxdmac, AT_XDMAC_GRWR, atchan->mask);
        clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
-       spin_unlock_bh(&atchan->lock);
+       spin_unlock_irqrestore(&atchan->lock, flags);
 
        return 0;
 }
@@ -1171,10 +1216,11 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan)
        struct at_xdmac_desc    *desc, *_desc;
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
+       unsigned long           flags;
 
        dev_dbg(chan2dev(chan), "%s\n", __func__);
 
-       spin_lock_bh(&atchan->lock);
+       spin_lock_irqsave(&atchan->lock, flags);
        at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
        while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
                cpu_relax();
@@ -1184,7 +1230,7 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan)
                at_xdmac_remove_xfer(atchan, desc);
 
        clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
-       spin_unlock_bh(&atchan->lock);
+       spin_unlock_irqrestore(&atchan->lock, flags);
 
        return 0;
 }
@@ -1194,8 +1240,9 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac_desc    *desc;
        int                     i;
+       unsigned long           flags;
 
-       spin_lock_bh(&atchan->lock);
+       spin_lock_irqsave(&atchan->lock, flags);
 
        if (at_xdmac_chan_is_enabled(atchan)) {
                dev_err(chan2dev(chan),
@@ -1226,7 +1273,7 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
        dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
 
 spin_unlock:
-       spin_unlock_bh(&atchan->lock);
+       spin_unlock_irqrestore(&atchan->lock, flags);
        return i;
 }
 
index 2890d74..3ddfd1f 100644 (file)
@@ -487,7 +487,11 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
        caps->directions = device->directions;
        caps->residue_granularity = device->residue_granularity;
 
-       caps->cmd_pause = !!device->device_pause;
+       /*
+        * Some devices implement only pause (e.g. to get residuum) but no
+        * resume. However cmd_pause is advertised as pause AND resume.
+        */
+       caps->cmd_pause = !!(device->device_pause && device->device_resume);
        caps->cmd_terminate = !!device->device_terminate_all;
 
        return 0;
index 9b84def..f42f71e 100644 (file)
@@ -384,7 +384,10 @@ static int hsu_dma_terminate_all(struct dma_chan *chan)
        spin_lock_irqsave(&hsuc->vchan.lock, flags);
 
        hsu_dma_stop_channel(hsuc);
-       hsuc->desc = NULL;
+       if (hsuc->desc) {
+               hsu_dma_desc_free(&hsuc->desc->vdesc);
+               hsuc->desc = NULL;
+       }
 
        vchan_get_all_descriptors(&hsuc->vchan, &head);
        spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
index a7d9d30..340f9e6 100644 (file)
@@ -2127,6 +2127,7 @@ static int pl330_terminate_all(struct dma_chan *chan)
        struct pl330_dmac *pl330 = pch->dmac;
        LIST_HEAD(list);
 
+       pm_runtime_get_sync(pl330->ddma.dev);
        spin_lock_irqsave(&pch->lock, flags);
        spin_lock(&pl330->lock);
        _stop(pch->thread);
@@ -2151,6 +2152,8 @@ static int pl330_terminate_all(struct dma_chan *chan)
        list_splice_tail_init(&pch->work_list, &pl330->desc_pool);
        list_splice_tail_init(&pch->completed_list, &pl330->desc_pool);
        spin_unlock_irqrestore(&pch->lock, flags);
+       pm_runtime_mark_last_busy(pl330->ddma.dev);
+       pm_runtime_put_autosuspend(pl330->ddma.dev);
 
        return 0;
 }
index de67fce..e45d1f1 100644 (file)
@@ -119,6 +119,18 @@ static int usb_extcon_probe(struct platform_device *pdev)
                return PTR_ERR(info->id_gpiod);
        }
 
+       info->edev = devm_extcon_dev_allocate(dev, usb_extcon_cable);
+       if (IS_ERR(info->edev)) {
+               dev_err(dev, "failed to allocate extcon device\n");
+               return -ENOMEM;
+       }
+
+       ret = devm_extcon_dev_register(dev, info->edev);
+       if (ret < 0) {
+               dev_err(dev, "failed to register extcon device\n");
+               return ret;
+       }
+
        ret = gpiod_set_debounce(info->id_gpiod,
                                 USB_GPIO_DEBOUNCE_MS * 1000);
        if (ret < 0)
@@ -142,18 +154,6 @@ static int usb_extcon_probe(struct platform_device *pdev)
                return ret;
        }
 
-       info->edev = devm_extcon_dev_allocate(dev, usb_extcon_cable);
-       if (IS_ERR(info->edev)) {
-               dev_err(dev, "failed to allocate extcon device\n");
-               return -ENOMEM;
-       }
-
-       ret = devm_extcon_dev_register(dev, info->edev);
-       if (ret < 0) {
-               dev_err(dev, "failed to register extcon device\n");
-               return ret;
-       }
-
        platform_set_drvdata(pdev, info);
        device_init_wakeup(dev, 1);
 
index 6e45a43..97b1616 100644 (file)
@@ -499,19 +499,19 @@ static int __init dmi_present(const u8 *buf)
        buf += 16;
 
        if (memcmp(buf, "_DMI_", 5) == 0 && dmi_checksum(buf, 15)) {
+               if (smbios_ver)
+                       dmi_ver = smbios_ver;
+               else
+                       dmi_ver = (buf[14] & 0xF0) << 4 | (buf[14] & 0x0F);
                dmi_num = get_unaligned_le16(buf + 12);
                dmi_len = get_unaligned_le16(buf + 6);
                dmi_base = get_unaligned_le32(buf + 8);
 
                if (dmi_walk_early(dmi_decode) == 0) {
                        if (smbios_ver) {
-                               dmi_ver = smbios_ver;
-                               pr_info("SMBIOS %d.%d%s present.\n",
-                                       dmi_ver >> 8, dmi_ver & 0xFF,
-                                       (dmi_ver < 0x0300) ? "" : ".x");
+                               pr_info("SMBIOS %d.%d present.\n",
+                                      dmi_ver >> 8, dmi_ver & 0xFF);
                        } else {
-                               dmi_ver = (buf[14] & 0xF0) << 4 |
-                                          (buf[14] & 0x0F);
                                pr_info("Legacy DMI %d.%d present.\n",
                                       dmi_ver >> 8, dmi_ver & 0xFF);
                        }
index 87b8e3b..5c55227 100644 (file)
@@ -120,7 +120,8 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry) {
                kset_unregister(map_kset);
-               return entry;
+               map_kset = NULL;
+               return ERR_PTR(-ENOMEM);
        }
 
        memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size,
@@ -132,6 +133,7 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
        if (ret) {
                kobject_put(&entry->kobj);
                kset_unregister(map_kset);
+               map_kset = NULL;
                return ERR_PTR(ret);
        }
 
@@ -195,8 +197,6 @@ out_add_entry:
                entry = *(map_entries + j);
                kobject_put(&entry->kobj);
        }
-       if (map_kset)
-               kset_unregister(map_kset);
 out:
        return ret;
 }
index 071c2c9..7279123 100644 (file)
@@ -186,8 +186,20 @@ struct ibft_kobject {
 
 static struct iscsi_boot_kset *boot_kset;
 
+/* fully null address */
 static const char nulls[16];
 
+/* IPv4-mapped IPv6 ::ffff:0.0.0.0 */
+static const char mapped_nulls[16] = { 0x00, 0x00, 0x00, 0x00,
+                                       0x00, 0x00, 0x00, 0x00,
+                                       0x00, 0x00, 0xff, 0xff,
+                                       0x00, 0x00, 0x00, 0x00 };
+
+static int address_not_null(u8 *ip)
+{
+       return (memcmp(ip, nulls, 16) && memcmp(ip, mapped_nulls, 16));
+}
+
 /*
  * Helper functions to parse data properly.
  */
@@ -445,7 +457,7 @@ static umode_t ibft_check_nic_for(void *data, int type)
                rc = S_IRUGO;
                break;
        case ISCSI_BOOT_ETH_IP_ADDR:
-               if (memcmp(nic->ip_addr, nulls, sizeof(nic->ip_addr)))
+               if (address_not_null(nic->ip_addr))
                        rc = S_IRUGO;
                break;
        case ISCSI_BOOT_ETH_SUBNET_MASK:
@@ -456,21 +468,19 @@ static umode_t ibft_check_nic_for(void *data, int type)
                rc = S_IRUGO;
                break;
        case ISCSI_BOOT_ETH_GATEWAY:
-               if (memcmp(nic->gateway, nulls, sizeof(nic->gateway)))
+               if (address_not_null(nic->gateway))
                        rc = S_IRUGO;
                break;
        case ISCSI_BOOT_ETH_PRIMARY_DNS:
-               if (memcmp(nic->primary_dns, nulls,
-                          sizeof(nic->primary_dns)))
+               if (address_not_null(nic->primary_dns))
                        rc = S_IRUGO;
                break;
        case ISCSI_BOOT_ETH_SECONDARY_DNS:
-               if (memcmp(nic->secondary_dns, nulls,
-                          sizeof(nic->secondary_dns)))
+               if (address_not_null(nic->secondary_dns))
                        rc = S_IRUGO;
                break;
        case ISCSI_BOOT_ETH_DHCP:
-               if (memcmp(nic->dhcp, nulls, sizeof(nic->dhcp)))
+               if (address_not_null(nic->dhcp))
                        rc = S_IRUGO;
                break;
        case ISCSI_BOOT_ETH_VLAN:
@@ -536,23 +546,19 @@ static umode_t __init ibft_check_initiator_for(void *data, int type)
                rc = S_IRUGO;
                break;
        case ISCSI_BOOT_INI_ISNS_SERVER:
-               if (memcmp(init->isns_server, nulls,
-                          sizeof(init->isns_server)))
+               if (address_not_null(init->isns_server))
                        rc = S_IRUGO;
                break;
        case ISCSI_BOOT_INI_SLP_SERVER:
-               if (memcmp(init->slp_server, nulls,
-                          sizeof(init->slp_server)))
+               if (address_not_null(init->slp_server))
                        rc = S_IRUGO;
                break;
        case ISCSI_BOOT_INI_PRI_RADIUS_SERVER:
-               if (memcmp(init->pri_radius_server, nulls,
-                          sizeof(init->pri_radius_server)))
+               if (address_not_null(init->pri_radius_server))
                        rc = S_IRUGO;
                break;
        case ISCSI_BOOT_INI_SEC_RADIUS_SERVER:
-               if (memcmp(init->sec_radius_server, nulls,
-                          sizeof(init->sec_radius_server)))
+               if (address_not_null(init->sec_radius_server))
                        rc = S_IRUGO;
                break;
        case ISCSI_BOOT_INI_INITIATOR_NAME:
index 6b8115f..83f281d 100644 (file)
@@ -117,7 +117,7 @@ static int kempld_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
                = container_of(chip, struct kempld_gpio_data, chip);
        struct kempld_device_data *pld = gpio->pld;
 
-       return kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset);
+       return !kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset);
 }
 
 static int kempld_gpio_pincount(struct kempld_device_data *pld)
index cd1d5bf..b232397 100644 (file)
@@ -1054,38 +1054,8 @@ static void omap_gpio_mod_init(struct gpio_bank *bank)
                dev_err(bank->dev, "Could not get gpio dbck\n");
 }
 
-static void
-omap_mpuio_alloc_gc(struct gpio_bank *bank, unsigned int irq_start,
-                   unsigned int num)
-{
-       struct irq_chip_generic *gc;
-       struct irq_chip_type *ct;
-
-       gc = irq_alloc_generic_chip("MPUIO", 1, irq_start, bank->base,
-                                   handle_simple_irq);
-       if (!gc) {
-               dev_err(bank->dev, "Memory alloc failed for gc\n");
-               return;
-       }
-
-       ct = gc->chip_types;
-
-       /* NOTE: No ack required, reading IRQ status clears it. */
-       ct->chip.irq_mask = irq_gc_mask_set_bit;
-       ct->chip.irq_unmask = irq_gc_mask_clr_bit;
-       ct->chip.irq_set_type = omap_gpio_irq_type;
-
-       if (bank->regs->wkup_en)
-               ct->chip.irq_set_wake = omap_gpio_wake_enable;
-
-       ct->regs.mask = OMAP_MPUIO_GPIO_INT / bank->stride;
-       irq_setup_generic_chip(gc, IRQ_MSK(num), IRQ_GC_INIT_MASK_CACHE,
-                              IRQ_NOREQUEST | IRQ_NOPROBE, 0);
-}
-
 static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
 {
-       int j;
        static int gpio;
        int irq_base = 0;
        int ret;
@@ -1132,6 +1102,15 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
        }
 #endif
 
+       /* MPUIO is a bit different, reading IRQ status clears it */
+       if (bank->is_mpuio) {
+               irqc->irq_ack = dummy_irq_chip.irq_ack;
+               irqc->irq_mask = irq_gc_mask_set_bit;
+               irqc->irq_unmask = irq_gc_mask_clr_bit;
+               if (!bank->regs->wkup_en)
+                       irqc->irq_set_wake = NULL;
+       }
+
        ret = gpiochip_irqchip_add(&bank->chip, irqc,
                                   irq_base, omap_gpio_irq_handler,
                                   IRQ_TYPE_NONE);
@@ -1145,15 +1124,6 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
        gpiochip_set_chained_irqchip(&bank->chip, irqc,
                                     bank->irq, omap_gpio_irq_handler);
 
-       for (j = 0; j < bank->width; j++) {
-               int irq = irq_find_mapping(bank->chip.irqdomain, j);
-               if (bank->is_mpuio) {
-                       omap_mpuio_alloc_gc(bank, irq, bank->width);
-                       irq_set_chip_and_handler(irq, NULL, NULL);
-                       set_irq_flags(irq, 0);
-               }
-       }
-
        return 0;
 }
 
index d2303d5..725d161 100644 (file)
@@ -550,7 +550,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
 
        length = min(agpio->pin_table_length, (u16)(pin_index + bits));
        for (i = pin_index; i < length; ++i) {
-               unsigned pin = agpio->pin_table[i];
+               int pin = agpio->pin_table[i];
                struct acpi_gpio_connection *conn;
                struct gpio_desc *desc;
                bool found;
index 7722ed5..af3bc7a 100644 (file)
@@ -551,6 +551,7 @@ static struct class gpio_class = {
  */
 int gpiod_export(struct gpio_desc *desc, bool direction_may_change)
 {
+       struct gpio_chip        *chip;
        unsigned long           flags;
        int                     status;
        const char              *ioname = NULL;
@@ -568,8 +569,16 @@ int gpiod_export(struct gpio_desc *desc, bool direction_may_change)
                return -EINVAL;
        }
 
+       chip = desc->chip;
+
        mutex_lock(&sysfs_lock);
 
+       /* check if chip is being removed */
+       if (!chip || !chip->exported) {
+               status = -ENODEV;
+               goto fail_unlock;
+       }
+
        spin_lock_irqsave(&gpio_lock, flags);
        if (!test_bit(FLAG_REQUESTED, &desc->flags) ||
             test_bit(FLAG_EXPORT, &desc->flags)) {
@@ -783,12 +792,15 @@ void gpiochip_unexport(struct gpio_chip *chip)
 {
        int                     status;
        struct device           *dev;
+       struct gpio_desc *desc;
+       unsigned int i;
 
        mutex_lock(&sysfs_lock);
        dev = class_find_device(&gpio_class, NULL, chip, match_export);
        if (dev) {
                put_device(dev);
                device_unregister(dev);
+               /* prevent further gpiod exports */
                chip->exported = false;
                status = 0;
        } else
@@ -797,6 +809,13 @@ void gpiochip_unexport(struct gpio_chip *chip)
 
        if (status)
                chip_dbg(chip, "%s: status %d\n", __func__, status);
+
+       /* unregister gpiod class devices owned by sysfs */
+       for (i = 0; i < chip->ngpio; i++) {
+               desc = &chip->desc[i];
+               if (test_and_clear_bit(FLAG_SYSFS, &desc->flags))
+                       gpiod_free(desc);
+       }
 }
 
 static int __init gpiolib_sysfs_init(void)
index 59eaa23..6bc612b 100644 (file)
@@ -53,6 +53,11 @@ static DEFINE_MUTEX(gpio_lookup_lock);
 static LIST_HEAD(gpio_lookup_list);
 LIST_HEAD(gpio_chips);
 
+
+static void gpiochip_free_hogs(struct gpio_chip *chip);
+static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
+
+
 static inline void desc_set_label(struct gpio_desc *d, const char *label)
 {
        d->label = label;
@@ -297,6 +302,7 @@ int gpiochip_add(struct gpio_chip *chip)
 
 err_remove_chip:
        acpi_gpiochip_remove(chip);
+       gpiochip_free_hogs(chip);
        of_gpiochip_remove(chip);
        spin_lock_irqsave(&gpio_lock, flags);
        list_del(&chip->list);
@@ -313,10 +319,6 @@ err_free_descs:
 }
 EXPORT_SYMBOL_GPL(gpiochip_add);
 
-/* Forward-declaration */
-static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
-static void gpiochip_free_hogs(struct gpio_chip *chip);
-
 /**
  * gpiochip_remove() - unregister a gpio_chip
  * @chip: the chip to unregister
index 69af73f..596ee5c 100644 (file)
@@ -430,9 +430,10 @@ static int unregister_process_nocpsch(struct device_queue_manager *dqm,
 
        BUG_ON(!dqm || !qpd);
 
-       BUG_ON(!list_empty(&qpd->queues_list));
+       pr_debug("In func %s\n", __func__);
 
-       pr_debug("kfd: In func %s\n", __func__);
+       pr_debug("qpd->queues_list is %s\n",
+                       list_empty(&qpd->queues_list) ? "empty" : "not empty");
 
        retval = 0;
        mutex_lock(&dqm->lock);
@@ -882,6 +883,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                return -ENOMEM;
        }
 
+       init_sdma_vm(dqm, q, qpd);
+
        retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
                                &q->gart_mqd_addr, &q->properties);
        if (retval != 0)
index 661c660..c25728b 100644 (file)
@@ -684,8 +684,6 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
                        dev->node_props.cpu_core_id_base);
        sysfs_show_32bit_prop(buffer, "simd_id_base",
                        dev->node_props.simd_id_base);
-       sysfs_show_32bit_prop(buffer, "capability",
-                       dev->node_props.capability);
        sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
                        dev->node_props.max_waves_per_simd);
        sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
@@ -728,14 +726,16 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
                sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
                        dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(
                                        dev->gpu->kgd));
+
                sysfs_show_64bit_prop(buffer, "local_mem_size",
-                       dev->gpu->kfd2kgd->get_vmem_size(
-                                       dev->gpu->kgd));
+                               (unsigned long long int) 0);
 
                sysfs_show_32bit_prop(buffer, "fw_version",
                        dev->gpu->kfd2kgd->get_fw_version(
                                                dev->gpu->kgd,
                                                KGD_ENGINE_MEC1));
+               sysfs_show_32bit_prop(buffer, "capability",
+                               dev->node_props.capability);
        }
 
        return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
index c8a3447..af9662e 100644 (file)
@@ -131,12 +131,11 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc)
 
        /* Reinitialize corresponding vblank timestamp if high-precision query
         * available. Skip this step if query unsupported or failed. Will
-        * reinitialize delayed at next vblank interrupt in that case.
+        * reinitialize delayed at next vblank interrupt in that case and
+        * assign 0 for now, to mark the vblanktimestamp as invalid.
         */
-       if (rc) {
-               tslot = atomic_read(&vblank->count) + diff;
-               vblanktimestamp(dev, crtc, tslot) = t_vblank;
-       }
+       tslot = atomic_read(&vblank->count) + diff;
+       vblanktimestamp(dev, crtc, tslot) = rc ? t_vblank : (struct timeval) {0, 0};
 
        smp_mb__before_atomic();
        atomic_add(diff, &vblank->count);
index 40c1db9..2f0ed11 100644 (file)
@@ -465,6 +465,9 @@ int drm_plane_helper_commit(struct drm_plane *plane,
                if (!crtc[i])
                        continue;
 
+               if (crtc[i]->cursor == plane)
+                       continue;
+
                /* There's no other way to figure out whether the crtc is running. */
                ret = drm_crtc_vblank_get(crtc[i]);
                if (ret == 0) {
index ffc305f..eb7e610 100644 (file)
@@ -217,7 +217,7 @@ static ssize_t status_store(struct device *device,
 
        mutex_unlock(&dev->mode_config.mutex);
 
-       return ret;
+       return ret ? ret : count;
 }
 
 static ssize_t status_show(struct device *device,
index 1f7e33f..6714e5b 100644 (file)
@@ -91,7 +91,7 @@ static void decon_wait_for_vblank(struct exynos_drm_crtc *crtc)
 
 static void decon_clear_channel(struct decon_context *ctx)
 {
-       int win, ch_enabled = 0;
+       unsigned int win, ch_enabled = 0;
 
        DRM_DEBUG_KMS("%s\n", __FILE__);
 
@@ -710,7 +710,7 @@ static void decon_dpms(struct exynos_drm_crtc *crtc, int mode)
        }
 }
 
-static struct exynos_drm_crtc_ops decon_crtc_ops = {
+static const struct exynos_drm_crtc_ops decon_crtc_ops = {
        .dpms = decon_dpms,
        .mode_fixup = decon_mode_fixup,
        .commit = decon_commit,
index 1dbfba5..30feb7d 100644 (file)
@@ -32,7 +32,6 @@
 #include <drm/bridge/ptn3460.h>
 
 #include "exynos_dp_core.h"
-#include "exynos_drm_fimd.h"
 
 #define ctx_from_connector(c)  container_of(c, struct exynos_dp_device, \
                                        connector)
@@ -196,7 +195,7 @@ static int exynos_dp_read_edid(struct exynos_dp_device *dp)
                }
        }
 
-       dev_err(dp->dev, "EDID Read success!\n");
+       dev_dbg(dp->dev, "EDID Read success!\n");
        return 0;
 }
 
@@ -1066,6 +1065,8 @@ static void exynos_dp_phy_exit(struct exynos_dp_device *dp)
 
 static void exynos_dp_poweron(struct exynos_dp_device *dp)
 {
+       struct exynos_drm_crtc *crtc = dp_to_crtc(dp);
+
        if (dp->dpms_mode == DRM_MODE_DPMS_ON)
                return;
 
@@ -1076,7 +1077,8 @@ static void exynos_dp_poweron(struct exynos_dp_device *dp)
                }
        }
 
-       fimd_dp_clock_enable(dp_to_crtc(dp), true);
+       if (crtc->ops->clock_enable)
+               crtc->ops->clock_enable(dp_to_crtc(dp), true);
 
        clk_prepare_enable(dp->clock);
        exynos_dp_phy_init(dp);
@@ -1087,6 +1089,8 @@ static void exynos_dp_poweron(struct exynos_dp_device *dp)
 
 static void exynos_dp_poweroff(struct exynos_dp_device *dp)
 {
+       struct exynos_drm_crtc *crtc = dp_to_crtc(dp);
+
        if (dp->dpms_mode != DRM_MODE_DPMS_ON)
                return;
 
@@ -1102,7 +1106,8 @@ static void exynos_dp_poweroff(struct exynos_dp_device *dp)
        exynos_dp_phy_exit(dp);
        clk_disable_unprepare(dp->clock);
 
-       fimd_dp_clock_enable(dp_to_crtc(dp), false);
+       if (crtc->ops->clock_enable)
+               crtc->ops->clock_enable(dp_to_crtc(dp), false);
 
        if (dp->panel) {
                if (drm_panel_unprepare(dp->panel))
index eb49195..9006b94 100644 (file)
@@ -238,11 +238,11 @@ static struct drm_crtc_funcs exynos_crtc_funcs = {
 };
 
 struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev,
-                                              struct drm_plane *plane,
-                                              int pipe,
-                                              enum exynos_drm_output_type type,
-                                              struct exynos_drm_crtc_ops *ops,
-                                              void *ctx)
+                                       struct drm_plane *plane,
+                                       int pipe,
+                                       enum exynos_drm_output_type type,
+                                       const struct exynos_drm_crtc_ops *ops,
+                                       void *ctx)
 {
        struct exynos_drm_crtc *exynos_crtc;
        struct exynos_drm_private *private = drm_dev->dev_private;
index 0ecd8fc..0f3aa70 100644 (file)
 #include "exynos_drm_drv.h"
 
 struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev,
-                                              struct drm_plane *plane,
-                                              int pipe,
-                                              enum exynos_drm_output_type type,
-                                              struct exynos_drm_crtc_ops *ops,
-                                              void *context);
+                                       struct drm_plane *plane,
+                                       int pipe,
+                                       enum exynos_drm_output_type type,
+                                       const struct exynos_drm_crtc_ops *ops,
+                                       void *context);
 int exynos_drm_crtc_enable_vblank(struct drm_device *dev, int pipe);
 void exynos_drm_crtc_disable_vblank(struct drm_device *dev, int pipe);
 void exynos_drm_crtc_finish_pageflip(struct drm_device *dev, int pipe);
index e12ecb5..29e3fb7 100644 (file)
@@ -71,13 +71,6 @@ enum exynos_drm_output_type {
  * @dma_addr: array of bus(accessed by dma) address to the memory region
  *           allocated for a overlay.
  * @zpos: order of overlay layer(z position).
- * @index_color: if using color key feature then this value would be used
- *                     as index color.
- * @default_win: a window to be enabled.
- * @color_key: color key on or off.
- * @local_path: in case of lcd type, local path mode on or off.
- * @transparency: transparency on or off.
- * @activated: activated or not.
  * @enabled: enabled or not.
  * @resume: to resume or not.
  *
@@ -108,13 +101,7 @@ struct exynos_drm_plane {
        uint32_t pixel_format;
        dma_addr_t dma_addr[MAX_FB_BUFFER];
        unsigned int zpos;
-       unsigned int index_color;
 
-       bool default_win:1;
-       bool color_key:1;
-       bool local_path:1;
-       bool transparency:1;
-       bool activated:1;
        bool enabled:1;
        bool resume:1;
 };
@@ -181,6 +168,10 @@ struct exynos_drm_display {
  * @win_disable: disable hardware specific overlay.
  * @te_handler: trigger to transfer video image at the tearing effect
  *     synchronization signal if there is a page flip request.
+ * @clock_enable: optional function enabling/disabling display domain clock,
+ *     called from exynos-dp driver before powering up (with
+ *     'enable' argument as true) and after powering down (with
+ *     'enable' as false).
  */
 struct exynos_drm_crtc;
 struct exynos_drm_crtc_ops {
@@ -195,6 +186,7 @@ struct exynos_drm_crtc_ops {
        void (*win_commit)(struct exynos_drm_crtc *crtc, unsigned int zpos);
        void (*win_disable)(struct exynos_drm_crtc *crtc, unsigned int zpos);
        void (*te_handler)(struct exynos_drm_crtc *crtc);
+       void (*clock_enable)(struct exynos_drm_crtc *crtc, bool enable);
 };
 
 /*
@@ -221,7 +213,7 @@ struct exynos_drm_crtc {
        unsigned int                    dpms;
        wait_queue_head_t               pending_flip_queue;
        struct drm_pending_vblank_event *event;
-       struct exynos_drm_crtc_ops      *ops;
+       const struct exynos_drm_crtc_ops        *ops;
        void                            *ctx;
 };
 
index 929cb03..142eb4e 100644 (file)
@@ -171,43 +171,6 @@ exynos_drm_framebuffer_init(struct drm_device *dev,
        return &exynos_fb->fb;
 }
 
-static u32 exynos_drm_format_num_buffers(struct drm_mode_fb_cmd2 *mode_cmd)
-{
-       unsigned int cnt = 0;
-
-       if (mode_cmd->pixel_format != DRM_FORMAT_NV12)
-               return drm_format_num_planes(mode_cmd->pixel_format);
-
-       while (cnt != MAX_FB_BUFFER) {
-               if (!mode_cmd->handles[cnt])
-                       break;
-               cnt++;
-       }
-
-       /*
-        * check if NV12 or NV12M.
-        *
-        * NV12
-        * handles[0] = base1, offsets[0] = 0
-        * handles[1] = base1, offsets[1] = Y_size
-        *
-        * NV12M
-        * handles[0] = base1, offsets[0] = 0
-        * handles[1] = base2, offsets[1] = 0
-        */
-       if (cnt == 2) {
-               /*
-                * in case of NV12 format, offsets[1] is not 0 and
-                * handles[0] is same as handles[1].
-                */
-               if (mode_cmd->offsets[1] &&
-                       mode_cmd->handles[0] == mode_cmd->handles[1])
-                       cnt = 1;
-       }
-
-       return cnt;
-}
-
 static struct drm_framebuffer *
 exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
                      struct drm_mode_fb_cmd2 *mode_cmd)
@@ -230,7 +193,7 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 
        drm_helper_mode_fill_fb_struct(&exynos_fb->fb, mode_cmd);
        exynos_fb->exynos_gem_obj[0] = to_exynos_gem_obj(obj);
-       exynos_fb->buf_cnt = exynos_drm_format_num_buffers(mode_cmd);
+       exynos_fb->buf_cnt = drm_format_num_planes(mode_cmd->pixel_format);
 
        DRM_DEBUG_KMS("buf_cnt = %d\n", exynos_fb->buf_cnt);
 
index 9819fa6..a0edab8 100644 (file)
@@ -33,7 +33,6 @@
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
-#include "exynos_drm_fimd.h"
 
 /*
  * FIMD stands for Fully Interactive Mobile Display and
@@ -216,7 +215,7 @@ static void fimd_wait_for_vblank(struct exynos_drm_crtc *crtc)
                DRM_DEBUG_KMS("vblank wait timed out.\n");
 }
 
-static void fimd_enable_video_output(struct fimd_context *ctx, int win,
+static void fimd_enable_video_output(struct fimd_context *ctx, unsigned int win,
                                        bool enable)
 {
        u32 val = readl(ctx->regs + WINCON(win));
@@ -229,7 +228,8 @@ static void fimd_enable_video_output(struct fimd_context *ctx, int win,
        writel(val, ctx->regs + WINCON(win));
 }
 
-static void fimd_enable_shadow_channel_path(struct fimd_context *ctx, int win,
+static void fimd_enable_shadow_channel_path(struct fimd_context *ctx,
+                                               unsigned int win,
                                                bool enable)
 {
        u32 val = readl(ctx->regs + SHADOWCON);
@@ -244,7 +244,7 @@ static void fimd_enable_shadow_channel_path(struct fimd_context *ctx, int win,
 
 static void fimd_clear_channel(struct fimd_context *ctx)
 {
-       int win, ch_enabled = 0;
+       unsigned int win, ch_enabled = 0;
 
        DRM_DEBUG_KMS("%s\n", __FILE__);
 
@@ -946,7 +946,24 @@ static void fimd_te_handler(struct exynos_drm_crtc *crtc)
                drm_handle_vblank(ctx->drm_dev, ctx->pipe);
 }
 
-static struct exynos_drm_crtc_ops fimd_crtc_ops = {
+static void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable)
+{
+       struct fimd_context *ctx = crtc->ctx;
+       u32 val;
+
+       /*
+        * Only Exynos 5250, 5260, 5410 and 542x requires enabling DP/MIE
+        * clock. On these SoCs the bootloader may enable it but any
+        * power domain off/on will reset it to disable state.
+        */
+       if (ctx->driver_data != &exynos5_fimd_driver_data)
+               return;
+
+       val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE;
+       writel(DP_MIE_CLK_DP_ENABLE, ctx->regs + DP_MIE_CLKCON);
+}
+
+static const struct exynos_drm_crtc_ops fimd_crtc_ops = {
        .dpms = fimd_dpms,
        .mode_fixup = fimd_mode_fixup,
        .commit = fimd_commit,
@@ -956,6 +973,7 @@ static struct exynos_drm_crtc_ops fimd_crtc_ops = {
        .win_commit = fimd_win_commit,
        .win_disable = fimd_win_disable,
        .te_handler = fimd_te_handler,
+       .clock_enable = fimd_dp_clock_enable,
 };
 
 static irqreturn_t fimd_irq_handler(int irq, void *dev_id)
@@ -1025,12 +1043,7 @@ static int fimd_bind(struct device *dev, struct device *master, void *data)
        if (ctx->display)
                exynos_drm_create_enc_conn(drm_dev, ctx->display);
 
-       ret = fimd_iommu_attach_devices(ctx, drm_dev);
-       if (ret)
-               return ret;
-
-       return 0;
-
+       return fimd_iommu_attach_devices(ctx, drm_dev);
 }
 
 static void fimd_unbind(struct device *dev, struct device *master,
@@ -1192,24 +1205,6 @@ static int fimd_remove(struct platform_device *pdev)
        return 0;
 }
 
-void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable)
-{
-       struct fimd_context *ctx = crtc->ctx;
-       u32 val;
-
-       /*
-        * Only Exynos 5250, 5260, 5410 and 542x requires enabling DP/MIE
-        * clock. On these SoCs the bootloader may enable it but any
-        * power domain off/on will reset it to disable state.
-        */
-       if (ctx->driver_data != &exynos5_fimd_driver_data)
-               return;
-
-       val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE;
-       writel(DP_MIE_CLK_DP_ENABLE, ctx->regs + DP_MIE_CLKCON);
-}
-EXPORT_SYMBOL_GPL(fimd_dp_clock_enable);
-
 struct platform_driver fimd_driver = {
        .probe          = fimd_probe,
        .remove         = fimd_remove,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.h b/drivers/gpu/drm/exynos/exynos_drm_fimd.h
deleted file mode 100644 (file)
index b4fcaa5..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright (c) 2015 Samsung Electronics Co., Ltd.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_FIMD_H_
-#define _EXYNOS_DRM_FIMD_H_
-
-extern void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable);
-
-#endif /* _EXYNOS_DRM_FIMD_H_ */
index 13ea334..b1180fb 100644 (file)
@@ -76,7 +76,7 @@ int exynos_check_plane(struct drm_plane *plane, struct drm_framebuffer *fb)
                        return -EFAULT;
                }
 
-               exynos_plane->dma_addr[i] = buffer->dma_addr;
+               exynos_plane->dma_addr[i] = buffer->dma_addr + fb->offsets[i];
 
                DRM_DEBUG_KMS("buffer: %d, dma_addr = 0x%lx\n",
                                i, (unsigned long)exynos_plane->dma_addr[i]);
index 27e84ec..1b3479a 100644 (file)
@@ -217,7 +217,7 @@ static int vidi_ctx_initialize(struct vidi_context *ctx,
        return 0;
 }
 
-static struct exynos_drm_crtc_ops vidi_crtc_ops = {
+static const struct exynos_drm_crtc_ops vidi_crtc_ops = {
        .dpms = vidi_dpms,
        .enable_vblank = vidi_enable_vblank,
        .disable_vblank = vidi_disable_vblank,
index fbec750..8874c1f 100644 (file)
 #define MIXER_WIN_NR           3
 #define MIXER_DEFAULT_WIN      0
 
+/* The pixelformats that are natively supported by the mixer. */
+#define MXR_FORMAT_RGB565      4
+#define MXR_FORMAT_ARGB1555    5
+#define MXR_FORMAT_ARGB4444    6
+#define MXR_FORMAT_ARGB8888    7
+
 struct mixer_resources {
        int                     irq;
        void __iomem            *mixer_regs;
@@ -327,7 +333,8 @@ static void mixer_cfg_rgb_fmt(struct mixer_context *ctx, unsigned int height)
        mixer_reg_writemask(res, MXR_CFG, val, MXR_CFG_RGB_FMT_MASK);
 }
 
-static void mixer_cfg_layer(struct mixer_context *ctx, int win, bool enable)
+static void mixer_cfg_layer(struct mixer_context *ctx, unsigned int win,
+                               bool enable)
 {
        struct mixer_resources *res = &ctx->mixer_res;
        u32 val = enable ? ~0 : 0;
@@ -359,8 +366,6 @@ static void mixer_run(struct mixer_context *ctx)
        struct mixer_resources *res = &ctx->mixer_res;
 
        mixer_reg_writemask(res, MXR_STATUS, ~0, MXR_STATUS_REG_RUN);
-
-       mixer_regs_dump(ctx);
 }
 
 static void mixer_stop(struct mixer_context *ctx)
@@ -373,16 +378,13 @@ static void mixer_stop(struct mixer_context *ctx)
        while (!(mixer_reg_read(res, MXR_STATUS) & MXR_STATUS_REG_IDLE) &&
                        --timeout)
                usleep_range(10000, 12000);
-
-       mixer_regs_dump(ctx);
 }
 
-static void vp_video_buffer(struct mixer_context *ctx, int win)
+static void vp_video_buffer(struct mixer_context *ctx, unsigned int win)
 {
        struct mixer_resources *res = &ctx->mixer_res;
        unsigned long flags;
        struct exynos_drm_plane *plane;
-       unsigned int buf_num = 1;
        dma_addr_t luma_addr[2], chroma_addr[2];
        bool tiled_mode = false;
        bool crcb_mode = false;
@@ -393,27 +395,18 @@ static void vp_video_buffer(struct mixer_context *ctx, int win)
        switch (plane->pixel_format) {
        case DRM_FORMAT_NV12:
                crcb_mode = false;
-               buf_num = 2;
                break;
-       /* TODO: single buffer format NV12, NV21 */
+       case DRM_FORMAT_NV21:
+               crcb_mode = true;
+               break;
        default:
-               /* ignore pixel format at disable time */
-               if (!plane->dma_addr[0])
-                       break;
-
                DRM_ERROR("pixel format for vp is wrong [%d].\n",
                                plane->pixel_format);
                return;
        }
 
-       if (buf_num == 2) {
-               luma_addr[0] = plane->dma_addr[0];
-               chroma_addr[0] = plane->dma_addr[1];
-       } else {
-               luma_addr[0] = plane->dma_addr[0];
-               chroma_addr[0] = plane->dma_addr[0]
-                       + (plane->pitch * plane->fb_height);
-       }
+       luma_addr[0] = plane->dma_addr[0];
+       chroma_addr[0] = plane->dma_addr[1];
 
        if (plane->scan_flag & DRM_MODE_FLAG_INTERLACE) {
                ctx->interlace = true;
@@ -484,6 +477,7 @@ static void vp_video_buffer(struct mixer_context *ctx, int win)
        mixer_vsync_set_update(ctx, true);
        spin_unlock_irqrestore(&res->reg_slock, flags);
 
+       mixer_regs_dump(ctx);
        vp_regs_dump(ctx);
 }
 
@@ -518,7 +512,7 @@ fail:
        return -ENOTSUPP;
 }
 
-static void mixer_graph_buffer(struct mixer_context *ctx, int win)
+static void mixer_graph_buffer(struct mixer_context *ctx, unsigned int win)
 {
        struct mixer_resources *res = &ctx->mixer_res;
        unsigned long flags;
@@ -531,20 +525,27 @@ static void mixer_graph_buffer(struct mixer_context *ctx, int win)
 
        plane = &ctx->planes[win];
 
-       #define RGB565 4
-       #define ARGB1555 5
-       #define ARGB4444 6
-       #define ARGB8888 7
+       switch (plane->pixel_format) {
+       case DRM_FORMAT_XRGB4444:
+               fmt = MXR_FORMAT_ARGB4444;
+               break;
 
-       switch (plane->bpp) {
-       case 16:
-               fmt = ARGB4444;
+       case DRM_FORMAT_XRGB1555:
+               fmt = MXR_FORMAT_ARGB1555;
                break;
-       case 32:
-               fmt = ARGB8888;
+
+       case DRM_FORMAT_RGB565:
+               fmt = MXR_FORMAT_RGB565;
+               break;
+
+       case DRM_FORMAT_XRGB8888:
+       case DRM_FORMAT_ARGB8888:
+               fmt = MXR_FORMAT_ARGB8888;
                break;
+
        default:
-               fmt = ARGB8888;
+               DRM_DEBUG_KMS("pixelformat unsupported by mixer\n");
+               return;
        }
 
        /* check if mixer supports requested scaling setup */
@@ -617,6 +618,8 @@ static void mixer_graph_buffer(struct mixer_context *ctx, int win)
 
        mixer_vsync_set_update(ctx, true);
        spin_unlock_irqrestore(&res->reg_slock, flags);
+
+       mixer_regs_dump(ctx);
 }
 
 static void vp_win_reset(struct mixer_context *ctx)
@@ -1070,6 +1073,7 @@ static void mixer_poweroff(struct mixer_context *ctx)
        mutex_unlock(&ctx->mixer_mutex);
 
        mixer_stop(ctx);
+       mixer_regs_dump(ctx);
        mixer_window_suspend(ctx);
 
        ctx->int_en = mixer_reg_read(res, MXR_INT_EN);
@@ -1126,7 +1130,7 @@ int mixer_check_mode(struct drm_display_mode *mode)
        return -EINVAL;
 }
 
-static struct exynos_drm_crtc_ops mixer_crtc_ops = {
+static const struct exynos_drm_crtc_ops mixer_crtc_ops = {
        .dpms                   = mixer_dpms,
        .enable_vblank          = mixer_enable_vblank,
        .disable_vblank         = mixer_disable_vblank,
@@ -1156,7 +1160,7 @@ static struct mixer_drv_data exynos4210_mxr_drv_data = {
        .has_sclk = 1,
 };
 
-static struct platform_device_id mixer_driver_types[] = {
+static const struct platform_device_id mixer_driver_types[] = {
        {
                .name           = "s5p-mixer",
                .driver_data    = (unsigned long)&exynos4210_mxr_drv_data,
index 007c7d7..dc55c51 100644 (file)
@@ -1667,12 +1667,15 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 
        if (HAS_PCH_SPLIT(dev))
                sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN;
-       else if (IS_CRESTLINE(dev) || IS_I945G(dev) || IS_I945GM(dev))
+       else if (IS_CRESTLINE(dev) || IS_G4X(dev) ||
+                IS_I945G(dev) || IS_I945GM(dev))
                sr_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
        else if (IS_I915GM(dev))
                sr_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
        else if (IS_PINEVIEW(dev))
                sr_enabled = I915_READ(DSPFW3) & PINEVIEW_SELF_REFRESH_EN;
+       else if (IS_VALLEYVIEW(dev))
+               sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
 
        intel_runtime_pm_put(dev_priv);
 
index c302ffb..a19d2c7 100644 (file)
@@ -699,6 +699,16 @@ static int i915_drm_resume(struct drm_device *dev)
        intel_init_pch_refclk(dev);
        drm_mode_config_reset(dev);
 
+       /*
+        * Interrupts have to be enabled before any batches are run. If not the
+        * GPU will hang. i915_gem_init_hw() will initiate batches to
+        * update/restore the context.
+        *
+        * Modeset enabling in intel_modeset_init_hw() also needs working
+        * interrupts.
+        */
+       intel_runtime_pm_enable_interrupts(dev_priv);
+
        mutex_lock(&dev->struct_mutex);
        if (i915_gem_init_hw(dev)) {
                DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
@@ -706,9 +716,6 @@ static int i915_drm_resume(struct drm_device *dev)
        }
        mutex_unlock(&dev->struct_mutex);
 
-       /* We need working interrupts for modeset enabling ... */
-       intel_runtime_pm_enable_interrupts(dev_priv);
-
        intel_modeset_init_hw(dev);
 
        spin_lock_irq(&dev_priv->irq_lock);
index 53394f9..2d0995e 100644 (file)
@@ -3003,8 +3003,8 @@ int i915_vma_unbind(struct i915_vma *vma)
                } else if (vma->ggtt_view.pages) {
                        sg_free_table(vma->ggtt_view.pages);
                        kfree(vma->ggtt_view.pages);
-                       vma->ggtt_view.pages = NULL;
                }
+               vma->ggtt_view.pages = NULL;
        }
 
        drm_mm_remove_node(&vma->node);
index a3190e7..cc552a4 100644 (file)
@@ -32,6 +32,7 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
+#include <linux/uaccess.h>
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -465,7 +466,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
        }
 
        /* We can't wait for rendering with pagefaults disabled */
-       if (obj->active && in_atomic())
+       if (obj->active && pagefault_disabled())
                return -EFAULT;
 
        if (use_cpu_reloc(obj))
index d547d9c..d0f3cbc 100644 (file)
@@ -13635,9 +13635,6 @@ static const struct intel_dmi_quirk intel_dmi_quirks[] = {
 };
 
 static struct intel_quirk intel_quirks[] = {
-       /* HP Mini needs pipe A force quirk (LP: #322104) */
-       { 0x27ae, 0x103c, 0x361a, quirk_pipea_force },
-
        /* Toshiba Protege R-205, S-209 needs pipe A force quirk */
        { 0x2592, 0x1179, 0x0001, quirk_pipea_force },
 
index d023710..d714a4b 100644 (file)
@@ -880,10 +880,8 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
                                      DP_AUX_CH_CTL_RECEIVE_ERROR))
                                continue;
                        if (status & DP_AUX_CH_CTL_DONE)
-                               break;
+                               goto done;
                }
-               if (status & DP_AUX_CH_CTL_DONE)
-                       break;
        }
 
        if ((status & DP_AUX_CH_CTL_DONE) == 0) {
@@ -892,6 +890,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
                goto out;
        }
 
+done:
        /* Check for timeout or receive error.
         * Timeouts occur when the sink is not connected
         */
@@ -1348,7 +1347,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 
        pipe_config->has_dp_encoder = true;
        pipe_config->has_drrs = false;
-       pipe_config->has_audio = intel_dp->has_audio;
+       pipe_config->has_audio = intel_dp->has_audio && port != PORT_A;
 
        if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) {
                intel_fixed_panel_mode(intel_connector->panel.fixed_mode,
@@ -2211,8 +2210,8 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
        int dotclock;
 
        tmp = I915_READ(intel_dp->output_reg);
-       if (tmp & DP_AUDIO_OUTPUT_ENABLE)
-               pipe_config->has_audio = true;
+
+       pipe_config->has_audio = tmp & DP_AUDIO_OUTPUT_ENABLE && port != PORT_A;
 
        if ((port == PORT_A) || !HAS_PCH_CPT(dev)) {
                if (tmp & DP_SYNC_HS_HIGH)
@@ -3812,7 +3811,8 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
                        if (val == 0)
                                break;
 
-                       intel_dp->sink_rates[i] = val * 200;
+                       /* Value read is in kHz while drm clock is saved in deca-kHz */
+                       intel_dp->sink_rates[i] = (val * 200) / 10;
                }
                intel_dp->num_sink_rates = i;
        }
index 56e437e..ae62800 100644 (file)
@@ -435,7 +435,7 @@ gmbus_xfer(struct i2c_adapter *adapter,
                                               struct intel_gmbus,
                                               adapter);
        struct drm_i915_private *dev_priv = bus->dev_priv;
-       int i, reg_offset;
+       int i = 0, inc, try = 0, reg_offset;
        int ret = 0;
 
        intel_aux_display_runtime_get(dev_priv);
@@ -448,12 +448,14 @@ gmbus_xfer(struct i2c_adapter *adapter,
 
        reg_offset = dev_priv->gpio_mmio_base;
 
+retry:
        I915_WRITE(GMBUS0 + reg_offset, bus->reg0);
 
-       for (i = 0; i < num; i++) {
+       for (; i < num; i += inc) {
+               inc = 1;
                if (gmbus_is_index_read(msgs, i, num)) {
                        ret = gmbus_xfer_index_read(dev_priv, &msgs[i]);
-                       i += 1;  /* set i to the index of the read xfer */
+                       inc = 2; /* an index read is two msgs */
                } else if (msgs[i].flags & I2C_M_RD) {
                        ret = gmbus_xfer_read(dev_priv, &msgs[i], 0);
                } else {
@@ -525,6 +527,18 @@ clear_err:
                         adapter->name, msgs[i].addr,
                         (msgs[i].flags & I2C_M_RD) ? 'r' : 'w', msgs[i].len);
 
+       /*
+        * Passive adapters sometimes NAK the first probe. Retry the first
+        * message once on -ENXIO for GMBUS transfers; the bit banging algorithm
+        * has retries internally. See also the retry loop in
+        * drm_do_probe_ddc_edid, which bails out on the first -ENXIO.
+        */
+       if (ret == -ENXIO && i == 0 && try++ == 0) {
+               DRM_DEBUG_KMS("GMBUS [%s] NAK on first message, retry\n",
+                             adapter->name);
+               goto retry;
+       }
+
        goto out;
 
 timeout:
index 09df74b..424e621 100644 (file)
@@ -1134,6 +1134,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring)
        I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
        I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
 
+       if (ring->status_page.obj) {
+               I915_WRITE(RING_HWS_PGA(ring->mmio_base),
+                          (u32)ring->status_page.gfx_addr);
+               POSTING_READ(RING_HWS_PGA(ring->mmio_base));
+       }
+
        I915_WRITE(RING_MODE_GEN7(ring),
                   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
                   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
index 5abda1d..fbcc7df 100644 (file)
@@ -813,12 +813,28 @@ static int intel_dual_link_lvds_callback(const struct dmi_system_id *id)
 static const struct dmi_system_id intel_dual_link_lvds[] = {
        {
                .callback = intel_dual_link_lvds_callback,
-               .ident = "Apple MacBook Pro (Core i5/i7 Series)",
+               .ident = "Apple MacBook Pro 15\" (2010)",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro6,2"),
+               },
+       },
+       {
+               .callback = intel_dual_link_lvds_callback,
+               .ident = "Apple MacBook Pro 15\" (2011)",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
                        DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro8,2"),
                },
        },
+       {
+               .callback = intel_dual_link_lvds_callback,
+               .ident = "Apple MacBook Pro 15\" (2012)",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro9,1"),
+               },
+       },
        { }     /* terminating entry */
 };
 
@@ -848,6 +864,11 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder)
        if (i915.lvds_channel_mode > 0)
                return i915.lvds_channel_mode == 2;
 
+       /* single channel LVDS is limited to 112 MHz */
+       if (lvds_encoder->attached_connector->base.panel.fixed_mode->clock
+           > 112999)
+               return true;
+
        if (dmi_check_system(intel_dual_link_lvds))
                return true;
 
@@ -1111,6 +1132,8 @@ void intel_lvds_init(struct drm_device *dev)
 out:
        mutex_unlock(&dev->mode_config.mutex);
 
+       intel_panel_init(&intel_connector->panel, fixed_mode, downclock_mode);
+
        lvds_encoder->is_dual_link = compute_is_dual_link_lvds(lvds_encoder);
        DRM_DEBUG_KMS("detected %s-link lvds configuration\n",
                      lvds_encoder->is_dual_link ? "dual" : "single");
@@ -1125,7 +1148,6 @@ out:
        }
        drm_connector_register(connector);
 
-       intel_panel_init(&intel_connector->panel, fixed_mode, downclock_mode);
        intel_panel_setup_backlight(connector, INVALID_PIPE);
 
        return;
index fa4ccb3..555b896 100644 (file)
@@ -2045,22 +2045,20 @@ static void ilk_compute_wm_parameters(struct drm_crtc *crtc,
        p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
        p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
 
-       if (crtc->primary->state->fb) {
-               p->pri.enabled = true;
+       if (crtc->primary->state->fb)
                p->pri.bytes_per_pixel =
                        crtc->primary->state->fb->bits_per_pixel / 8;
-       } else {
-               p->pri.enabled = false;
-               p->pri.bytes_per_pixel = 0;
-       }
+       else
+               p->pri.bytes_per_pixel = 4;
+
+       p->cur.bytes_per_pixel = 4;
+       /*
+        * TODO: for now, assume primary and cursor planes are always enabled.
+        * Setting them to false makes the screen flicker.
+        */
+       p->pri.enabled = true;
+       p->cur.enabled = true;
 
-       if (crtc->cursor->state->fb) {
-               p->cur.enabled = true;
-               p->cur.bytes_per_pixel = 4;
-       } else {
-               p->cur.enabled = false;
-               p->cur.bytes_per_pixel = 0;
-       }
        p->pri.horiz_pixels = intel_crtc->config->pipe_src_w;
        p->cur.horiz_pixels = intel_crtc->base.cursor->state->crtc_w;
 
index 441e250..005b5e0 100644 (file)
@@ -901,13 +901,6 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
 
-       if (INTEL_REVID(dev) == SKL_REVID_C0 ||
-           INTEL_REVID(dev) == SKL_REVID_D0)
-               /* WaBarrierPerformanceFixDisable:skl */
-               WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                                 HDC_FENCE_DEST_SLM_DISABLE |
-                                 HDC_BARRIER_PERFORMANCE_DISABLE);
-
        return 0;
 }
 
@@ -1024,6 +1017,13 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
                WA_SET_BIT_MASKED(HIZ_CHICKEN,
                                  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
+       if (INTEL_REVID(dev) == SKL_REVID_C0 ||
+           INTEL_REVID(dev) == SKL_REVID_D0)
+               /* WaBarrierPerformanceFixDisable:skl */
+               WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                                 HDC_FENCE_DEST_SLM_DISABLE |
+                                 HDC_BARRIER_PERFORMANCE_DISABLE);
+
        return skl_tune_iz_hashing(ring);
 }
 
index e87d2f4..987b81f 100644 (file)
@@ -2550,7 +2550,7 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device)
 
        DRM_DEBUG_KMS("initialising analog device %d\n", device);
 
-       intel_sdvo_connector = kzalloc(sizeof(*intel_sdvo_connector), GFP_KERNEL);
+       intel_sdvo_connector = intel_sdvo_connector_alloc();
        if (!intel_sdvo_connector)
                return false;
 
index 6e84df9..ad4b901 100644 (file)
@@ -1526,6 +1526,11 @@ static int mga_vga_mode_valid(struct drm_connector *connector,
                return MODE_BANDWIDTH;
        }
 
+       if ((mode->hdisplay % 8) != 0 || (mode->hsync_start % 8) != 0 ||
+           (mode->hsync_end % 8) != 0 || (mode->htotal % 8) != 0) {
+               return MODE_H_ILLEGAL;
+       }
+
        if (mode->crtc_hdisplay > 2048 || mode->crtc_hsync_start > 4096 ||
            mode->crtc_hsync_end > 4096 || mode->crtc_htotal > 4096 ||
            mode->crtc_vdisplay > 2048 || mode->crtc_vsync_start > 4096 ||
index 94a5bee..bbdcab0 100644 (file)
@@ -384,7 +384,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu)
        if (gpu->memptrs_bo) {
                if (gpu->memptrs_iova)
                        msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
-               drm_gem_object_unreference(gpu->memptrs_bo);
+               drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
        }
        release_firmware(gpu->pm4);
        release_firmware(gpu->pfp);
index 28d1f95..ad50b80 100644 (file)
@@ -177,6 +177,11 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
                goto fail;
        }
 
+       for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) {
+               encoders[i]->bridge = msm_dsi->bridge;
+               msm_dsi->encoders[i] = encoders[i];
+       }
+
        msm_dsi->connector = msm_dsi_manager_connector_init(msm_dsi->id);
        if (IS_ERR(msm_dsi->connector)) {
                ret = PTR_ERR(msm_dsi->connector);
@@ -185,11 +190,6 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
                goto fail;
        }
 
-       for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) {
-               encoders[i]->bridge = msm_dsi->bridge;
-               msm_dsi->encoders[i] = encoders[i];
-       }
-
        priv->bridges[priv->num_bridges++]       = msm_dsi->bridge;
        priv->connectors[priv->num_connectors++] = msm_dsi->connector;
 
index 956b224..649d20d 100644 (file)
@@ -1023,7 +1023,7 @@ static int dsi_short_read1_resp(u8 *buf, const struct mipi_dsi_msg *msg)
                *data = buf[1]; /* strip out dcs type */
                return 1;
        } else {
-               pr_err("%s: read data does not match with rx_buf len %d\n",
+               pr_err("%s: read data does not match with rx_buf len %zu\n",
                        __func__, msg->rx_len);
                return -EINVAL;
        }
@@ -1040,7 +1040,7 @@ static int dsi_short_read2_resp(u8 *buf, const struct mipi_dsi_msg *msg)
                data[1] = buf[2];
                return 2;
        } else {
-               pr_err("%s: read data does not match with rx_buf len %d\n",
+               pr_err("%s: read data does not match with rx_buf len %zu\n",
                        __func__, msg->rx_len);
                return -EINVAL;
        }
@@ -1093,7 +1093,6 @@ static int dsi_cmd_dma_rx(struct msm_dsi_host *msm_host,
 {
        u32 *lp, *temp, data;
        int i, j = 0, cnt;
-       bool ack_error = false;
        u32 read_cnt;
        u8 reg[16];
        int repeated_bytes = 0;
@@ -1105,15 +1104,10 @@ static int dsi_cmd_dma_rx(struct msm_dsi_host *msm_host,
        if (cnt > 4)
                cnt = 4; /* 4 x 32 bits registers only */
 
-       /* Calculate real read data count */
-       read_cnt = dsi_read(msm_host, 0x1d4) >> 16;
-
-       ack_error = (rx_byte == 4) ?
-               (read_cnt == 8) : /* short pkt + 4-byte error pkt */
-               (read_cnt == (pkt_size + 6 + 4)); /* long pkt+4-byte error pkt*/
-
-       if (ack_error)
-               read_cnt -= 4; /* Remove 4 byte error pkt */
+       if (rx_byte == 4)
+               read_cnt = 4;
+       else
+               read_cnt = pkt_size + 6;
 
        /*
         * In case of multiple reads from the panel, after the first read, there
@@ -1215,7 +1209,7 @@ static void dsi_err_worker(struct work_struct *work)
                container_of(work, struct msm_dsi_host, err_work);
        u32 status = msm_host->err_work_state;
 
-       pr_err("%s: status=%x\n", __func__, status);
+       pr_err_ratelimited("%s: status=%x\n", __func__, status);
        if (status & DSI_ERR_STATE_MDP_FIFO_UNDERFLOW)
                dsi_sw_reset_restore(msm_host);
 
@@ -1797,6 +1791,7 @@ int msm_dsi_host_cmd_rx(struct mipi_dsi_host *host,
        case MIPI_DSI_RX_ACKNOWLEDGE_AND_ERROR_REPORT:
                pr_err("%s: rx ACK_ERR_PACLAGE\n", __func__);
                ret = 0;
+               break;
        case MIPI_DSI_RX_GENERIC_SHORT_READ_RESPONSE_1BYTE:
        case MIPI_DSI_RX_DCS_SHORT_READ_RESPONSE_1BYTE:
                ret = dsi_short_read1_resp(buf, msg);
index ee3ebca..0a40f3c 100644 (file)
@@ -462,7 +462,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id)
        struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
        struct drm_connector *connector = NULL;
        struct dsi_connector *dsi_connector;
-       int ret;
+       int ret, i;
 
        dsi_connector = devm_kzalloc(msm_dsi->dev->dev,
                                sizeof(*dsi_connector), GFP_KERNEL);
@@ -495,6 +495,10 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id)
        if (ret)
                goto fail;
 
+       for (i = 0; i < MSM_DSI_ENCODER_NUM; i++)
+               drm_mode_connector_attach_encoder(connector,
+                                               msm_dsi->encoders[i]);
+
        return connector;
 
 fail:
index 5f5a84f..208f9d4 100644 (file)
@@ -132,7 +132,7 @@ ssize_t edp_aux_transfer(struct drm_dp_aux *drm_aux, struct drm_dp_aux_msg *msg)
        /* msg sanity check */
        if ((native && (msg->size > AUX_CMD_NATIVE_MAX)) ||
                (msg->size > AUX_CMD_I2C_MAX)) {
-               pr_err("%s: invalid msg: size(%d), request(%x)\n",
+               pr_err("%s: invalid msg: size(%zu), request(%x)\n",
                        __func__, msg->size, msg->request);
                return -EINVAL;
        }
@@ -155,7 +155,7 @@ ssize_t edp_aux_transfer(struct drm_dp_aux *drm_aux, struct drm_dp_aux_msg *msg)
                 */
                edp_write(aux->base + REG_EDP_AUX_TRANS_CTRL, 0);
                msm_edp_aux_ctrl(aux, 1);
-               pr_err("%s: aux timeout, %d\n", __func__, ret);
+               pr_err("%s: aux timeout, %zd\n", __func__, ret);
                goto unlock_exit;
        }
        DBG("completion");
index d8812e8..b4d1b46 100644 (file)
@@ -151,6 +151,8 @@ struct drm_connector *msm_edp_connector_init(struct msm_edp *edp)
        if (ret)
                goto fail;
 
+       drm_mode_connector_attach_encoder(connector, edp->encoder);
+
        return connector;
 
 fail:
index 0ec5abd..29e52d7 100644 (file)
@@ -1149,12 +1149,13 @@ int msm_edp_ctrl_init(struct msm_edp *edp)
        ctrl->aux = msm_edp_aux_init(dev, ctrl->base, &ctrl->drm_aux);
        if (!ctrl->aux || !ctrl->drm_aux) {
                pr_err("%s:failed to init aux\n", __func__);
-               return ret;
+               return -ENOMEM;
        }
 
        ctrl->phy = msm_edp_phy_init(dev, ctrl->base);
        if (!ctrl->phy) {
                pr_err("%s:failed to init phy\n", __func__);
+               ret = -ENOMEM;
                goto err_destory_aux;
        }
 
index e001e6b..8b9a793 100644 (file)
@@ -72,14 +72,13 @@ const struct mdp5_cfg_hw msm8x74_config = {
                .base = { 0x12d00, 0x12e00, 0x12f00 },
        },
        .intf = {
-               .count = 4,
                .base = { 0x12500, 0x12700, 0x12900, 0x12b00 },
-       },
-       .intfs = {
-               [0] = INTF_eDP,
-               [1] = INTF_DSI,
-               [2] = INTF_DSI,
-               [3] = INTF_HDMI,
+               .connect = {
+                       [0] = INTF_eDP,
+                       [1] = INTF_DSI,
+                       [2] = INTF_DSI,
+                       [3] = INTF_HDMI,
+               },
        },
        .max_clk = 200000000,
 };
@@ -142,14 +141,13 @@ const struct mdp5_cfg_hw apq8084_config = {
                .base = { 0x12f00, 0x13000, 0x13100, 0x13200 },
        },
        .intf = {
-               .count = 5,
                .base = { 0x12500, 0x12700, 0x12900, 0x12b00, 0x12d00 },
-       },
-       .intfs = {
-               [0] = INTF_eDP,
-               [1] = INTF_DSI,
-               [2] = INTF_DSI,
-               [3] = INTF_HDMI,
+               .connect = {
+                       [0] = INTF_eDP,
+                       [1] = INTF_DSI,
+                       [2] = INTF_DSI,
+                       [3] = INTF_HDMI,
+               },
        },
        .max_clk = 320000000,
 };
@@ -196,10 +194,12 @@ const struct mdp5_cfg_hw msm8x16_config = {
 
        },
        .intf = {
-               .count = 1, /* INTF_1 */
-               .base = { 0x6B800 },
+               .base = { 0x00000, 0x6b800 },
+               .connect = {
+                       [0] = INTF_DISABLED,
+                       [1] = INTF_DSI,
+               },
        },
-       /* TODO enable .intfs[] with [1] = INTF_DSI, once DSI is implemented */
        .max_clk = 320000000,
 };
 
index 3a551b0..69349ab 100644 (file)
@@ -59,6 +59,11 @@ struct mdp5_smp_block {
 
 #define MDP5_INTF_NUM_MAX      5
 
+struct mdp5_intf_block {
+       uint32_t base[MAX_BASES];
+       u32 connect[MDP5_INTF_NUM_MAX]; /* array of enum mdp5_intf_type */
+};
+
 struct mdp5_cfg_hw {
        char  *name;
 
@@ -72,9 +77,7 @@ struct mdp5_cfg_hw {
        struct mdp5_sub_block dspp;
        struct mdp5_sub_block ad;
        struct mdp5_sub_block pp;
-       struct mdp5_sub_block intf;
-
-       u32 intfs[MDP5_INTF_NUM_MAX]; /* array of enum mdp5_intf_type */
+       struct mdp5_intf_block intf;
 
        uint32_t max_clk;
 };
index dfa8beb..bbacf9d 100644 (file)
@@ -206,8 +206,8 @@ static struct drm_encoder *construct_encoder(struct mdp5_kms *mdp5_kms,
 
 static int get_dsi_id_from_intf(const struct mdp5_cfg_hw *hw_cfg, int intf_num)
 {
-       const int intf_cnt = hw_cfg->intf.count;
-       const u32 *intfs = hw_cfg->intfs;
+       const enum mdp5_intf_type *intfs = hw_cfg->intf.connect;
+       const int intf_cnt = ARRAY_SIZE(hw_cfg->intf.connect);
        int id = 0, i;
 
        for (i = 0; i < intf_cnt; i++) {
@@ -228,7 +228,7 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
        struct msm_drm_private *priv = dev->dev_private;
        const struct mdp5_cfg_hw *hw_cfg =
                                        mdp5_cfg_get_hw_config(mdp5_kms->cfg);
-       enum mdp5_intf_type intf_type = hw_cfg->intfs[intf_num];
+       enum mdp5_intf_type intf_type = hw_cfg->intf.connect[intf_num];
        struct drm_encoder *encoder;
        int ret = 0;
 
@@ -365,7 +365,7 @@ static int modeset_init(struct mdp5_kms *mdp5_kms)
        /* Construct encoders and modeset initialize connector devices
         * for each external display interface.
         */
-       for (i = 0; i < ARRAY_SIZE(hw_cfg->intfs); i++) {
+       for (i = 0; i < ARRAY_SIZE(hw_cfg->intf.connect); i++) {
                ret = modeset_init_intf(mdp5_kms, i);
                if (ret)
                        goto fail;
@@ -514,8 +514,8 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
         */
        mdp5_enable(mdp5_kms);
        for (i = 0; i < MDP5_INTF_NUM_MAX; i++) {
-               if (!config->hw->intf.base[i] ||
-                               mdp5_cfg_intf_is_virtual(config->hw->intfs[i]))
+               if (mdp5_cfg_intf_is_virtual(config->hw->intf.connect[i]) ||
+                               !config->hw->intf.base[i])
                        continue;
                mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(i), 0);
        }
index 18a3d20..57b8f56 100644 (file)
@@ -273,7 +273,7 @@ static void set_scanout_locked(struct drm_plane *plane,
        mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC2_ADDR(pipe),
                        msm_framebuffer_iova(fb, mdp5_kms->id, 2));
        mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC3_ADDR(pipe),
-                       msm_framebuffer_iova(fb, mdp5_kms->id, 4));
+                       msm_framebuffer_iova(fb, mdp5_kms->id, 3));
 
        plane->fb = fb;
 }
index 47f4dd4..c80a6be 100644 (file)
 
 static void msm_fb_output_poll_changed(struct drm_device *dev)
 {
+#ifdef CONFIG_DRM_MSM_FBDEV
        struct msm_drm_private *priv = dev->dev_private;
        if (priv->fbdev)
                drm_fb_helper_hotplug_event(priv->fbdev);
+#endif
 }
 
 static const struct drm_mode_config_funcs mode_config_funcs = {
@@ -94,7 +96,7 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
        }
 
        if (reglog)
-               printk(KERN_DEBUG "IO:region %s %08x %08lx\n", dbgname, (u32)ptr, size);
+               printk(KERN_DEBUG "IO:region %s %p %08lx\n", dbgname, ptr, size);
 
        return ptr;
 }
@@ -102,7 +104,7 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
 void msm_writel(u32 data, void __iomem *addr)
 {
        if (reglog)
-               printk(KERN_DEBUG "IO:W %08x %08x\n", (u32)addr, data);
+               printk(KERN_DEBUG "IO:W %p %08x\n", addr, data);
        writel(data, addr);
 }
 
@@ -110,7 +112,7 @@ u32 msm_readl(const void __iomem *addr)
 {
        u32 val = readl(addr);
        if (reglog)
-               printk(KERN_ERR "IO:R %08x %08x\n", (u32)addr, val);
+               printk(KERN_ERR "IO:R %p %08x\n", addr, val);
        return val;
 }
 
@@ -143,8 +145,8 @@ static int msm_unload(struct drm_device *dev)
        if (gpu) {
                mutex_lock(&dev->struct_mutex);
                gpu->funcs->pm_suspend(gpu);
-               gpu->funcs->destroy(gpu);
                mutex_unlock(&dev->struct_mutex);
+               gpu->funcs->destroy(gpu);
        }
 
        if (priv->vram.paddr) {
@@ -177,7 +179,7 @@ static int get_mdp_ver(struct platform_device *pdev)
        const struct of_device_id *match;
        match = of_match_node(match_types, dev->of_node);
        if (match)
-               return (int)match->data;
+               return (int)(unsigned long)match->data;
 #endif
        return 4;
 }
@@ -216,7 +218,7 @@ static int msm_init_vram(struct drm_device *dev)
                if (ret)
                        return ret;
                size = r.end - r.start;
-               DRM_INFO("using VRAM carveout: %lx@%08x\n", size, r.start);
+               DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start);
        } else
 #endif
 
@@ -283,10 +285,6 @@ static int msm_load(struct drm_device *dev, unsigned long flags)
 
        drm_mode_config_init(dev);
 
-       ret = msm_init_vram(dev);
-       if (ret)
-               goto fail;
-
        platform_set_drvdata(pdev, dev);
 
        /* Bind all our sub-components: */
@@ -294,6 +292,10 @@ static int msm_load(struct drm_device *dev, unsigned long flags)
        if (ret)
                return ret;
 
+       ret = msm_init_vram(dev);
+       if (ret)
+               goto fail;
+
        switch (get_mdp_ver(pdev)) {
        case 4:
                kms = mdp4_kms_init(dev);
@@ -419,9 +421,11 @@ static void msm_preclose(struct drm_device *dev, struct drm_file *file)
 
 static void msm_lastclose(struct drm_device *dev)
 {
+#ifdef CONFIG_DRM_MSM_FBDEV
        struct msm_drm_private *priv = dev->dev_private;
        if (priv->fbdev)
                drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
+#endif
 }
 
 static irqreturn_t msm_irq(int irq, void *arg)
index 6b573e6..1217132 100644 (file)
@@ -172,8 +172,8 @@ struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
 {
        struct msm_drm_private *priv = dev->dev_private;
        struct msm_kms *kms = priv->kms;
-       struct msm_framebuffer *msm_fb;
-       struct drm_framebuffer *fb = NULL;
+       struct msm_framebuffer *msm_fb = NULL;
+       struct drm_framebuffer *fb;
        const struct msm_format *format;
        int ret, i, n;
        unsigned int hsub, vsub;
@@ -239,8 +239,7 @@ struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
        return fb;
 
 fail:
-       if (fb)
-               msm_framebuffer_destroy(fb);
+       kfree(msm_fb);
 
        return ERR_PTR(ret);
 }
index 479d8af..5283976 100644 (file)
@@ -483,7 +483,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
        uint64_t off = drm_vma_node_start(&obj->vma_node);
 
        WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-       seq_printf(m, "%08x: %c(r=%u,w=%u) %2d (%2d) %08llx %p %d\n",
+       seq_printf(m, "%08x: %c(r=%u,w=%u) %2d (%2d) %08llx %p %zu\n",
                        msm_obj->flags, is_active(msm_obj) ? 'A' : 'I',
                        msm_obj->read_fence, msm_obj->write_fence,
                        obj->name, obj->refcount.refcount.counter,
index 7acdaa5..7ac2f19 100644 (file)
@@ -60,7 +60,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint32_t iova,
                u32 pa = sg_phys(sg) - sg->offset;
                size_t bytes = sg->length + sg->offset;
 
-               VERB("map[%d]: %08x %08x(%x)", i, iova, pa, bytes);
+               VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes);
 
                ret = iommu_map(domain, da, pa, bytes, prot);
                if (ret)
@@ -99,7 +99,7 @@ static int msm_iommu_unmap(struct msm_mmu *mmu, uint32_t iova,
                if (unmapped < bytes)
                        return unmapped;
 
-               VERB("unmap[%d]: %08x(%x)", i, iova, bytes);
+               VERB("unmap[%d]: %08x(%zx)", i, iova, bytes);
 
                BUG_ON(!PAGE_ALIGNED(bytes));
 
index 8171537..1f14b90 100644 (file)
@@ -56,6 +56,6 @@ fail:
 void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
 {
        if (ring->bo)
-               drm_gem_object_unreference(ring->bo);
+               drm_gem_object_unreference_unlocked(ring->bo);
        kfree(ring);
 }
index 0b5af0f..64f8b2f 100644 (file)
@@ -14,7 +14,7 @@
 
 #define FERMI_TWOD_A                                                 0x0000902d
 
-#define FERMI_MEMORY_TO_MEMORY_FORMAT_A                              0x0000903d
+#define FERMI_MEMORY_TO_MEMORY_FORMAT_A                              0x00009039
 
 #define KEPLER_INLINE_TO_MEMORY_A                                    0x0000a040
 #define KEPLER_INLINE_TO_MEMORY_B                                    0x0000a140
index 2f5eadd..fdb1dcf 100644 (file)
@@ -329,7 +329,6 @@ gm204_gr_init(struct nvkm_object *object)
        nv_mask(priv, 0x419cc0, 0x00000008, 0x00000008);
 
        for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
-       printk(KERN_ERR "ppc %d %d\n", gpc, priv->ppc_nr[gpc]);
                for (ppc = 0; ppc < priv->ppc_nr[gpc]; ppc++)
                        nv_wr32(priv, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);
                nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000);
index e8778c6..c61102f 100644 (file)
@@ -90,12 +90,14 @@ gf100_devinit_disable(struct nvkm_devinit *devinit)
        return disable;
 }
 
-static int
+int
 gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
                   struct nvkm_oclass *oclass, void *data, u32 size,
                   struct nvkm_object **pobject)
 {
+       struct nvkm_devinit_impl *impl = (void *)oclass;
        struct nv50_devinit_priv *priv;
+       u64 disable;
        int ret;
 
        ret = nvkm_devinit_create(parent, engine, oclass, &priv);
@@ -103,7 +105,8 @@ gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
        if (ret)
                return ret;
 
-       if (nv_rd32(priv, 0x022500) & 0x00000001)
+       disable = impl->disable(&priv->base);
+       if (disable & (1ULL << NVDEV_ENGINE_DISP))
                priv->base.post = true;
 
        return 0;
index b345a53..87ca0ec 100644 (file)
@@ -48,7 +48,7 @@ struct nvkm_oclass *
 gm107_devinit_oclass = &(struct nvkm_devinit_impl) {
        .base.handle = NV_SUBDEV(DEVINIT, 0x07),
        .base.ofuncs = &(struct nvkm_ofuncs) {
-               .ctor = nv50_devinit_ctor,
+               .ctor = gf100_devinit_ctor,
                .dtor = _nvkm_devinit_dtor,
                .init = nv50_devinit_init,
                .fini = _nvkm_devinit_fini,
index 535172c..1076fcf 100644 (file)
@@ -161,7 +161,7 @@ struct nvkm_oclass *
 gm204_devinit_oclass = &(struct nvkm_devinit_impl) {
        .base.handle = NV_SUBDEV(DEVINIT, 0x07),
        .base.ofuncs = &(struct nvkm_ofuncs) {
-               .ctor = nv50_devinit_ctor,
+               .ctor = gf100_devinit_ctor,
                .dtor = _nvkm_devinit_dtor,
                .init = nv50_devinit_init,
                .fini = _nvkm_devinit_fini,
index b882b65..9243521 100644 (file)
@@ -15,6 +15,9 @@ int  nv50_devinit_pll_set(struct nvkm_devinit *, u32, u32);
 
 int  gt215_devinit_pll_set(struct nvkm_devinit *, u32, u32);
 
+int  gf100_devinit_ctor(struct nvkm_object *, struct nvkm_object *,
+                       struct nvkm_oclass *, void *, u32,
+                       struct nvkm_object **);
 int  gf100_devinit_pll_set(struct nvkm_devinit *, u32, u32);
 
 u64  gm107_devinit_disable(struct nvkm_devinit *);
index 42b2ea3..dac78ad 100644 (file)
@@ -580,9 +580,6 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
                else
                        radeon_crtc->pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV;
 
-               /* if there is no audio, set MINM_OVER_MAXP  */
-               if (!drm_detect_monitor_audio(radeon_connector_edid(connector)))
-                       radeon_crtc->pll_flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
                if (rdev->family < CHIP_RV770)
                        radeon_crtc->pll_flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
                /* use frac fb div on APUs */
index 3e3290c..b435c85 100644 (file)
@@ -421,19 +421,21 @@ bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector)
 {
        struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
        u8 msg[DP_DPCD_SIZE];
-       int ret;
+       int ret, i;
 
-       ret = drm_dp_dpcd_read(&radeon_connector->ddc_bus->aux, DP_DPCD_REV, msg,
-                              DP_DPCD_SIZE);
-       if (ret > 0) {
-               memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE);
+       for (i = 0; i < 7; i++) {
+               ret = drm_dp_dpcd_read(&radeon_connector->ddc_bus->aux, DP_DPCD_REV, msg,
+                                      DP_DPCD_SIZE);
+               if (ret == DP_DPCD_SIZE) {
+                       memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE);
 
-               DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd),
-                             dig_connector->dpcd);
+                       DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd),
+                                     dig_connector->dpcd);
 
-               radeon_dp_probe_oui(radeon_connector);
+                       radeon_dp_probe_oui(radeon_connector);
 
-               return true;
+                       return true;
+               }
        }
        dig_connector->dpcd[0] = 0;
        return false;
index 28faea9..ba50f3c 100644 (file)
@@ -5837,7 +5837,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
        /* restore context1-15 */
        /* set vm size, must be a multiple of 4 */
        WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
-       WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
+       WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
        for (i = 1; i < 16; i++) {
                if (i < 8)
                        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
index f042051..cfa3a84 100644 (file)
@@ -173,7 +173,7 @@ void dce3_2_hdmi_update_acr(struct drm_encoder *encoder, long offset,
        struct drm_device *dev = encoder->dev;
        struct radeon_device *rdev = dev->dev_private;
 
-       WREG32(HDMI0_ACR_PACKET_CONTROL + offset,
+       WREG32(DCE3_HDMI0_ACR_PACKET_CONTROL + offset,
                HDMI0_ACR_SOURCE |              /* select SW CTS value */
                HDMI0_ACR_AUTO_SEND);   /* allow hw to sent ACR packets when required */
 
index 0926739..9953356 100644 (file)
@@ -400,7 +400,7 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable)
        if (enable) {
                struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
 
-               if (drm_detect_monitor_audio(radeon_connector_edid(connector))) {
+               if (connector && drm_detect_monitor_audio(radeon_connector_edid(connector))) {
                        WREG32(HDMI_INFOFRAME_CONTROL0 + dig->afmt->offset,
                               HDMI_AVI_INFO_SEND | /* enable AVI info frames */
                               HDMI_AVI_INFO_CONT | /* required for audio info values to be updated */
@@ -438,7 +438,8 @@ void evergreen_dp_enable(struct drm_encoder *encoder, bool enable)
        if (!dig || !dig->afmt)
                return;
 
-       if (enable && drm_detect_monitor_audio(radeon_connector_edid(connector))) {
+       if (enable && connector &&
+           drm_detect_monitor_audio(radeon_connector_edid(connector))) {
                struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
                struct radeon_connector_atom_dig *dig_connector;
index e8a496f..64d3a77 100644 (file)
@@ -1301,7 +1301,8 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
         */
        for (i = 1; i < 8; i++) {
                WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
-               WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
+               WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2),
+                       rdev->vm_manager.max_pfn - 1);
                WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
                       rdev->vm_manager.saved_table_addr[i]);
        }
index d2abe48..46eb0fa 100644 (file)
@@ -1673,7 +1673,6 @@ struct radeon_uvd {
        struct radeon_bo        *vcpu_bo;
        void                    *cpu_addr;
        uint64_t                gpu_addr;
-       void                    *saved_bo;
        atomic_t                handles[RADEON_MAX_UVD_HANDLES];
        struct drm_file         *filp[RADEON_MAX_UVD_HANDLES];
        unsigned                img_size[RADEON_MAX_UVD_HANDLES];
index fafd8ce..8dbf508 100644 (file)
@@ -1202,7 +1202,7 @@ static struct radeon_asic rs780_asic = {
 static struct radeon_asic_ring rv770_uvd_ring = {
        .ib_execute = &uvd_v1_0_ib_execute,
        .emit_fence = &uvd_v2_2_fence_emit,
-       .emit_semaphore = &uvd_v1_0_semaphore_emit,
+       .emit_semaphore = &uvd_v2_2_semaphore_emit,
        .cs_parse = &radeon_uvd_cs_parse,
        .ring_test = &uvd_v1_0_ring_test,
        .ib_test = &uvd_v1_0_ib_test,
index cf0a90b..a3ca8cd 100644 (file)
@@ -949,6 +949,10 @@ void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int uvd_v2_2_resume(struct radeon_device *rdev);
 void uvd_v2_2_fence_emit(struct radeon_device *rdev,
                         struct radeon_fence *fence);
+bool uvd_v2_2_semaphore_emit(struct radeon_device *rdev,
+                            struct radeon_ring *ring,
+                            struct radeon_semaphore *semaphore,
+                            bool emit_wait);
 
 /* uvd v3.1 */
 bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
index 8b82abb..25191f1 100644 (file)
@@ -460,33 +460,34 @@ void radeon_audio_detect(struct drm_connector *connector,
        if (!connector || !connector->encoder)
                return;
 
-       if (!radeon_encoder_is_digital(connector->encoder))
+       rdev = connector->encoder->dev->dev_private;
+
+       if (!radeon_audio_chipset_supported(rdev))
                return;
 
-       rdev = connector->encoder->dev->dev_private;
        radeon_encoder = to_radeon_encoder(connector->encoder);
        dig = radeon_encoder->enc_priv;
 
-       if (!dig->afmt)
-               return;
-
        if (status == connector_status_connected) {
-               struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+               struct radeon_connector *radeon_connector;
+               int sink_type;
+
+               if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) {
+                       radeon_encoder->audio = NULL;
+                       return;
+               }
+
+               radeon_connector = to_radeon_connector(connector);
+               sink_type = radeon_dp_getsinktype(radeon_connector);
 
                if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort &&
-                   radeon_dp_getsinktype(radeon_connector) ==
-                   CONNECTOR_OBJECT_ID_DISPLAYPORT)
+                       sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT)
                        radeon_encoder->audio = rdev->audio.dp_funcs;
                else
                        radeon_encoder->audio = rdev->audio.hdmi_funcs;
 
                dig->afmt->pin = radeon_audio_get_pin(connector->encoder);
-               if (drm_detect_monitor_audio(radeon_connector_edid(connector))) {
-                       radeon_audio_enable(rdev, dig->afmt->pin, 0xf);
-               } else {
-                       radeon_audio_enable(rdev, dig->afmt->pin, 0);
-                       dig->afmt->pin = NULL;
-               }
+               radeon_audio_enable(rdev, dig->afmt->pin, 0xf);
        } else {
                radeon_audio_enable(rdev, dig->afmt->pin, 0);
                dig->afmt->pin = NULL;
index d17d251..cebb65e 100644 (file)
@@ -1379,10 +1379,8 @@ out:
        /* updated in get modes as well since we need to know if it's analog or digital */
        radeon_connector_update_scratch_regs(connector, ret);
 
-       if (radeon_audio != 0) {
-               radeon_connector_get_edid(connector);
+       if (radeon_audio != 0)
                radeon_audio_detect(connector, ret);
-       }
 
 exit:
        pm_runtime_mark_last_busy(connector->dev->dev);
@@ -1719,10 +1717,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
 
        radeon_connector_update_scratch_regs(connector, ret);
 
-       if (radeon_audio != 0) {
-               radeon_connector_get_edid(connector);
+       if (radeon_audio != 0)
                radeon_audio_detect(connector, ret);
-       }
 
 out:
        pm_runtime_mark_last_busy(connector->dev->dev);
index b7ca4c5..a7fdfa4 100644 (file)
@@ -1463,6 +1463,21 @@ int radeon_device_init(struct radeon_device *rdev,
        if (r)
                DRM_ERROR("ib ring test failed (%d).\n", r);
 
+       /*
+        * Turks/Thames GPU will freeze whole laptop if DPM is not restarted
+        * after the CP ring have chew one packet at least. Hence here we stop
+        * and restart DPM after the radeon_ib_ring_tests().
+        */
+       if (rdev->pm.dpm_enabled &&
+           (rdev->pm.pm_method == PM_METHOD_DPM) &&
+           (rdev->family == CHIP_TURKS) &&
+           (rdev->flags & RADEON_IS_MOBILITY)) {
+               mutex_lock(&rdev->pm.mutex);
+               radeon_dpm_disable(rdev);
+               radeon_dpm_enable(rdev);
+               mutex_unlock(&rdev->pm.mutex);
+       }
+
        if ((radeon_testing & 1)) {
                if (rdev->accel_working)
                        radeon_test_moves(rdev);
index bf1fecc..fcbd60b 100644 (file)
@@ -30,8 +30,6 @@
                            AUX_SW_RX_HPD_DISCON |           \
                            AUX_SW_RX_PARTIAL_BYTE |         \
                            AUX_SW_NON_AUX_MODE |            \
-                           AUX_SW_RX_MIN_COUNT_VIOL |       \
-                           AUX_SW_RX_INVALID_STOP |         \
                            AUX_SW_RX_SYNC_INVALID_L |       \
                            AUX_SW_RX_SYNC_INVALID_H |       \
                            AUX_SW_RX_INVALID_START |        \
index 1017338..257b10b 100644 (file)
@@ -663,9 +663,17 @@ int
 radeon_dp_mst_probe(struct radeon_connector *radeon_connector)
 {
        struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
+       struct drm_device *dev = radeon_connector->base.dev;
+       struct radeon_device *rdev = dev->dev_private;
        int ret;
        u8 msg[1];
 
+       if (!radeon_mst)
+               return 0;
+
+       if (!ASIC_IS_DCE5(rdev))
+               return 0;
+
        if (dig_connector->dpcd[DP_DPCD_REV] < 0x12)
                return 0;
 
index 7b2a733..b0acf50 100644 (file)
@@ -576,6 +576,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                if (radeon_get_allowed_info_register(rdev, *value, value))
                        return -EINVAL;
                break;
+       case RADEON_INFO_VA_UNMAP_WORKING:
+               *value = true;
+               break;
        default:
                DRM_DEBUG_KMS("Invalid request %d\n", info->request);
                return -EINVAL;
index 535bf40..eef006c 100644 (file)
@@ -142,6 +142,9 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 
                list_for_each_entry(bo, &node->bos, mn_list) {
 
+                       if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
+                               continue;
+
                        r = radeon_bo_reserve(bo, true);
                        if (r) {
                                DRM_ERROR("(%ld) failed to reserve user bo\n", r);
index b292aca..edafd3c 100644 (file)
@@ -591,8 +591,7 @@ static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 {
        struct radeon_device *rdev = radeon_get_rdev(ttm->bdev);
        struct radeon_ttm_tt *gtt = (void *)ttm;
-       struct scatterlist *sg;
-       int i;
+       struct sg_page_iter sg_iter;
 
        int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY);
        enum dma_data_direction direction = write ?
@@ -605,9 +604,8 @@ static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
        /* free the sg table and pages again */
        dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
-       for_each_sg(ttm->sg->sgl, sg, ttm->sg->nents, i) {
-               struct page *page = sg_page(sg);
-
+       for_each_sg_page(ttm->sg->sgl, &sg_iter, ttm->sg->nents, 0) {
+               struct page *page = sg_page_iter_page(&sg_iter);
                if (!(gtt->userflags & RADEON_GEM_USERPTR_READONLY))
                        set_page_dirty(page);
 
index c10b2ae..6edcb54 100644 (file)
@@ -204,28 +204,32 @@ void radeon_uvd_fini(struct radeon_device *rdev)
 
 int radeon_uvd_suspend(struct radeon_device *rdev)
 {
-       unsigned size;
-       void *ptr;
-       int i;
+       int i, r;
 
        if (rdev->uvd.vcpu_bo == NULL)
                return 0;
 
-       for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
-               if (atomic_read(&rdev->uvd.handles[i]))
-                       break;
+       for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+               uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
+               if (handle != 0) {
+                       struct radeon_fence *fence;
 
-       if (i == RADEON_MAX_UVD_HANDLES)
-               return 0;
+                       radeon_uvd_note_usage(rdev);
 
-       size = radeon_bo_size(rdev->uvd.vcpu_bo);
-       size -= rdev->uvd_fw->size;
+                       r = radeon_uvd_get_destroy_msg(rdev,
+                               R600_RING_TYPE_UVD_INDEX, handle, &fence);
+                       if (r) {
+                               DRM_ERROR("Error destroying UVD (%d)!\n", r);
+                               continue;
+                       }
 
-       ptr = rdev->uvd.cpu_addr;
-       ptr += rdev->uvd_fw->size;
+                       radeon_fence_wait(fence, false);
+                       radeon_fence_unref(&fence);
 
-       rdev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
-       memcpy(rdev->uvd.saved_bo, ptr, size);
+                       rdev->uvd.filp[i] = NULL;
+                       atomic_set(&rdev->uvd.handles[i], 0);
+               }
+       }
 
        return 0;
 }
@@ -246,12 +250,7 @@ int radeon_uvd_resume(struct radeon_device *rdev)
        ptr = rdev->uvd.cpu_addr;
        ptr += rdev->uvd_fw->size;
 
-       if (rdev->uvd.saved_bo != NULL) {
-               memcpy(ptr, rdev->uvd.saved_bo, size);
-               kfree(rdev->uvd.saved_bo);
-               rdev->uvd.saved_bo = NULL;
-       } else
-               memset(ptr, 0, size);
+       memset(ptr, 0, size);
 
        return 0;
 }
@@ -396,6 +395,29 @@ static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
        return 0;
 }
 
+static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
+                                    unsigned stream_type)
+{
+       switch (stream_type) {
+       case 0: /* H264 */
+       case 1: /* VC1 */
+               /* always supported */
+               return 0;
+
+       case 3: /* MPEG2 */
+       case 4: /* MPEG4 */
+               /* only since UVD 3 */
+               if (p->rdev->family >= CHIP_PALM)
+                       return 0;
+
+               /* fall through */
+       default:
+               DRM_ERROR("UVD codec not supported by hardware %d!\n",
+                         stream_type);
+               return -EINVAL;
+       }
+}
+
 static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
                             unsigned offset, unsigned buf_sizes[])
 {
@@ -436,50 +458,70 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
                return -EINVAL;
        }
 
-       if (msg_type == 1) {
-               /* it's a decode msg, calc buffer sizes */
-               r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
-               /* calc image size (width * height) */
-               img_size = msg[6] * msg[7];
+       switch (msg_type) {
+       case 0:
+               /* it's a create msg, calc image size (width * height) */
+               img_size = msg[7] * msg[8];
+
+               r = radeon_uvd_validate_codec(p, msg[4]);
                radeon_bo_kunmap(bo);
                if (r)
                        return r;
 
-       } else if (msg_type == 2) {
+               /* try to alloc a new handle */
+               for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+                       if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
+                               DRM_ERROR("Handle 0x%x already in use!\n", handle);
+                               return -EINVAL;
+                       }
+
+                       if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
+                               p->rdev->uvd.filp[i] = p->filp;
+                               p->rdev->uvd.img_size[i] = img_size;
+                               return 0;
+                       }
+               }
+
+               DRM_ERROR("No more free UVD handles!\n");
+               return -EINVAL;
+
+       case 1:
+               /* it's a decode msg, validate codec and calc buffer sizes */
+               r = radeon_uvd_validate_codec(p, msg[4]);
+               if (!r)
+                       r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
+               radeon_bo_kunmap(bo);
+               if (r)
+                       return r;
+
+               /* validate the handle */
+               for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+                       if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
+                               if (p->rdev->uvd.filp[i] != p->filp) {
+                                       DRM_ERROR("UVD handle collision detected!\n");
+                                       return -EINVAL;
+                               }
+                               return 0;
+                       }
+               }
+
+               DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
+               return -ENOENT;
+
+       case 2:
                /* it's a destroy msg, free the handle */
                for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
                        atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
                radeon_bo_kunmap(bo);
                return 0;
-       } else {
-               /* it's a create msg, calc image size (width * height) */
-               img_size = msg[7] * msg[8];
-               radeon_bo_kunmap(bo);
 
-               if (msg_type != 0) {
-                       DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
-                       return -EINVAL;
-               }
-
-               /* it's a create msg, no special handling needed */
-       }
-
-       /* create or decode, validate the handle */
-       for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
-               if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
-                       return 0;
-       }
+       default:
 
-       /* handle not found try to alloc a new one */
-       for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
-               if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
-                       p->rdev->uvd.filp[i] = p->filp;
-                       p->rdev->uvd.img_size[i] = img_size;
-                       return 0;
-               }
+               DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
+               return -EINVAL;
        }
 
-       DRM_ERROR("No more free UVD handles!\n");
+       BUG();
        return -EINVAL;
 }
 
index 24f849f..0de5711 100644 (file)
@@ -493,18 +493,27 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi,
  *
  * @p: parser context
  * @handle: handle to validate
+ * @allocated: allocated a new handle?
  *
  * Validates the handle and return the found session index or -EINVAL
  * we we don't have another free session index.
  */
-int radeon_vce_validate_handle(struct radeon_cs_parser *p, uint32_t handle)
+static int radeon_vce_validate_handle(struct radeon_cs_parser *p,
+                                     uint32_t handle, bool *allocated)
 {
        unsigned i;
 
+       *allocated = false;
+
        /* validate the handle */
        for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
-               if (atomic_read(&p->rdev->vce.handles[i]) == handle)
+               if (atomic_read(&p->rdev->vce.handles[i]) == handle) {
+                       if (p->rdev->vce.filp[i] != p->filp) {
+                               DRM_ERROR("VCE handle collision detected!\n");
+                               return -EINVAL;
+                       }
                        return i;
+               }
        }
 
        /* handle not found try to alloc a new one */
@@ -512,6 +521,7 @@ int radeon_vce_validate_handle(struct radeon_cs_parser *p, uint32_t handle)
                if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) {
                        p->rdev->vce.filp[i] = p->filp;
                        p->rdev->vce.img_size[i] = 0;
+                       *allocated = true;
                        return i;
                }
        }
@@ -529,10 +539,10 @@ int radeon_vce_validate_handle(struct radeon_cs_parser *p, uint32_t handle)
 int radeon_vce_cs_parse(struct radeon_cs_parser *p)
 {
        int session_idx = -1;
-       bool destroyed = false;
+       bool destroyed = false, created = false, allocated = false;
        uint32_t tmp, handle = 0;
        uint32_t *size = &tmp;
-       int i, r;
+       int i, r = 0;
 
        while (p->idx < p->chunk_ib->length_dw) {
                uint32_t len = radeon_get_ib_value(p, p->idx);
@@ -540,18 +550,21 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p)
 
                if ((len < 8) || (len & 3)) {
                        DRM_ERROR("invalid VCE command length (%d)!\n", len);
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto out;
                }
 
                if (destroyed) {
                        DRM_ERROR("No other command allowed after destroy!\n");
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto out;
                }
 
                switch (cmd) {
                case 0x00000001: // session
                        handle = radeon_get_ib_value(p, p->idx + 2);
-                       session_idx = radeon_vce_validate_handle(p, handle);
+                       session_idx = radeon_vce_validate_handle(p, handle,
+                                                                &allocated);
                        if (session_idx < 0)
                                return session_idx;
                        size = &p->rdev->vce.img_size[session_idx];
@@ -561,6 +574,13 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p)
                        break;
 
                case 0x01000001: // create
+                       created = true;
+                       if (!allocated) {
+                               DRM_ERROR("Handle already in use!\n");
+                               r = -EINVAL;
+                               goto out;
+                       }
+
                        *size = radeon_get_ib_value(p, p->idx + 8) *
                                radeon_get_ib_value(p, p->idx + 10) *
                                8 * 3 / 2;
@@ -578,12 +598,12 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p)
                        r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9,
                                                *size);
                        if (r)
-                               return r;
+                               goto out;
 
                        r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11,
                                                *size / 3);
                        if (r)
-                               return r;
+                               goto out;
                        break;
 
                case 0x02000001: // destroy
@@ -594,7 +614,7 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p)
                        r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2,
                                                *size * 2);
                        if (r)
-                               return r;
+                               goto out;
                        break;
 
                case 0x05000004: // video bitstream buffer
@@ -602,36 +622,47 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p)
                        r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2,
                                                tmp);
                        if (r)
-                               return r;
+                               goto out;
                        break;
 
                case 0x05000005: // feedback buffer
                        r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2,
                                                4096);
                        if (r)
-                               return r;
+                               goto out;
                        break;
 
                default:
                        DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto out;
                }
 
                if (session_idx == -1) {
                        DRM_ERROR("no session command at start of IB\n");
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto out;
                }
 
                p->idx += len / 4;
        }
 
-       if (destroyed) {
-               /* IB contains a destroy msg, free the handle */
+       if (allocated && !created) {
+               DRM_ERROR("New session without create command!\n");
+               r = -ENOENT;
+       }
+
+out:
+       if ((!r && destroyed) || (r && allocated)) {
+               /*
+                * IB contains a destroy msg or we have allocated an
+                * handle and got an error, anyway free the handle
+                */
                for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i)
                        atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0);
        }
 
-       return 0;
+       return r;
 }
 
 /**
index de42fc4..9c3377c 100644 (file)
@@ -458,14 +458,16 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                /* make sure object fit at this offset */
                eoffset = soffset + size;
                if (soffset >= eoffset) {
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto error_unreserve;
                }
 
                last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
                if (last_pfn > rdev->vm_manager.max_pfn) {
                        dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
                                last_pfn, rdev->vm_manager.max_pfn);
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto error_unreserve;
                }
 
        } else {
@@ -486,7 +488,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                                "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
                                soffset, tmp->bo, tmp->it.start, tmp->it.last);
                        mutex_unlock(&vm->mutex);
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto error_unreserve;
                }
        }
 
@@ -497,7 +500,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                        tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
                        if (!tmp) {
                                mutex_unlock(&vm->mutex);
-                               return -ENOMEM;
+                               r = -ENOMEM;
+                               goto error_unreserve;
                        }
                        tmp->it.start = bo_va->it.start;
                        tmp->it.last = bo_va->it.last;
@@ -555,7 +559,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                r = radeon_vm_clear_bo(rdev, pt);
                if (r) {
                        radeon_bo_unref(&pt);
-                       radeon_bo_reserve(bo_va->bo, false);
                        return r;
                }
 
@@ -575,6 +578,10 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 
        mutex_unlock(&vm->mutex);
        return 0;
+
+error_unreserve:
+       radeon_bo_unreserve(bo_va->bo);
+       return r;
 }
 
 /**
index 3cf1e29..9ef2064 100644 (file)
                         ((n) & 0x3FFF) << 16)
 
 /* UVD */
+#define UVD_SEMA_ADDR_LOW                              0xef00
+#define UVD_SEMA_ADDR_HIGH                             0xef04
+#define UVD_SEMA_CMD                                   0xef08
 #define UVD_GPCOM_VCPU_CMD                             0xef0c
 #define UVD_GPCOM_VCPU_DATA0                           0xef10
 #define UVD_GPCOM_VCPU_DATA1                           0xef14
index b1d74bc..4c679b8 100644 (file)
@@ -4318,7 +4318,7 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
        /* empty context1-15 */
        /* set vm size, must be a multiple of 4 */
        WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
-       WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
+       WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
        /* Assign the pt base to something valid for now; the pts used for
         * the VMs are determined by the application and setup and assigned
         * on the fly in the vm part of radeon_gart.c
index e72b3cb..c6b1cbc 100644 (file)
@@ -466,18 +466,8 @@ bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
                             struct radeon_semaphore *semaphore,
                             bool emit_wait)
 {
-       uint64_t addr = semaphore->gpu_addr;
-
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
-       radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
-       radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-
-       radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
-       radeon_ring_write(ring, emit_wait ? 1 : 0);
-
-       return true;
+       /* disable semaphores for UVD V1 hardware */
+       return false;
 }
 
 /**
index 8919351..7ed778c 100644 (file)
@@ -60,6 +60,35 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev,
 }
 
 /**
+ * uvd_v2_2_semaphore_emit - emit semaphore command
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring pointer
+ * @semaphore: semaphore to emit commands for
+ * @emit_wait: true if we should emit a wait command
+ *
+ * Emit a semaphore command (either wait or signal) to the UVD ring.
+ */
+bool uvd_v2_2_semaphore_emit(struct radeon_device *rdev,
+                            struct radeon_ring *ring,
+                            struct radeon_semaphore *semaphore,
+                            bool emit_wait)
+{
+       uint64_t addr = semaphore->gpu_addr;
+
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+       radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+       radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+       radeon_ring_write(ring, emit_wait ? 1 : 0);
+
+       return true;
+}
+
+/**
  * uvd_v2_2_resume - memory controller programming
  *
  * @rdev: radeon_device pointer
index 1833abd..bfad15a 100644 (file)
@@ -173,7 +173,6 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
        drm->irq_enabled = true;
 
        /* syncpoints are used for full 32-bit hardware VBLANK counters */
-       drm->vblank_disable_immediate = true;
        drm->max_vblank_count = 0xffffffff;
 
        err = drm_vblank_init(drm, drm->mode_config.num_crtc);
index 1055cb7..3f4c7b8 100644 (file)
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o vgem_dma_buf.o
+vgem-y := vgem_drv.o
 
 obj-$(CONFIG_DRM_VGEM) += vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_dma_buf.c b/drivers/gpu/drm/vgem/vgem_dma_buf.c
deleted file mode 100644 (file)
index 0254438..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright Â© 2012 Intel Corporation
- * Copyright Â© 2014 The Chromium OS Authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Ben Widawsky <ben@bwidawsk.net>
- *
- */
-
-#include <linux/dma-buf.h>
-#include "vgem_drv.h"
-
-struct sg_table *vgem_gem_prime_get_sg_table(struct drm_gem_object *gobj)
-{
-       struct drm_vgem_gem_object *obj = to_vgem_bo(gobj);
-       BUG_ON(obj->pages == NULL);
-
-       return drm_prime_pages_to_sg(obj->pages, obj->base.size / PAGE_SIZE);
-}
-
-int vgem_gem_prime_pin(struct drm_gem_object *gobj)
-{
-       struct drm_vgem_gem_object *obj = to_vgem_bo(gobj);
-       return vgem_gem_get_pages(obj);
-}
-
-void vgem_gem_prime_unpin(struct drm_gem_object *gobj)
-{
-       struct drm_vgem_gem_object *obj = to_vgem_bo(gobj);
-       vgem_gem_put_pages(obj);
-}
-
-void *vgem_gem_prime_vmap(struct drm_gem_object *gobj)
-{
-       struct drm_vgem_gem_object *obj = to_vgem_bo(gobj);
-       BUG_ON(obj->pages == NULL);
-
-       return vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
-}
-
-void vgem_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
-{
-       vunmap(vaddr);
-}
-
-struct drm_gem_object *vgem_gem_prime_import(struct drm_device *dev,
-                                            struct dma_buf *dma_buf)
-{
-       struct drm_vgem_gem_object *obj = NULL;
-       int ret;
-
-       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
-       if (obj == NULL) {
-               ret = -ENOMEM;
-               goto fail;
-       }
-
-       ret = drm_gem_object_init(dev, &obj->base, dma_buf->size);
-       if (ret) {
-               ret = -ENOMEM;
-               goto fail_free;
-       }
-
-       get_dma_buf(dma_buf);
-
-       obj->base.dma_buf = dma_buf;
-       obj->use_dma_buf = true;
-
-       return &obj->base;
-
-fail_free:
-       kfree(obj);
-fail:
-       return ERR_PTR(ret);
-}
index cb3b435..7a207ca 100644 (file)
@@ -302,22 +302,13 @@ static const struct file_operations vgem_driver_fops = {
 };
 
 static struct drm_driver vgem_driver = {
-       .driver_features                = DRIVER_GEM | DRIVER_PRIME,
+       .driver_features                = DRIVER_GEM,
        .gem_free_object                = vgem_gem_free_object,
        .gem_vm_ops                     = &vgem_gem_vm_ops,
        .ioctls                         = vgem_ioctls,
        .fops                           = &vgem_driver_fops,
        .dumb_create                    = vgem_gem_dumb_create,
        .dumb_map_offset                = vgem_gem_dumb_map,
-       .prime_handle_to_fd             = drm_gem_prime_handle_to_fd,
-       .prime_fd_to_handle             = drm_gem_prime_fd_to_handle,
-       .gem_prime_export               = drm_gem_prime_export,
-       .gem_prime_import               = vgem_gem_prime_import,
-       .gem_prime_pin                  = vgem_gem_prime_pin,
-       .gem_prime_unpin                = vgem_gem_prime_unpin,
-       .gem_prime_get_sg_table         = vgem_gem_prime_get_sg_table,
-       .gem_prime_vmap                 = vgem_gem_prime_vmap,
-       .gem_prime_vunmap               = vgem_gem_prime_vunmap,
        .name   = DRIVER_NAME,
        .desc   = DRIVER_DESC,
        .date   = DRIVER_DATE,
index 57ab4d8..e9f92f7 100644 (file)
@@ -43,15 +43,4 @@ struct drm_vgem_gem_object {
 extern void vgem_gem_put_pages(struct drm_vgem_gem_object *obj);
 extern int vgem_gem_get_pages(struct drm_vgem_gem_object *obj);
 
-/* vgem_dma_buf.c */
-extern struct sg_table *vgem_gem_prime_get_sg_table(
-                       struct drm_gem_object *gobj);
-extern int vgem_gem_prime_pin(struct drm_gem_object *gobj);
-extern void vgem_gem_prime_unpin(struct drm_gem_object *gobj);
-extern void *vgem_gem_prime_vmap(struct drm_gem_object *gobj);
-extern void vgem_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
-extern struct drm_gem_object *vgem_gem_prime_import(struct drm_device *dev,
-                                                   struct dma_buf *dma_buf);
-
-
 #endif
index 41f167e..7ce93d9 100644 (file)
 #define USB_DEVICE_ID_ATEN_2PORTKVM    0x2204
 #define USB_DEVICE_ID_ATEN_4PORTKVM    0x2205
 #define USB_DEVICE_ID_ATEN_4PORTKVMC   0x2208
+#define USB_DEVICE_ID_ATEN_CS682       0x2213
 
 #define USB_VENDOR_ID_ATMEL            0x03eb
 #define USB_DEVICE_ID_ATMEL_MULTITOUCH 0x211c
index b3cf6fd..5fd530a 100644 (file)
@@ -44,7 +44,6 @@ MODULE_PARM_DESC(disable_raw_mode,
 /* bits 1..20 are reserved for classes */
 #define HIDPP_QUIRK_DELAYED_INIT               BIT(21)
 #define HIDPP_QUIRK_WTP_PHYSICAL_BUTTONS       BIT(22)
-#define HIDPP_QUIRK_MULTI_INPUT                        BIT(23)
 
 /*
  * There are two hidpp protocols in use, the first version hidpp10 is known
@@ -706,12 +705,6 @@ static int wtp_input_mapping(struct hid_device *hdev, struct hid_input *hi,
                struct hid_field *field, struct hid_usage *usage,
                unsigned long **bit, int *max)
 {
-       struct hidpp_device *hidpp = hid_get_drvdata(hdev);
-
-       if ((hidpp->quirks & HIDPP_QUIRK_MULTI_INPUT) &&
-           (field->application == HID_GD_KEYBOARD))
-               return 0;
-
        return -1;
 }
 
@@ -720,10 +713,6 @@ static void wtp_populate_input(struct hidpp_device *hidpp,
 {
        struct wtp_data *wd = hidpp->private_data;
 
-       if ((hidpp->quirks & HIDPP_QUIRK_MULTI_INPUT) && origin_is_hid_core)
-               /* this is the generic hid-input call */
-               return;
-
        __set_bit(EV_ABS, input_dev->evbit);
        __set_bit(EV_KEY, input_dev->evbit);
        __clear_bit(EV_REL, input_dev->evbit);
@@ -1245,10 +1234,6 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
        if (hidpp->quirks & HIDPP_QUIRK_DELAYED_INIT)
                connect_mask &= ~HID_CONNECT_HIDINPUT;
 
-       /* Re-enable hidinput for multi-input devices */
-       if (hidpp->quirks & HIDPP_QUIRK_MULTI_INPUT)
-               connect_mask |= HID_CONNECT_HIDINPUT;
-
        ret = hid_hw_start(hdev, connect_mask);
        if (ret) {
                hid_err(hdev, "%s:hid_hw_start returned error\n", __func__);
@@ -1296,11 +1281,6 @@ static const struct hid_device_id hidpp_devices[] = {
          HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH,
                USB_DEVICE_ID_LOGITECH_T651),
          .driver_data = HIDPP_QUIRK_CLASS_WTP },
-       { /* Keyboard TK820 */
-         HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
-               USB_VENDOR_ID_LOGITECH, 0x4102),
-         .driver_data = HIDPP_QUIRK_DELAYED_INIT | HIDPP_QUIRK_MULTI_INPUT |
-                        HIDPP_QUIRK_CLASS_WTP },
 
        { HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
                USB_VENDOR_ID_LOGITECH, HID_ANY_ID)},
index c3f6f1e..090a1ba 100644 (file)
@@ -294,7 +294,7 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
        if (!report)
                return -EINVAL;
 
-       mutex_lock(&hsdev->mutex);
+       mutex_lock(hsdev->mutex_ptr);
        if (flag == SENSOR_HUB_SYNC) {
                memset(&hsdev->pending, 0, sizeof(hsdev->pending));
                init_completion(&hsdev->pending.ready);
@@ -328,7 +328,7 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
                kfree(hsdev->pending.raw_data);
                hsdev->pending.status = false;
        }
-       mutex_unlock(&hsdev->mutex);
+       mutex_unlock(hsdev->mutex_ptr);
 
        return ret_val;
 }
@@ -667,7 +667,14 @@ static int sensor_hub_probe(struct hid_device *hdev,
                        hsdev->vendor_id = hdev->vendor;
                        hsdev->product_id = hdev->product;
                        hsdev->usage = collection->usage;
-                       mutex_init(&hsdev->mutex);
+                       hsdev->mutex_ptr = devm_kzalloc(&hdev->dev,
+                                                       sizeof(struct mutex),
+                                                       GFP_KERNEL);
+                       if (!hsdev->mutex_ptr) {
+                               ret = -ENOMEM;
+                               goto err_stop_hw;
+                       }
+                       mutex_init(hsdev->mutex_ptr);
                        hsdev->start_collection_index = i;
                        if (last_hsdev)
                                last_hsdev->end_collection_index = i;
index ab4dd95..92d6cdf 100644 (file)
@@ -862,6 +862,7 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client,
        union acpi_object *obj;
        struct acpi_device *adev;
        acpi_handle handle;
+       int ret;
 
        handle = ACPI_HANDLE(&client->dev);
        if (!handle || acpi_bus_get_device(handle, &adev))
@@ -877,7 +878,9 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client,
        pdata->hid_descriptor_address = obj->integer.value;
        ACPI_FREE(obj);
 
-       return acpi_dev_add_driver_gpios(adev, i2c_hid_acpi_gpios);
+       /* GPIOs are optional */
+       ret = acpi_dev_add_driver_gpios(adev, i2c_hid_acpi_gpios);
+       return ret < 0 && ret != -ENXIO ? ret : 0;
 }
 
 static const struct acpi_device_id i2c_hid_acpi_match[] = {
index a775143..4696895 100644 (file)
@@ -61,6 +61,7 @@ static const struct hid_blacklist {
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC, HID_QUIRK_NOGET },
+       { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS682, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FIGHTERSTICK, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_COMBATSTICK, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_ECLIPSE_YOKE, HID_QUIRK_NOGET },
index fa54d32..adf959d 100644 (file)
@@ -1072,6 +1072,9 @@ static int wacom_wac_finger_count_touches(struct wacom_wac *wacom)
        int count = 0;
        int i;
 
+       if (!touch_max)
+               return 0;
+
        /* non-HID_GENERIC single touch input doesn't call this routine */
        if ((touch_max == 1) && (wacom->features.type == HID_GENERIC))
                return wacom->hid_data.tipswitch &&
index ed303ba..3e03379 100644 (file)
@@ -63,7 +63,8 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
 #define TO_ATTR_NO(cpu)                (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO)
 
 #ifdef CONFIG_SMP
-#define for_each_sibling(i, cpu)       for_each_cpu(i, cpu_sibling_mask(cpu))
+#define for_each_sibling(i, cpu) \
+       for_each_cpu(i, topology_sibling_cpumask(cpu))
 #else
 #define for_each_sibling(i, cpu)       for (i = 0; false; )
 #endif
index f3830db..37f0170 100644 (file)
@@ -439,6 +439,7 @@ nct6683_create_attr_group(struct device *dev, struct sensor_template_group *tg,
                                 (*t)->dev_attr.attr.name, tg->base + i);
                        if ((*t)->s2) {
                                a2 = &su->u.a2;
+                               sysfs_attr_init(&a2->dev_attr.attr);
                                a2->dev_attr.attr.name = su->name;
                                a2->nr = (*t)->u.s.nr + i;
                                a2->index = (*t)->u.s.index;
@@ -449,6 +450,7 @@ nct6683_create_attr_group(struct device *dev, struct sensor_template_group *tg,
                                *attrs = &a2->dev_attr.attr;
                        } else {
                                a = &su->u.a1;
+                               sysfs_attr_init(&a->dev_attr.attr);
                                a->dev_attr.attr.name = su->name;
                                a->index = (*t)->u.index + i;
                                a->dev_attr.attr.mode =
index 4fcb481..bd1c99d 100644 (file)
@@ -995,6 +995,7 @@ nct6775_create_attr_group(struct device *dev, struct sensor_template_group *tg,
                                 (*t)->dev_attr.attr.name, tg->base + i);
                        if ((*t)->s2) {
                                a2 = &su->u.a2;
+                               sysfs_attr_init(&a2->dev_attr.attr);
                                a2->dev_attr.attr.name = su->name;
                                a2->nr = (*t)->u.s.nr + i;
                                a2->index = (*t)->u.s.index;
@@ -1005,6 +1006,7 @@ nct6775_create_attr_group(struct device *dev, struct sensor_template_group *tg,
                                *attrs = &a2->dev_attr.attr;
                        } else {
                                a = &su->u.a1;
+                               sysfs_attr_init(&a->dev_attr.attr);
                                a->dev_attr.attr.name = su->name;
                                a->index = (*t)->u.index + i;
                                a->dev_attr.attr.mode =
index 112e4d4..6880011 100644 (file)
@@ -239,8 +239,10 @@ static struct ntc_thermistor_platform_data *
 ntc_thermistor_parse_dt(struct platform_device *pdev)
 {
        struct iio_channel *chan;
+       enum iio_chan_type type;
        struct device_node *np = pdev->dev.of_node;
        struct ntc_thermistor_platform_data *pdata;
+       int ret;
 
        if (!np)
                return NULL;
@@ -253,6 +255,13 @@ ntc_thermistor_parse_dt(struct platform_device *pdev)
        if (IS_ERR(chan))
                return ERR_CAST(chan);
 
+       ret = iio_get_channel_type(chan, &type);
+       if (ret < 0)
+               return ERR_PTR(ret);
+
+       if (type != IIO_VOLTAGE)
+               return ERR_PTR(-EINVAL);
+
        if (of_property_read_u32(np, "pullup-uv", &pdata->pullup_uv))
                return ERR_PTR(-ENODEV);
        if (of_property_read_u32(np, "pullup-ohm", &pdata->pullup_ohm))
index 99664eb..ccf4cff 100644 (file)
@@ -44,7 +44,7 @@
 #include <linux/sysfs.h>
 
 /* Addresses to scan */
-static const unsigned short normal_i2c[] = { 0x37, 0x48, 0x49, 0x4a, 0x4c, 0x4d,
+static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4c, 0x4d,
        0x4e, 0x4f, I2C_CLIENT_END };
 
 enum chips { tmp401, tmp411, tmp431, tmp432, tmp435 };
index 8fe78d0..7c69664 100644 (file)
@@ -554,4 +554,4 @@ module_platform_driver(hix5hd2_i2c_driver);
 MODULE_DESCRIPTION("Hix5hd2 I2C Bus driver");
 MODULE_AUTHOR("Wei Yan <sledge.yanwei@huawei.com>");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:i2c-hix5hd2");
+MODULE_ALIAS("platform:hix5hd2-i2c");
index 958c8db..297e9c9 100644 (file)
@@ -1143,6 +1143,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        i2c->quirks = s3c24xx_get_device_quirks(pdev);
+       i2c->sysreg = ERR_PTR(-ENOENT);
        if (pdata)
                memcpy(i2c->pdata, pdata, sizeof(*pdata));
        else
index a04c49f..39ea67f 100644 (file)
@@ -643,15 +643,6 @@ config BLK_DEV_TC86C001
        help
        This driver adds support for Toshiba TC86C001 GOKU-S chip.
 
-config BLK_DEV_CELLEB
-       tristate "Toshiba's Cell Reference Set IDE support"
-       depends on PPC_CELLEB
-       select BLK_DEV_IDEDMA_PCI
-       help
-         This driver provides support for the on-board IDE controller on
-         Toshiba Cell Reference Board.
-         If unsure, say Y.
-
 endif
 
 # TODO: BLK_DEV_IDEDMA_PCI -> BLK_DEV_IDEDMA_SFF
index a04ee82..2a8c417 100644 (file)
@@ -38,7 +38,6 @@ obj-$(CONFIG_BLK_DEV_AEC62XX)         += aec62xx.o
 obj-$(CONFIG_BLK_DEV_ALI15X3)          += alim15x3.o
 obj-$(CONFIG_BLK_DEV_AMD74XX)          += amd74xx.o
 obj-$(CONFIG_BLK_DEV_ATIIXP)           += atiixp.o
-obj-$(CONFIG_BLK_DEV_CELLEB)           += scc_pata.o
 obj-$(CONFIG_BLK_DEV_CMD64X)           += cmd64x.o
 obj-$(CONFIG_BLK_DEV_CS5520)           += cs5520.o
 obj-$(CONFIG_BLK_DEV_CS5530)           += cs5530.o
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
deleted file mode 100644 (file)
index 2a2d188..0000000
+++ /dev/null
@@ -1,887 +0,0 @@
-/*
- * Support for IDE interfaces on Celleb platform
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This code is based on drivers/ide/pci/siimage.c:
- * Copyright (C) 2001-2002     Andre Hedrick <andre@linux-ide.org>
- * Copyright (C) 2003          Red Hat
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define PCI_DEVICE_ID_TOSHIBA_SCC_ATA            0x01b4
-
-#define SCC_PATA_NAME           "scc IDE"
-
-#define TDVHSEL_MASTER          0x00000001
-#define TDVHSEL_SLAVE           0x00000004
-
-#define MODE_JCUSFEN            0x00000080
-
-#define CCKCTRL_ATARESET        0x00040000
-#define CCKCTRL_BUFCNT          0x00020000
-#define CCKCTRL_CRST            0x00010000
-#define CCKCTRL_OCLKEN          0x00000100
-#define CCKCTRL_ATACLKOEN       0x00000002
-#define CCKCTRL_LCLKEN          0x00000001
-
-#define QCHCD_IOS_SS           0x00000001
-
-#define QCHSD_STPDIAG          0x00020000
-
-#define INTMASK_MSK             0xD1000012
-#define INTSTS_SERROR          0x80000000
-#define INTSTS_PRERR           0x40000000
-#define INTSTS_RERR            0x10000000
-#define INTSTS_ICERR           0x01000000
-#define INTSTS_BMSINT          0x00000010
-#define INTSTS_BMHE            0x00000008
-#define INTSTS_IOIRQS           0x00000004
-#define INTSTS_INTRQ            0x00000002
-#define INTSTS_ACTEINT          0x00000001
-
-#define ECMODE_VALUE 0x01
-
-static struct scc_ports {
-       unsigned long ctl, dma;
-       struct ide_host *host;  /* for removing port from system */
-} scc_ports[MAX_HWIFS];
-
-/* PIO transfer mode  table */
-/* JCHST */
-static unsigned long JCHSTtbl[2][7] = {
-       {0x0E, 0x05, 0x02, 0x03, 0x02, 0x00, 0x00},   /* 100MHz */
-       {0x13, 0x07, 0x04, 0x04, 0x03, 0x00, 0x00}    /* 133MHz */
-};
-
-/* JCHHT */
-static unsigned long JCHHTtbl[2][7] = {
-       {0x0E, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00},   /* 100MHz */
-       {0x13, 0x03, 0x03, 0x03, 0x03, 0x00, 0x00}    /* 133MHz */
-};
-
-/* JCHCT */
-static unsigned long JCHCTtbl[2][7] = {
-       {0x1D, 0x1D, 0x1C, 0x0B, 0x06, 0x00, 0x00},   /* 100MHz */
-       {0x27, 0x26, 0x26, 0x0E, 0x09, 0x00, 0x00}    /* 133MHz */
-};
-
-
-/* DMA transfer mode  table */
-/* JCHDCTM/JCHDCTS */
-static unsigned long JCHDCTxtbl[2][7] = {
-       {0x0A, 0x06, 0x04, 0x03, 0x01, 0x00, 0x00},   /* 100MHz */
-       {0x0E, 0x09, 0x06, 0x04, 0x02, 0x01, 0x00}    /* 133MHz */
-};
-
-/* JCSTWTM/JCSTWTS  */
-static unsigned long JCSTWTxtbl[2][7] = {
-       {0x06, 0x04, 0x03, 0x02, 0x02, 0x02, 0x00},   /* 100MHz */
-       {0x09, 0x06, 0x04, 0x02, 0x02, 0x02, 0x02}    /* 133MHz */
-};
-
-/* JCTSS */
-static unsigned long JCTSStbl[2][7] = {
-       {0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x00},   /* 100MHz */
-       {0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05}    /* 133MHz */
-};
-
-/* JCENVT */
-static unsigned long JCENVTtbl[2][7] = {
-       {0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00},   /* 100MHz */
-       {0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02}    /* 133MHz */
-};
-
-/* JCACTSELS/JCACTSELM */
-static unsigned long JCACTSELtbl[2][7] = {
-       {0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00},   /* 100MHz */
-       {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}    /* 133MHz */
-};
-
-
-static u8 scc_ide_inb(unsigned long port)
-{
-       u32 data = in_be32((void*)port);
-       return (u8)data;
-}
-
-static void scc_exec_command(ide_hwif_t *hwif, u8 cmd)
-{
-       out_be32((void *)hwif->io_ports.command_addr, cmd);
-       eieio();
-       in_be32((void *)(hwif->dma_base + 0x01c));
-       eieio();
-}
-
-static u8 scc_read_status(ide_hwif_t *hwif)
-{
-       return (u8)in_be32((void *)hwif->io_ports.status_addr);
-}
-
-static u8 scc_read_altstatus(ide_hwif_t *hwif)
-{
-       return (u8)in_be32((void *)hwif->io_ports.ctl_addr);
-}
-
-static u8 scc_dma_sff_read_status(ide_hwif_t *hwif)
-{
-       return (u8)in_be32((void *)(hwif->dma_base + 4));
-}
-
-static void scc_write_devctl(ide_hwif_t *hwif, u8 ctl)
-{
-       out_be32((void *)hwif->io_ports.ctl_addr, ctl);
-       eieio();
-       in_be32((void *)(hwif->dma_base + 0x01c));
-       eieio();
-}
-
-static void scc_ide_insw(unsigned long port, void *addr, u32 count)
-{
-       u16 *ptr = (u16 *)addr;
-       while (count--) {
-               *ptr++ = le16_to_cpu(in_be32((void*)port));
-       }
-}
-
-static void scc_ide_insl(unsigned long port, void *addr, u32 count)
-{
-       u16 *ptr = (u16 *)addr;
-       while (count--) {
-               *ptr++ = le16_to_cpu(in_be32((void*)port));
-               *ptr++ = le16_to_cpu(in_be32((void*)port));
-       }
-}
-
-static void scc_ide_outb(u8 addr, unsigned long port)
-{
-       out_be32((void*)port, addr);
-}
-
-static void
-scc_ide_outsw(unsigned long port, void *addr, u32 count)
-{
-       u16 *ptr = (u16 *)addr;
-       while (count--) {
-               out_be32((void*)port, cpu_to_le16(*ptr++));
-       }
-}
-
-static void
-scc_ide_outsl(unsigned long port, void *addr, u32 count)
-{
-       u16 *ptr = (u16 *)addr;
-       while (count--) {
-               out_be32((void*)port, cpu_to_le16(*ptr++));
-               out_be32((void*)port, cpu_to_le16(*ptr++));
-       }
-}
-
-/**
- *     scc_set_pio_mode        -       set host controller for PIO mode
- *     @hwif: port
- *     @drive: drive
- *
- *     Load the timing settings for this device mode into the
- *     controller.
- */
-
-static void scc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-       struct scc_ports *ports = ide_get_hwifdata(hwif);
-       unsigned long ctl_base = ports->ctl;
-       unsigned long cckctrl_port = ctl_base + 0xff0;
-       unsigned long piosht_port = ctl_base + 0x000;
-       unsigned long pioct_port = ctl_base + 0x004;
-       unsigned long reg;
-       int offset;
-       const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-       reg = in_be32((void __iomem *)cckctrl_port);
-       if (reg & CCKCTRL_ATACLKOEN) {
-               offset = 1; /* 133MHz */
-       } else {
-               offset = 0; /* 100MHz */
-       }
-       reg = JCHSTtbl[offset][pio] << 16 | JCHHTtbl[offset][pio];
-       out_be32((void __iomem *)piosht_port, reg);
-       reg = JCHCTtbl[offset][pio];
-       out_be32((void __iomem *)pioct_port, reg);
-}
-
-/**
- *     scc_set_dma_mode        -       set host controller for DMA mode
- *     @hwif: port
- *     @drive: drive
- *
- *     Load the timing settings for this device mode into the
- *     controller.
- */
-
-static void scc_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-       struct scc_ports *ports = ide_get_hwifdata(hwif);
-       unsigned long ctl_base = ports->ctl;
-       unsigned long cckctrl_port = ctl_base + 0xff0;
-       unsigned long mdmact_port = ctl_base + 0x008;
-       unsigned long mcrcst_port = ctl_base + 0x00c;
-       unsigned long sdmact_port = ctl_base + 0x010;
-       unsigned long scrcst_port = ctl_base + 0x014;
-       unsigned long udenvt_port = ctl_base + 0x018;
-       unsigned long tdvhsel_port   = ctl_base + 0x020;
-       int is_slave = drive->dn & 1;
-       int offset, idx;
-       unsigned long reg;
-       unsigned long jcactsel;
-       const u8 speed = drive->dma_mode;
-
-       reg = in_be32((void __iomem *)cckctrl_port);
-       if (reg & CCKCTRL_ATACLKOEN) {
-               offset = 1; /* 133MHz */
-       } else {
-               offset = 0; /* 100MHz */
-       }
-
-       idx = speed - XFER_UDMA_0;
-
-       jcactsel = JCACTSELtbl[offset][idx];
-       if (is_slave) {
-               out_be32((void __iomem *)sdmact_port, JCHDCTxtbl[offset][idx]);
-               out_be32((void __iomem *)scrcst_port, JCSTWTxtbl[offset][idx]);
-               jcactsel = jcactsel << 2;
-               out_be32((void __iomem *)tdvhsel_port, (in_be32((void __iomem *)tdvhsel_port) & ~TDVHSEL_SLAVE) | jcactsel);
-       } else {
-               out_be32((void __iomem *)mdmact_port, JCHDCTxtbl[offset][idx]);
-               out_be32((void __iomem *)mcrcst_port, JCSTWTxtbl[offset][idx]);
-               out_be32((void __iomem *)tdvhsel_port, (in_be32((void __iomem *)tdvhsel_port) & ~TDVHSEL_MASTER) | jcactsel);
-       }
-       reg = JCTSStbl[offset][idx] << 16 | JCENVTtbl[offset][idx];
-       out_be32((void __iomem *)udenvt_port, reg);
-}
-
-static void scc_dma_host_set(ide_drive_t *drive, int on)
-{
-       ide_hwif_t *hwif = drive->hwif;
-       u8 unit = drive->dn & 1;
-       u8 dma_stat = scc_dma_sff_read_status(hwif);
-
-       if (on)
-               dma_stat |= (1 << (5 + unit));
-       else
-               dma_stat &= ~(1 << (5 + unit));
-
-       scc_ide_outb(dma_stat, hwif->dma_base + 4);
-}
-
-/**
- *     scc_dma_setup   -       begin a DMA phase
- *     @drive: target device
- *     @cmd: command
- *
- *     Build an IDE DMA PRD (IDE speak for scatter gather table)
- *     and then set up the DMA transfer registers.
- *
- *     Returns 0 on success. If a PIO fallback is required then 1
- *     is returned.
- */
-
-static int scc_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-       ide_hwif_t *hwif = drive->hwif;
-       u32 rw = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 0 : ATA_DMA_WR;
-       u8 dma_stat;
-
-       /* fall back to pio! */
-       if (ide_build_dmatable(drive, cmd) == 0)
-               return 1;
-
-       /* PRD table */
-       out_be32((void __iomem *)(hwif->dma_base + 8), hwif->dmatable_dma);
-
-       /* specify r/w */
-       out_be32((void __iomem *)hwif->dma_base, rw);
-
-       /* read DMA status for INTR & ERROR flags */
-       dma_stat = scc_dma_sff_read_status(hwif);
-
-       /* clear INTR & ERROR flags */
-       out_be32((void __iomem *)(hwif->dma_base + 4), dma_stat | 6);
-
-       return 0;
-}
-
-static void scc_dma_start(ide_drive_t *drive)
-{
-       ide_hwif_t *hwif = drive->hwif;
-       u8 dma_cmd = scc_ide_inb(hwif->dma_base);
-
-       /* start DMA */
-       scc_ide_outb(dma_cmd | 1, hwif->dma_base);
-}
-
-static int __scc_dma_end(ide_drive_t *drive)
-{
-       ide_hwif_t *hwif = drive->hwif;
-       u8 dma_stat, dma_cmd;
-
-       /* get DMA command mode */
-       dma_cmd = scc_ide_inb(hwif->dma_base);
-       /* stop DMA */
-       scc_ide_outb(dma_cmd & ~1, hwif->dma_base);
-       /* get DMA status */
-       dma_stat = scc_dma_sff_read_status(hwif);
-       /* clear the INTR & ERROR bits */
-       scc_ide_outb(dma_stat | 6, hwif->dma_base + 4);
-       /* verify good DMA status */
-       return (dma_stat & 7) != 4 ? (0x10 | dma_stat) : 0;
-}
-
-/**
- *     scc_dma_end     -       Stop DMA
- *     @drive: IDE drive
- *
- *     Check and clear INT Status register.
- *     Then call __scc_dma_end().
- */
-
-static int scc_dma_end(ide_drive_t *drive)
-{
-       ide_hwif_t *hwif = drive->hwif;
-       void __iomem *dma_base = (void __iomem *)hwif->dma_base;
-       unsigned long intsts_port = hwif->dma_base + 0x014;
-       u32 reg;
-       int dma_stat, data_loss = 0;
-       static int retry = 0;
-
-       /* errata A308 workaround: Step5 (check data loss) */
-       /* We don't check non ide_disk because it is limited to UDMA4 */
-       if (!(in_be32((void __iomem *)hwif->io_ports.ctl_addr)
-             & ATA_ERR) &&
-           drive->media == ide_disk && drive->current_speed > XFER_UDMA_4) {
-               reg = in_be32((void __iomem *)intsts_port);
-               if (!(reg & INTSTS_ACTEINT)) {
-                       printk(KERN_WARNING "%s: operation failed (transfer data loss)\n",
-                              drive->name);
-                       data_loss = 1;
-                       if (retry++) {
-                               struct request *rq = hwif->rq;
-                               ide_drive_t *drive;
-                               int i;
-
-                               /* ERROR_RESET and drive->crc_count are needed
-                                * to reduce DMA transfer mode in retry process.
-                                */
-                               if (rq)
-                                       rq->errors |= ERROR_RESET;
-
-                               ide_port_for_each_dev(i, drive, hwif)
-                                       drive->crc_count++;
-                       }
-               }
-       }
-
-       while (1) {
-               reg = in_be32((void __iomem *)intsts_port);
-
-               if (reg & INTSTS_SERROR) {
-                       printk(KERN_WARNING "%s: SERROR\n", SCC_PATA_NAME);
-                       out_be32((void __iomem *)intsts_port, INTSTS_SERROR|INTSTS_BMSINT);
-
-                       out_be32(dma_base, in_be32(dma_base) & ~QCHCD_IOS_SS);
-                       continue;
-               }
-
-               if (reg & INTSTS_PRERR) {
-                       u32 maea0, maec0;
-                       unsigned long ctl_base = hwif->config_data;
-
-                       maea0 = in_be32((void __iomem *)(ctl_base + 0xF50));
-                       maec0 = in_be32((void __iomem *)(ctl_base + 0xF54));
-
-                       printk(KERN_WARNING "%s: PRERR [addr:%x cmd:%x]\n", SCC_PATA_NAME, maea0, maec0);
-
-                       out_be32((void __iomem *)intsts_port, INTSTS_PRERR|INTSTS_BMSINT);
-
-                       out_be32(dma_base, in_be32(dma_base) & ~QCHCD_IOS_SS);
-                       continue;
-               }
-
-               if (reg & INTSTS_RERR) {
-                       printk(KERN_WARNING "%s: Response Error\n", SCC_PATA_NAME);
-                       out_be32((void __iomem *)intsts_port, INTSTS_RERR|INTSTS_BMSINT);
-
-                       out_be32(dma_base, in_be32(dma_base) & ~QCHCD_IOS_SS);
-                       continue;
-               }
-
-               if (reg & INTSTS_ICERR) {
-                       out_be32(dma_base, in_be32(dma_base) & ~QCHCD_IOS_SS);
-
-                       printk(KERN_WARNING "%s: Illegal Configuration\n", SCC_PATA_NAME);
-                       out_be32((void __iomem *)intsts_port, INTSTS_ICERR|INTSTS_BMSINT);
-                       continue;
-               }
-
-               if (reg & INTSTS_BMSINT) {
-                       printk(KERN_WARNING "%s: Internal Bus Error\n", SCC_PATA_NAME);
-                       out_be32((void __iomem *)intsts_port, INTSTS_BMSINT);
-
-                       ide_do_reset(drive);
-                       continue;
-               }
-
-               if (reg & INTSTS_BMHE) {
-                       out_be32((void __iomem *)intsts_port, INTSTS_BMHE);
-                       continue;
-               }
-
-               if (reg & INTSTS_ACTEINT) {
-                       out_be32((void __iomem *)intsts_port, INTSTS_ACTEINT);
-                       continue;
-               }
-
-               if (reg & INTSTS_IOIRQS) {
-                       out_be32((void __iomem *)intsts_port, INTSTS_IOIRQS);
-                       continue;
-               }
-               break;
-       }
-
-       dma_stat = __scc_dma_end(drive);
-       if (data_loss)
-               dma_stat |= 2; /* emulate DMA error (to retry command) */
-       return dma_stat;
-}
-
-/* returns 1 if dma irq issued, 0 otherwise */
-static int scc_dma_test_irq(ide_drive_t *drive)
-{
-       ide_hwif_t *hwif = drive->hwif;
-       u32 int_stat = in_be32((void __iomem *)hwif->dma_base + 0x014);
-
-       /* SCC errata A252,A308 workaround: Step4 */
-       if ((in_be32((void __iomem *)hwif->io_ports.ctl_addr)
-            & ATA_ERR) &&
-           (int_stat & INTSTS_INTRQ))
-               return 1;
-
-       /* SCC errata A308 workaround: Step5 (polling IOIRQS) */
-       if (int_stat & INTSTS_IOIRQS)
-               return 1;
-
-       return 0;
-}
-
-static u8 scc_udma_filter(ide_drive_t *drive)
-{
-       ide_hwif_t *hwif = drive->hwif;
-       u8 mask = hwif->ultra_mask;
-
-       /* errata A308 workaround: limit non ide_disk drive to UDMA4 */
-       if ((drive->media != ide_disk) && (mask & 0xE0)) {
-               printk(KERN_INFO "%s: limit %s to UDMA4\n",
-                      SCC_PATA_NAME, drive->name);
-               mask = ATA_UDMA4;
-       }
-
-       return mask;
-}
-
-/**
- *     setup_mmio_scc  -       map CTRL/BMID region
- *     @dev: PCI device we are configuring
- *     @name: device name
- *
- */
-
-static int setup_mmio_scc (struct pci_dev *dev, const char *name)
-{
-       void __iomem *ctl_addr;
-       void __iomem *dma_addr;
-       int i, ret;
-
-       for (i = 0; i < MAX_HWIFS; i++) {
-               if (scc_ports[i].ctl == 0)
-                       break;
-       }
-       if (i >= MAX_HWIFS)
-               return -ENOMEM;
-
-       ret = pci_request_selected_regions(dev, (1 << 2) - 1, name);
-       if (ret < 0) {
-               printk(KERN_ERR "%s: can't reserve resources\n", name);
-               return ret;
-       }
-
-       ctl_addr = pci_ioremap_bar(dev, 0);
-       if (!ctl_addr)
-               goto fail_0;
-
-       dma_addr = pci_ioremap_bar(dev, 1);
-       if (!dma_addr)
-               goto fail_1;
-
-       pci_set_master(dev);
-       scc_ports[i].ctl = (unsigned long)ctl_addr;
-       scc_ports[i].dma = (unsigned long)dma_addr;
-       pci_set_drvdata(dev, (void *) &scc_ports[i]);
-
-       return 1;
-
- fail_1:
-       iounmap(ctl_addr);
- fail_0:
-       return -ENOMEM;
-}
-
-static int scc_ide_setup_pci_device(struct pci_dev *dev,
-                                   const struct ide_port_info *d)
-{
-       struct scc_ports *ports = pci_get_drvdata(dev);
-       struct ide_host *host;
-       struct ide_hw hw, *hws[] = { &hw };
-       int i, rc;
-
-       memset(&hw, 0, sizeof(hw));
-       for (i = 0; i <= 8; i++)
-               hw.io_ports_array[i] = ports->dma + 0x20 + i * 4;
-       hw.irq = dev->irq;
-       hw.dev = &dev->dev;
-
-       rc = ide_host_add(d, hws, 1, &host);
-       if (rc)
-               return rc;
-
-       ports->host = host;
-
-       return 0;
-}
-
-/**
- *     init_setup_scc  -       set up an SCC PATA Controller
- *     @dev: PCI device
- *     @d: IDE port info
- *
- *     Perform the initial set up for this device.
- */
-
-static int init_setup_scc(struct pci_dev *dev, const struct ide_port_info *d)
-{
-       unsigned long ctl_base;
-       unsigned long dma_base;
-       unsigned long cckctrl_port;
-       unsigned long intmask_port;
-       unsigned long mode_port;
-       unsigned long ecmode_port;
-       u32 reg = 0;
-       struct scc_ports *ports;
-       int rc;
-
-       rc = pci_enable_device(dev);
-       if (rc)
-               goto end;
-
-       rc = setup_mmio_scc(dev, d->name);
-       if (rc < 0)
-               goto end;
-
-       ports = pci_get_drvdata(dev);
-       ctl_base = ports->ctl;
-       dma_base = ports->dma;
-       cckctrl_port = ctl_base + 0xff0;
-       intmask_port = dma_base + 0x010;
-       mode_port = ctl_base + 0x024;
-       ecmode_port = ctl_base + 0xf00;
-
-       /* controller initialization */
-       reg = 0;
-       out_be32((void*)cckctrl_port, reg);
-       reg |= CCKCTRL_ATACLKOEN;
-       out_be32((void*)cckctrl_port, reg);
-       reg |= CCKCTRL_LCLKEN | CCKCTRL_OCLKEN;
-       out_be32((void*)cckctrl_port, reg);
-       reg |= CCKCTRL_CRST;
-       out_be32((void*)cckctrl_port, reg);
-
-       for (;;) {
-               reg = in_be32((void*)cckctrl_port);
-               if (reg & CCKCTRL_CRST)
-                       break;
-               udelay(5000);
-       }
-
-       reg |= CCKCTRL_ATARESET;
-       out_be32((void*)cckctrl_port, reg);
-
-       out_be32((void*)ecmode_port, ECMODE_VALUE);
-       out_be32((void*)mode_port, MODE_JCUSFEN);
-       out_be32((void*)intmask_port, INTMASK_MSK);
-
-       rc = scc_ide_setup_pci_device(dev, d);
-
- end:
-       return rc;
-}
-
-static void scc_tf_load(ide_drive_t *drive, struct ide_taskfile *tf, u8 valid)
-{
-       struct ide_io_ports *io_ports = &drive->hwif->io_ports;
-
-       if (valid & IDE_VALID_FEATURE)
-               scc_ide_outb(tf->feature, io_ports->feature_addr);
-       if (valid & IDE_VALID_NSECT)
-               scc_ide_outb(tf->nsect, io_ports->nsect_addr);
-       if (valid & IDE_VALID_LBAL)
-               scc_ide_outb(tf->lbal, io_ports->lbal_addr);
-       if (valid & IDE_VALID_LBAM)
-               scc_ide_outb(tf->lbam, io_ports->lbam_addr);
-       if (valid & IDE_VALID_LBAH)
-               scc_ide_outb(tf->lbah, io_ports->lbah_addr);
-       if (valid & IDE_VALID_DEVICE)
-               scc_ide_outb(tf->device, io_ports->device_addr);
-}
-
-static void scc_tf_read(ide_drive_t *drive, struct ide_taskfile *tf, u8 valid)
-{
-       struct ide_io_ports *io_ports = &drive->hwif->io_ports;
-
-       if (valid & IDE_VALID_ERROR)
-               tf->error  = scc_ide_inb(io_ports->feature_addr);
-       if (valid & IDE_VALID_NSECT)
-               tf->nsect  = scc_ide_inb(io_ports->nsect_addr);
-       if (valid & IDE_VALID_LBAL)
-               tf->lbal   = scc_ide_inb(io_ports->lbal_addr);
-       if (valid & IDE_VALID_LBAM)
-               tf->lbam   = scc_ide_inb(io_ports->lbam_addr);
-       if (valid & IDE_VALID_LBAH)
-               tf->lbah   = scc_ide_inb(io_ports->lbah_addr);
-       if (valid & IDE_VALID_DEVICE)
-               tf->device = scc_ide_inb(io_ports->device_addr);
-}
-
-static void scc_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
-                          void *buf, unsigned int len)
-{
-       unsigned long data_addr = drive->hwif->io_ports.data_addr;
-
-       len++;
-
-       if (drive->io_32bit) {
-               scc_ide_insl(data_addr, buf, len / 4);
-
-               if ((len & 3) >= 2)
-                       scc_ide_insw(data_addr, (u8 *)buf + (len & ~3), 1);
-       } else
-               scc_ide_insw(data_addr, buf, len / 2);
-}
-
-static void scc_output_data(ide_drive_t *drive,  struct ide_cmd *cmd,
-                           void *buf, unsigned int len)
-{
-       unsigned long data_addr = drive->hwif->io_ports.data_addr;
-
-       len++;
-
-       if (drive->io_32bit) {
-               scc_ide_outsl(data_addr, buf, len / 4);
-
-               if ((len & 3) >= 2)
-                       scc_ide_outsw(data_addr, (u8 *)buf + (len & ~3), 1);
-       } else
-               scc_ide_outsw(data_addr, buf, len / 2);
-}
-
-/**
- *     init_mmio_iops_scc      -       set up the iops for MMIO
- *     @hwif: interface to set up
- *
- */
-
-static void init_mmio_iops_scc(ide_hwif_t *hwif)
-{
-       struct pci_dev *dev = to_pci_dev(hwif->dev);
-       struct scc_ports *ports = pci_get_drvdata(dev);
-       unsigned long dma_base = ports->dma;
-
-       ide_set_hwifdata(hwif, ports);
-
-       hwif->dma_base = dma_base;
-       hwif->config_data = ports->ctl;
-}
-
-/**
- *     init_iops_scc   -       set up iops
- *     @hwif: interface to set up
- *
- *     Do the basic setup for the SCC hardware interface
- *     and then do the MMIO setup.
- */
-
-static void init_iops_scc(ide_hwif_t *hwif)
-{
-       struct pci_dev *dev = to_pci_dev(hwif->dev);
-
-       hwif->hwif_data = NULL;
-       if (pci_get_drvdata(dev) == NULL)
-               return;
-       init_mmio_iops_scc(hwif);
-}
-
-static int scc_init_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-       return ide_allocate_dma_engine(hwif);
-}
-
-static u8 scc_cable_detect(ide_hwif_t *hwif)
-{
-       return ATA_CBL_PATA80;
-}
-
-/**
- *     init_hwif_scc   -       set up hwif
- *     @hwif: interface to set up
- *
- *     We do the basic set up of the interface structure. The SCC
- *     requires several custom handlers so we override the default
- *     ide DMA handlers appropriately.
- */
-
-static void init_hwif_scc(ide_hwif_t *hwif)
-{
-       /* PTERADD */
-       out_be32((void __iomem *)(hwif->dma_base + 0x018), hwif->dmatable_dma);
-
-       if (in_be32((void __iomem *)(hwif->config_data + 0xff0)) & CCKCTRL_ATACLKOEN)
-               hwif->ultra_mask = ATA_UDMA6; /* 133MHz */
-       else
-               hwif->ultra_mask = ATA_UDMA5; /* 100MHz */
-}
-
-static const struct ide_tp_ops scc_tp_ops = {
-       .exec_command           = scc_exec_command,
-       .read_status            = scc_read_status,
-       .read_altstatus         = scc_read_altstatus,
-       .write_devctl           = scc_write_devctl,
-
-       .dev_select             = ide_dev_select,
-       .tf_load                = scc_tf_load,
-       .tf_read                = scc_tf_read,
-
-       .input_data             = scc_input_data,
-       .output_data            = scc_output_data,
-};
-
-static const struct ide_port_ops scc_port_ops = {
-       .set_pio_mode           = scc_set_pio_mode,
-       .set_dma_mode           = scc_set_dma_mode,
-       .udma_filter            = scc_udma_filter,
-       .cable_detect           = scc_cable_detect,
-};
-
-static const struct ide_dma_ops scc_dma_ops = {
-       .dma_host_set           = scc_dma_host_set,
-       .dma_setup              = scc_dma_setup,
-       .dma_start              = scc_dma_start,
-       .dma_end                = scc_dma_end,
-       .dma_test_irq           = scc_dma_test_irq,
-       .dma_lost_irq           = ide_dma_lost_irq,
-       .dma_timer_expiry       = ide_dma_sff_timer_expiry,
-       .dma_sff_read_status    = scc_dma_sff_read_status,
-};
-
-static const struct ide_port_info scc_chipset = {
-       .name           = "sccIDE",
-       .init_iops      = init_iops_scc,
-       .init_dma       = scc_init_dma,
-       .init_hwif      = init_hwif_scc,
-       .tp_ops         = &scc_tp_ops,
-       .port_ops       = &scc_port_ops,
-       .dma_ops        = &scc_dma_ops,
-       .host_flags     = IDE_HFLAG_SINGLE,
-       .irq_flags      = IRQF_SHARED,
-       .pio_mask       = ATA_PIO4,
-       .chipset        = ide_pci,
-};
-
-/**
- *     scc_init_one    -       pci layer discovery entry
- *     @dev: PCI device
- *     @id: ident table entry
- *
- *     Called by the PCI code when it finds an SCC PATA controller.
- *     We then use the IDE PCI generic helper to do most of the work.
- */
-
-static int scc_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-       return init_setup_scc(dev, &scc_chipset);
-}
-
-/**
- *     scc_remove      -       pci layer remove entry
- *     @dev: PCI device
- *
- *     Called by the PCI code when it removes an SCC PATA controller.
- */
-
-static void scc_remove(struct pci_dev *dev)
-{
-       struct scc_ports *ports = pci_get_drvdata(dev);
-       struct ide_host *host = ports->host;
-
-       ide_host_remove(host);
-
-       iounmap((void*)ports->dma);
-       iounmap((void*)ports->ctl);
-       pci_release_selected_regions(dev, (1 << 2) - 1);
-       memset(ports, 0, sizeof(*ports));
-}
-
-static const struct pci_device_id scc_pci_tbl[] = {
-       { PCI_VDEVICE(TOSHIBA_2, PCI_DEVICE_ID_TOSHIBA_SCC_ATA), 0 },
-       { 0, },
-};
-MODULE_DEVICE_TABLE(pci, scc_pci_tbl);
-
-static struct pci_driver scc_pci_driver = {
-       .name = "SCC IDE",
-       .id_table = scc_pci_tbl,
-       .probe = scc_init_one,
-       .remove = scc_remove,
-};
-
-static int __init scc_ide_init(void)
-{
-       return ide_pci_register_driver(&scc_pci_driver);
-}
-
-static void __exit scc_ide_exit(void)
-{
-       pci_unregister_driver(&scc_pci_driver);
-}
-
-module_init(scc_ide_init);
-module_exit(scc_ide_exit);
-
-MODULE_DESCRIPTION("PCI driver module for Toshiba SCC IDE");
-MODULE_LICENSE("GPL");
index 7f55a6d..c6d5a3a 100644 (file)
@@ -389,7 +389,12 @@ int mma9551_read_config_words(struct i2c_client *client, u8 app_id,
 {
        int ret, i;
        int len_words = len / sizeof(u16);
-       __be16 be_buf[MMA9551_MAX_MAILBOX_DATA_REGS];
+       __be16 be_buf[MMA9551_MAX_MAILBOX_DATA_REGS / 2];
+
+       if (len_words > ARRAY_SIZE(be_buf)) {
+               dev_err(&client->dev, "Invalid buffer size %d\n", len);
+               return -EINVAL;
+       }
 
        ret = mma9551_transfer(client, app_id, MMA9551_CMD_READ_CONFIG,
                               reg, NULL, 0, (u8 *) be_buf, len);
@@ -424,7 +429,12 @@ int mma9551_read_status_words(struct i2c_client *client, u8 app_id,
 {
        int ret, i;
        int len_words = len / sizeof(u16);
-       __be16 be_buf[MMA9551_MAX_MAILBOX_DATA_REGS];
+       __be16 be_buf[MMA9551_MAX_MAILBOX_DATA_REGS / 2];
+
+       if (len_words > ARRAY_SIZE(be_buf)) {
+               dev_err(&client->dev, "Invalid buffer size %d\n", len);
+               return -EINVAL;
+       }
 
        ret = mma9551_transfer(client, app_id, MMA9551_CMD_READ_STATUS,
                               reg, NULL, 0, (u8 *) be_buf, len);
@@ -459,7 +469,12 @@ int mma9551_write_config_words(struct i2c_client *client, u8 app_id,
 {
        int i;
        int len_words = len / sizeof(u16);
-       __be16 be_buf[MMA9551_MAX_MAILBOX_DATA_REGS];
+       __be16 be_buf[(MMA9551_MAX_MAILBOX_DATA_REGS - 1) / 2];
+
+       if (len_words > ARRAY_SIZE(be_buf)) {
+               dev_err(&client->dev, "Invalid buffer size %d\n", len);
+               return -EINVAL;
+       }
 
        for (i = 0; i < len_words; i++)
                be_buf[i] = cpu_to_be16(buf[i]);
index 2df1af7..365a109 100644 (file)
@@ -54,6 +54,7 @@
 #define MMA9553_MASK_CONF_STEPCOALESCE         GENMASK(7, 0)
 
 #define MMA9553_REG_CONF_ACTTHD                        0x0E
+#define MMA9553_MAX_ACTTHD                     GENMASK(15, 0)
 
 /* Pedometer status registers (R-only) */
 #define MMA9553_REG_STATUS                     0x00
@@ -316,22 +317,19 @@ static int mma9553_set_config(struct mma9553_data *data, u16 reg,
 static int mma9553_read_activity_stepcnt(struct mma9553_data *data,
                                         u8 *activity, u16 *stepcnt)
 {
-       u32 status_stepcnt;
-       u16 status;
+       u16 buf[2];
        int ret;
 
        ret = mma9551_read_status_words(data->client, MMA9551_APPID_PEDOMETER,
-                                       MMA9553_REG_STATUS, sizeof(u32),
-                                       (u16 *) &status_stepcnt);
+                                       MMA9553_REG_STATUS, sizeof(u32), buf);
        if (ret < 0) {
                dev_err(&data->client->dev,
                        "error reading status and stepcnt\n");
                return ret;
        }
 
-       status = status_stepcnt & MMA9553_MASK_CONF_WORD;
-       *activity = mma9553_get_bits(status, MMA9553_MASK_STATUS_ACTIVITY);
-       *stepcnt = status_stepcnt >> 16;
+       *activity = mma9553_get_bits(buf[0], MMA9553_MASK_STATUS_ACTIVITY);
+       *stepcnt = buf[1];
 
        return 0;
 }
@@ -872,6 +870,9 @@ static int mma9553_write_event_value(struct iio_dev *indio_dev,
        case IIO_EV_INFO_PERIOD:
                switch (chan->type) {
                case IIO_ACTIVITY:
+                       if (val < 0 || val > MMA9553_ACTIVITY_THD_TO_SEC(
+                           MMA9553_MAX_ACTTHD))
+                               return -EINVAL;
                        mutex_lock(&data->mutex);
                        ret = mma9553_set_config(data, MMA9553_REG_CONF_ACTTHD,
                                                 &data->conf.actthd,
@@ -971,7 +972,8 @@ static const struct iio_chan_spec_ext_info mma9553_ext_info[] = {
        .modified = 1,                                                  \
        .channel2 = _chan2,                                             \
        .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),             \
-       .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_CALIBHEIGHT),     \
+       .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_CALIBHEIGHT) |    \
+                                   BIT(IIO_CHAN_INFO_ENABLE),          \
        .event_spec = mma9553_activity_events,                          \
        .num_event_specs = ARRAY_SIZE(mma9553_activity_events),         \
        .ext_info = mma9553_ext_info,                                   \
index 58d1d13..211b132 100644 (file)
@@ -546,6 +546,7 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 
        indio_dev->modes = INDIO_DIRECT_MODE;
        indio_dev->info = &accel_info;
+       mutex_init(&adata->tb.buf_lock);
 
        st_sensors_power_enable(indio_dev);
 
index 08bcfb0..56008a8 100644 (file)
@@ -53,39 +53,42 @@ static const struct iio_chan_spec const axp288_adc_channels[] = {
                .channel = 0,
                .address = AXP288_TS_ADC_H,
                .datasheet_name = "TS_PIN",
+               .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
        }, {
                .indexed = 1,
                .type = IIO_TEMP,
                .channel = 1,
                .address = AXP288_PMIC_ADC_H,
                .datasheet_name = "PMIC_TEMP",
+               .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
        }, {
                .indexed = 1,
                .type = IIO_TEMP,
                .channel = 2,
                .address = AXP288_GP_ADC_H,
                .datasheet_name = "GPADC",
+               .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
        }, {
                .indexed = 1,
                .type = IIO_CURRENT,
                .channel = 3,
                .address = AXP20X_BATT_CHRG_I_H,
                .datasheet_name = "BATT_CHG_I",
-               .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
+               .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
        }, {
                .indexed = 1,
                .type = IIO_CURRENT,
                .channel = 4,
                .address = AXP20X_BATT_DISCHRG_I_H,
                .datasheet_name = "BATT_DISCHRG_I",
-               .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
+               .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
        }, {
                .indexed = 1,
                .type = IIO_VOLTAGE,
                .channel = 5,
                .address = AXP20X_BATT_V_H,
                .datasheet_name = "BATT_V",
-               .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
+               .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
        },
 };
 
@@ -151,9 +154,6 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev,
                                                chan->address))
                        dev_err(&indio_dev->dev, "TS pin restore\n");
                break;
-       case IIO_CHAN_INFO_PROCESSED:
-               ret = axp288_adc_read_channel(val, chan->address, info->regmap);
-               break;
        default:
                ret = -EINVAL;
        }
index 51e2a83..115f6e9 100644 (file)
@@ -35,8 +35,9 @@
 #define CC10001_ADC_EOC_SET            BIT(0)
 
 #define CC10001_ADC_CHSEL_SAMPLED      0x0c
-#define CC10001_ADC_POWER_UP           0x10
-#define CC10001_ADC_POWER_UP_SET       BIT(0)
+#define CC10001_ADC_POWER_DOWN         0x10
+#define CC10001_ADC_POWER_DOWN_SET     BIT(0)
+
 #define CC10001_ADC_DEBUG              0x14
 #define CC10001_ADC_DATA_COUNT         0x20
 
@@ -62,7 +63,6 @@ struct cc10001_adc_device {
        u16 *buf;
 
        struct mutex lock;
-       unsigned long channel_map;
        unsigned int start_delay_ns;
        unsigned int eoc_delay_ns;
 };
@@ -79,6 +79,18 @@ static inline u32 cc10001_adc_read_reg(struct cc10001_adc_device *adc_dev,
        return readl(adc_dev->reg_base + reg);
 }
 
+static void cc10001_adc_power_up(struct cc10001_adc_device *adc_dev)
+{
+       cc10001_adc_write_reg(adc_dev, CC10001_ADC_POWER_DOWN, 0);
+       ndelay(adc_dev->start_delay_ns);
+}
+
+static void cc10001_adc_power_down(struct cc10001_adc_device *adc_dev)
+{
+       cc10001_adc_write_reg(adc_dev, CC10001_ADC_POWER_DOWN,
+                             CC10001_ADC_POWER_DOWN_SET);
+}
+
 static void cc10001_adc_start(struct cc10001_adc_device *adc_dev,
                              unsigned int channel)
 {
@@ -88,6 +100,7 @@ static void cc10001_adc_start(struct cc10001_adc_device *adc_dev,
        val = (channel & CC10001_ADC_CH_MASK) | CC10001_ADC_MODE_SINGLE_CONV;
        cc10001_adc_write_reg(adc_dev, CC10001_ADC_CONFIG, val);
 
+       udelay(1);
        val = cc10001_adc_read_reg(adc_dev, CC10001_ADC_CONFIG);
        val = val | CC10001_ADC_START_CONV;
        cc10001_adc_write_reg(adc_dev, CC10001_ADC_CONFIG, val);
@@ -129,6 +142,7 @@ static irqreturn_t cc10001_adc_trigger_h(int irq, void *p)
        struct iio_dev *indio_dev;
        unsigned int delay_ns;
        unsigned int channel;
+       unsigned int scan_idx;
        bool sample_invalid;
        u16 *data;
        int i;
@@ -139,20 +153,17 @@ static irqreturn_t cc10001_adc_trigger_h(int irq, void *p)
 
        mutex_lock(&adc_dev->lock);
 
-       cc10001_adc_write_reg(adc_dev, CC10001_ADC_POWER_UP,
-                             CC10001_ADC_POWER_UP_SET);
-
-       /* Wait for 8 (6+2) clock cycles before activating START */
-       ndelay(adc_dev->start_delay_ns);
+       cc10001_adc_power_up(adc_dev);
 
        /* Calculate delay step for eoc and sampled data */
        delay_ns = adc_dev->eoc_delay_ns / CC10001_MAX_POLL_COUNT;
 
        i = 0;
        sample_invalid = false;
-       for_each_set_bit(channel, indio_dev->active_scan_mask,
+       for_each_set_bit(scan_idx, indio_dev->active_scan_mask,
                                  indio_dev->masklength) {
 
+               channel = indio_dev->channels[scan_idx].channel;
                cc10001_adc_start(adc_dev, channel);
 
                data[i] = cc10001_adc_poll_done(indio_dev, channel, delay_ns);
@@ -166,7 +177,7 @@ static irqreturn_t cc10001_adc_trigger_h(int irq, void *p)
        }
 
 done:
-       cc10001_adc_write_reg(adc_dev, CC10001_ADC_POWER_UP, 0);
+       cc10001_adc_power_down(adc_dev);
 
        mutex_unlock(&adc_dev->lock);
 
@@ -185,11 +196,7 @@ static u16 cc10001_adc_read_raw_voltage(struct iio_dev *indio_dev,
        unsigned int delay_ns;
        u16 val;
 
-       cc10001_adc_write_reg(adc_dev, CC10001_ADC_POWER_UP,
-                             CC10001_ADC_POWER_UP_SET);
-
-       /* Wait for 8 (6+2) clock cycles before activating START */
-       ndelay(adc_dev->start_delay_ns);
+       cc10001_adc_power_up(adc_dev);
 
        /* Calculate delay step for eoc and sampled data */
        delay_ns = adc_dev->eoc_delay_ns / CC10001_MAX_POLL_COUNT;
@@ -198,7 +205,7 @@ static u16 cc10001_adc_read_raw_voltage(struct iio_dev *indio_dev,
 
        val = cc10001_adc_poll_done(indio_dev, chan->channel, delay_ns);
 
-       cc10001_adc_write_reg(adc_dev, CC10001_ADC_POWER_UP, 0);
+       cc10001_adc_power_down(adc_dev);
 
        return val;
 }
@@ -224,7 +231,7 @@ static int cc10001_adc_read_raw(struct iio_dev *indio_dev,
 
        case IIO_CHAN_INFO_SCALE:
                ret = regulator_get_voltage(adc_dev->reg);
-               if (ret)
+               if (ret < 0)
                        return ret;
 
                *val = ret / 1000;
@@ -255,22 +262,22 @@ static const struct iio_info cc10001_adc_info = {
        .update_scan_mode = &cc10001_update_scan_mode,
 };
 
-static int cc10001_adc_channel_init(struct iio_dev *indio_dev)
+static int cc10001_adc_channel_init(struct iio_dev *indio_dev,
+                                   unsigned long channel_map)
 {
-       struct cc10001_adc_device *adc_dev = iio_priv(indio_dev);
        struct iio_chan_spec *chan_array, *timestamp;
        unsigned int bit, idx = 0;
 
-       indio_dev->num_channels = bitmap_weight(&adc_dev->channel_map,
-                                               CC10001_ADC_NUM_CHANNELS);
+       indio_dev->num_channels = bitmap_weight(&channel_map,
+                                               CC10001_ADC_NUM_CHANNELS) + 1;
 
-       chan_array = devm_kcalloc(&indio_dev->dev, indio_dev->num_channels + 1,
+       chan_array = devm_kcalloc(&indio_dev->dev, indio_dev->num_channels,
                                  sizeof(struct iio_chan_spec),
                                  GFP_KERNEL);
        if (!chan_array)
                return -ENOMEM;
 
-       for_each_set_bit(bit, &adc_dev->channel_map, CC10001_ADC_NUM_CHANNELS) {
+       for_each_set_bit(bit, &channel_map, CC10001_ADC_NUM_CHANNELS) {
                struct iio_chan_spec *chan = &chan_array[idx];
 
                chan->type = IIO_VOLTAGE;
@@ -305,6 +312,7 @@ static int cc10001_adc_probe(struct platform_device *pdev)
        unsigned long adc_clk_rate;
        struct resource *res;
        struct iio_dev *indio_dev;
+       unsigned long channel_map;
        int ret;
 
        indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*adc_dev));
@@ -313,9 +321,9 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 
        adc_dev = iio_priv(indio_dev);
 
-       adc_dev->channel_map = GENMASK(CC10001_ADC_NUM_CHANNELS - 1, 0);
+       channel_map = GENMASK(CC10001_ADC_NUM_CHANNELS - 1, 0);
        if (!of_property_read_u32(node, "adc-reserved-channels", &ret))
-               adc_dev->channel_map &= ~ret;
+               channel_map &= ~ret;
 
        adc_dev->reg = devm_regulator_get(&pdev->dev, "vref");
        if (IS_ERR(adc_dev->reg))
@@ -361,7 +369,7 @@ static int cc10001_adc_probe(struct platform_device *pdev)
        adc_dev->start_delay_ns = adc_dev->eoc_delay_ns * CC10001_WAIT_CYCLES;
 
        /* Setup the ADC channels available on the device */
-       ret = cc10001_adc_channel_init(indio_dev);
+       ret = cc10001_adc_channel_init(indio_dev, channel_map);
        if (ret < 0)
                goto err_disable_clk;
 
index efbfd12..8d9c9b9 100644 (file)
@@ -60,12 +60,12 @@ struct mcp320x {
        struct spi_message msg;
        struct spi_transfer transfer[2];
 
-       u8 tx_buf;
-       u8 rx_buf[2];
-
        struct regulator *reg;
        struct mutex lock;
        const struct mcp320x_chip_info *chip_info;
+
+       u8 tx_buf ____cacheline_aligned;
+       u8 rx_buf[2];
 };
 
 static int mcp320x_channel_to_tx_data(int device_index,
index 3211729..0c4618b 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/iio/iio.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
@@ -471,11 +472,11 @@ static s32 vadc_calibrate(struct vadc_priv *vadc,
                          const struct vadc_channel_prop *prop, u16 adc_code)
 {
        const struct vadc_prescale_ratio *prescale;
-       s32 voltage;
+       s64 voltage;
 
        voltage = adc_code - vadc->graph[prop->calibration].gnd;
        voltage *= vadc->graph[prop->calibration].dx;
-       voltage = voltage / vadc->graph[prop->calibration].dy;
+       voltage = div64_s64(voltage, vadc->graph[prop->calibration].dy);
 
        if (prop->calibration == VADC_CALIB_ABSOLUTE)
                voltage += vadc->graph[prop->calibration].dx;
@@ -487,7 +488,7 @@ static s32 vadc_calibrate(struct vadc_priv *vadc,
 
        voltage = voltage * prescale->den;
 
-       return voltage / prescale->num;
+       return div64_s64(voltage, prescale->num);
 }
 
 static int vadc_decimation_from_dt(u32 value)
index 89d8aa1..df12c57 100644 (file)
@@ -1001,7 +1001,7 @@ static struct platform_driver twl6030_gpadc_driver = {
 
 module_platform_driver(twl6030_gpadc_driver);
 
-MODULE_ALIAS("platform: " DRIVER_NAME);
+MODULE_ALIAS("platform:" DRIVER_NAME);
 MODULE_AUTHOR("Balaji T K <balajitk@ti.com>");
 MODULE_AUTHOR("Graeme Gregory <gg@slimlogic.co.uk>");
 MODULE_AUTHOR("Oleksandr Kozaruk <oleksandr.kozaruk@ti.com");
index a221f73..ce93bd8 100644 (file)
@@ -856,6 +856,7 @@ static int xadc_read_raw(struct iio_dev *indio_dev,
                        switch (chan->address) {
                        case XADC_REG_VCCINT:
                        case XADC_REG_VCCAUX:
+                       case XADC_REG_VREFP:
                        case XADC_REG_VCCBRAM:
                        case XADC_REG_VCCPINT:
                        case XADC_REG_VCCPAUX:
@@ -996,7 +997,7 @@ static const struct iio_event_spec xadc_voltage_events[] = {
        .num_event_specs = (_alarm) ? ARRAY_SIZE(xadc_voltage_events) : 0, \
        .scan_index = (_scan_index), \
        .scan_type = { \
-               .sign = 'u', \
+               .sign = ((_addr) == XADC_REG_VREFN) ? 's' : 'u', \
                .realbits = 12, \
                .storagebits = 16, \
                .shift = 4, \
@@ -1008,7 +1009,7 @@ static const struct iio_event_spec xadc_voltage_events[] = {
 static const struct iio_chan_spec xadc_channels[] = {
        XADC_CHAN_TEMP(0, 8, XADC_REG_TEMP),
        XADC_CHAN_VOLTAGE(0, 9, XADC_REG_VCCINT, "vccint", true),
-       XADC_CHAN_VOLTAGE(1, 10, XADC_REG_VCCINT, "vccaux", true),
+       XADC_CHAN_VOLTAGE(1, 10, XADC_REG_VCCAUX, "vccaux", true),
        XADC_CHAN_VOLTAGE(2, 14, XADC_REG_VCCBRAM, "vccbram", true),
        XADC_CHAN_VOLTAGE(3, 5, XADC_REG_VCCPINT, "vccpint", true),
        XADC_CHAN_VOLTAGE(4, 6, XADC_REG_VCCPAUX, "vccpaux", true),
index c7487e8..54adc50 100644 (file)
@@ -145,9 +145,9 @@ static inline int xadc_write_adc_reg(struct xadc *xadc, unsigned int reg,
 #define XADC_REG_MAX_VCCPINT   0x28
 #define XADC_REG_MAX_VCCPAUX   0x29
 #define XADC_REG_MAX_VCCO_DDR  0x2a
-#define XADC_REG_MIN_VCCPINT   0x2b
-#define XADC_REG_MIN_VCCPAUX   0x2c
-#define XADC_REG_MIN_VCCO_DDR  0x2d
+#define XADC_REG_MIN_VCCPINT   0x2c
+#define XADC_REG_MIN_VCCPAUX   0x2d
+#define XADC_REG_MIN_VCCO_DDR  0x2e
 
 #define XADC_REG_CONF0         0x40
 #define XADC_REG_CONF1         0x41
index edd13d2..8dd0477 100644 (file)
@@ -304,8 +304,6 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
        struct st_sensors_platform_data *of_pdata;
        int err = 0;
 
-       mutex_init(&sdata->tb.buf_lock);
-
        /* If OF/DT pdata exists, it will take precedence of anything else */
        of_pdata = st_sensors_of_probe(indio_dev->dev.parent, pdata);
        if (of_pdata)
index 21395f2..ffe9664 100644 (file)
@@ -400,6 +400,7 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 
        indio_dev->modes = INDIO_DIRECT_MODE;
        indio_dev->info = &gyro_info;
+       mutex_init(&gdata->tb.buf_lock);
 
        st_sensors_power_enable(indio_dev);
 
index 0916bf6..73b189c 100644 (file)
 #define ADIS16400_NO_BURST             BIT(1)
 #define ADIS16400_HAS_SLOW_MODE                BIT(2)
 #define ADIS16400_HAS_SERIAL_NUMBER    BIT(3)
+#define ADIS16400_BURST_DIAG_STAT      BIT(4)
 
 struct adis16400_state;
 
@@ -165,6 +166,7 @@ struct adis16400_state {
        int                             filt_int;
 
        struct adis adis;
+       unsigned long avail_scan_mask[2];
 };
 
 /* At the moment triggers are only used for ring buffer
index 6e727ff..90c24a2 100644 (file)
@@ -18,7 +18,8 @@ int adis16400_update_scan_mode(struct iio_dev *indio_dev,
 {
        struct adis16400_state *st = iio_priv(indio_dev);
        struct adis *adis = &st->adis;
-       uint16_t *tx;
+       unsigned int burst_length;
+       u8 *tx;
 
        if (st->variant->flags & ADIS16400_NO_BURST)
                return adis_update_scan_mode(indio_dev, scan_mask);
@@ -26,26 +27,29 @@ int adis16400_update_scan_mode(struct iio_dev *indio_dev,
        kfree(adis->xfer);
        kfree(adis->buffer);
 
+       /* All but the timestamp channel */
+       burst_length = (indio_dev->num_channels - 1) * sizeof(u16);
+       if (st->variant->flags & ADIS16400_BURST_DIAG_STAT)
+               burst_length += sizeof(u16);
+
        adis->xfer = kcalloc(2, sizeof(*adis->xfer), GFP_KERNEL);
        if (!adis->xfer)
                return -ENOMEM;
 
-       adis->buffer = kzalloc(indio_dev->scan_bytes + sizeof(u16),
-               GFP_KERNEL);
+       adis->buffer = kzalloc(burst_length + sizeof(u16), GFP_KERNEL);
        if (!adis->buffer)
                return -ENOMEM;
 
-       tx = adis->buffer + indio_dev->scan_bytes;
-
+       tx = adis->buffer + burst_length;
        tx[0] = ADIS_READ_REG(ADIS16400_GLOB_CMD);
        tx[1] = 0;
 
        adis->xfer[0].tx_buf = tx;
        adis->xfer[0].bits_per_word = 8;
        adis->xfer[0].len = 2;
-       adis->xfer[1].tx_buf = tx;
+       adis->xfer[1].rx_buf = adis->buffer;
        adis->xfer[1].bits_per_word = 8;
-       adis->xfer[1].len = indio_dev->scan_bytes;
+       adis->xfer[1].len = burst_length;
 
        spi_message_init(&adis->msg);
        spi_message_add_tail(&adis->xfer[0], &adis->msg);
@@ -61,6 +65,7 @@ irqreturn_t adis16400_trigger_handler(int irq, void *p)
        struct adis16400_state *st = iio_priv(indio_dev);
        struct adis *adis = &st->adis;
        u32 old_speed_hz = st->adis.spi->max_speed_hz;
+       void *buffer;
        int ret;
 
        if (!adis->buffer)
@@ -81,7 +86,12 @@ irqreturn_t adis16400_trigger_handler(int irq, void *p)
                spi_setup(st->adis.spi);
        }
 
-       iio_push_to_buffers_with_timestamp(indio_dev, adis->buffer,
+       if (st->variant->flags & ADIS16400_BURST_DIAG_STAT)
+               buffer = adis->buffer + sizeof(u16);
+       else
+               buffer = adis->buffer;
+
+       iio_push_to_buffers_with_timestamp(indio_dev, buffer,
                pf->timestamp);
 
        iio_trigger_notify_done(indio_dev->trig);
index fa795dc..2fd68f2 100644 (file)
@@ -405,6 +405,11 @@ static int adis16400_read_raw(struct iio_dev *indio_dev,
                        *val = st->variant->temp_scale_nano / 1000000;
                        *val2 = (st->variant->temp_scale_nano % 1000000);
                        return IIO_VAL_INT_PLUS_MICRO;
+               case IIO_PRESSURE:
+                       /* 20 uBar = 0.002kPascal */
+                       *val = 0;
+                       *val2 = 2000;
+                       return IIO_VAL_INT_PLUS_MICRO;
                default:
                        return -EINVAL;
                }
@@ -454,10 +459,10 @@ static int adis16400_read_raw(struct iio_dev *indio_dev,
        }
 }
 
-#define ADIS16400_VOLTAGE_CHAN(addr, bits, name, si) { \
+#define ADIS16400_VOLTAGE_CHAN(addr, bits, name, si, chn) { \
        .type = IIO_VOLTAGE, \
        .indexed = 1, \
-       .channel = 0, \
+       .channel = chn, \
        .extend_name = name, \
        .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
                BIT(IIO_CHAN_INFO_SCALE), \
@@ -474,10 +479,10 @@ static int adis16400_read_raw(struct iio_dev *indio_dev,
 }
 
 #define ADIS16400_SUPPLY_CHAN(addr, bits) \
-       ADIS16400_VOLTAGE_CHAN(addr, bits, "supply", ADIS16400_SCAN_SUPPLY)
+       ADIS16400_VOLTAGE_CHAN(addr, bits, "supply", ADIS16400_SCAN_SUPPLY, 0)
 
 #define ADIS16400_AUX_ADC_CHAN(addr, bits) \
-       ADIS16400_VOLTAGE_CHAN(addr, bits, NULL, ADIS16400_SCAN_ADC)
+       ADIS16400_VOLTAGE_CHAN(addr, bits, NULL, ADIS16400_SCAN_ADC, 1)
 
 #define ADIS16400_GYRO_CHAN(mod, addr, bits) { \
        .type = IIO_ANGL_VEL, \
@@ -773,7 +778,8 @@ static struct adis16400_chip_info adis16400_chips[] = {
                .channels = adis16448_channels,
                .num_channels = ARRAY_SIZE(adis16448_channels),
                .flags = ADIS16400_HAS_PROD_ID |
-                               ADIS16400_HAS_SERIAL_NUMBER,
+                               ADIS16400_HAS_SERIAL_NUMBER |
+                               ADIS16400_BURST_DIAG_STAT,
                .gyro_scale_micro = IIO_DEGREE_TO_RAD(10000), /* 0.01 deg/s */
                .accel_scale_micro = IIO_G_TO_M_S_2(833), /* 1/1200 g */
                .temp_scale_nano = 73860000, /* 0.07386 C */
@@ -791,11 +797,6 @@ static const struct iio_info adis16400_info = {
        .debugfs_reg_access = adis_debugfs_reg_access,
 };
 
-static const unsigned long adis16400_burst_scan_mask[] = {
-       ~0UL,
-       0,
-};
-
 static const char * const adis16400_status_error_msgs[] = {
        [ADIS16400_DIAG_STAT_ZACCL_FAIL] = "Z-axis accelerometer self-test failure",
        [ADIS16400_DIAG_STAT_YACCL_FAIL] = "Y-axis accelerometer self-test failure",
@@ -843,6 +844,20 @@ static const struct adis_data adis16400_data = {
                BIT(ADIS16400_DIAG_STAT_POWER_LOW),
 };
 
+static void adis16400_setup_chan_mask(struct adis16400_state *st)
+{
+       const struct adis16400_chip_info *chip_info = st->variant;
+       unsigned i;
+
+       for (i = 0; i < chip_info->num_channels; i++) {
+               const struct iio_chan_spec *ch = &chip_info->channels[i];
+
+               if (ch->scan_index >= 0 &&
+                   ch->scan_index != ADIS16400_SCAN_TIMESTAMP)
+                       st->avail_scan_mask[0] |= BIT(ch->scan_index);
+       }
+}
+
 static int adis16400_probe(struct spi_device *spi)
 {
        struct adis16400_state *st;
@@ -866,8 +881,10 @@ static int adis16400_probe(struct spi_device *spi)
        indio_dev->info = &adis16400_info;
        indio_dev->modes = INDIO_DIRECT_MODE;
 
-       if (!(st->variant->flags & ADIS16400_NO_BURST))
-               indio_dev->available_scan_masks = adis16400_burst_scan_mask;
+       if (!(st->variant->flags & ADIS16400_NO_BURST)) {
+               adis16400_setup_chan_mask(st);
+               indio_dev->available_scan_masks = st->avail_scan_mask;
+       }
 
        ret = adis_init(&st->adis, indio_dev, spi, &adis16400_data);
        if (ret)
index 847ca56..55c267b 100644 (file)
@@ -38,7 +38,8 @@ static int iio_request_update_kfifo(struct iio_buffer *r)
                kfifo_free(&buf->kf);
                ret = __iio_allocate_kfifo(buf, buf->buffer.bytes_per_datum,
                                   buf->buffer.length);
-               buf->update_needed = false;
+               if (ret >= 0)
+                       buf->update_needed = false;
        } else {
                kfifo_reset_out(&buf->kf);
        }
index 91ecc46..ef60bae 100644 (file)
@@ -43,8 +43,6 @@ struct prox_state {
 static const struct iio_chan_spec prox_channels[] = {
        {
                .type = IIO_PROXIMITY,
-               .modified = 1,
-               .channel2 = IIO_NO_MOD,
                .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
                .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) |
                BIT(IIO_CHAN_INFO_SCALE) |
@@ -253,7 +251,6 @@ static int hid_prox_probe(struct platform_device *pdev)
        struct iio_dev *indio_dev;
        struct prox_state *prox_state;
        struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
-       struct iio_chan_spec *channels;
 
        indio_dev = devm_iio_device_alloc(&pdev->dev,
                                sizeof(struct prox_state));
@@ -272,20 +269,21 @@ static int hid_prox_probe(struct platform_device *pdev)
                return ret;
        }
 
-       channels = kmemdup(prox_channels, sizeof(prox_channels), GFP_KERNEL);
-       if (!channels) {
+       indio_dev->channels = kmemdup(prox_channels, sizeof(prox_channels),
+                                     GFP_KERNEL);
+       if (!indio_dev->channels) {
                dev_err(&pdev->dev, "failed to duplicate channels\n");
                return -ENOMEM;
        }
 
-       ret = prox_parse_report(pdev, hsdev, channels,
+       ret = prox_parse_report(pdev, hsdev,
+                               (struct iio_chan_spec *)indio_dev->channels,
                                HID_USAGE_SENSOR_PROX, prox_state);
        if (ret) {
                dev_err(&pdev->dev, "failed to setup attributes\n");
                goto error_free_dev_mem;
        }
 
-       indio_dev->channels = channels;
        indio_dev->num_channels =
                                ARRAY_SIZE(prox_channels);
        indio_dev->dev.parent = &pdev->dev;
index 8ade473..2e56f81 100644 (file)
@@ -369,6 +369,7 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 
        indio_dev->modes = INDIO_DIRECT_MODE;
        indio_dev->info = &magn_info;
+       mutex_init(&mdata->tb.buf_lock);
 
        st_sensors_power_enable(indio_dev);
 
index 7c623e2..a2602d8 100644 (file)
@@ -172,6 +172,7 @@ static s32 bmp280_compensate_temp(struct bmp280_data *data,
        var2 = (((((adc_temp >> 4) - ((s32)le16_to_cpu(buf[T1]))) *
                  ((adc_temp >> 4) - ((s32)le16_to_cpu(buf[T1])))) >> 12) *
                ((s32)(s16)le16_to_cpu(buf[T3]))) >> 14;
+       data->t_fine = var1 + var2;
 
        return (data->t_fine * 5 + 128) >> 8;
 }
index 7bb8d4c..3cf0bd6 100644 (file)
@@ -47,8 +47,6 @@ struct press_state {
 static const struct iio_chan_spec press_channels[] = {
        {
                .type = IIO_PRESSURE,
-               .modified = 1,
-               .channel2 = IIO_NO_MOD,
                .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
                .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) |
                BIT(IIO_CHAN_INFO_SCALE) |
index 97baf40..e881fa6 100644 (file)
@@ -417,6 +417,7 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 
        indio_dev->modes = INDIO_DIRECT_MODE;
        indio_dev->info = &press_info;
+       mutex_init(&press_data->tb.buf_lock);
 
        st_sensors_power_enable(indio_dev);
 
index f80da50..38339d2 100644 (file)
@@ -472,13 +472,8 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
        } sgid_addr, dgid_addr;
 
 
-       ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
-       if (ret)
-               return ret;
-
-       ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
-       if (ret)
-               return ret;
+       rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+       rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
        memset(&dev_addr, 0, sizeof(dev_addr));
 
@@ -512,10 +507,8 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
                struct sockaddr_in6 _sockaddr_in6;
        } gid_addr;
 
-       ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
+       rdma_gid2ip(&gid_addr._sockaddr, sgid);
 
-       if (ret)
-               return ret;
        memset(&dev_addr, 0, sizeof(dev_addr));
        ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
        if (ret)
index e28a494..0271608 100644 (file)
@@ -437,39 +437,38 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
        return cm_id_priv;
 }
 
-static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
+static void cm_mask_copy(u32 *dst, const u32 *src, const u32 *mask)
 {
        int i;
 
-       for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
-               ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
-                                            ((unsigned long *) mask)[i];
+       for (i = 0; i < IB_CM_COMPARE_SIZE; i++)
+               dst[i] = src[i] & mask[i];
 }
 
 static int cm_compare_data(struct ib_cm_compare_data *src_data,
                           struct ib_cm_compare_data *dst_data)
 {
-       u8 src[IB_CM_COMPARE_SIZE];
-       u8 dst[IB_CM_COMPARE_SIZE];
+       u32 src[IB_CM_COMPARE_SIZE];
+       u32 dst[IB_CM_COMPARE_SIZE];
 
        if (!src_data || !dst_data)
                return 0;
 
        cm_mask_copy(src, src_data->data, dst_data->mask);
        cm_mask_copy(dst, dst_data->data, src_data->mask);
-       return memcmp(src, dst, IB_CM_COMPARE_SIZE);
+       return memcmp(src, dst, sizeof(src));
 }
 
-static int cm_compare_private_data(u8 *private_data,
+static int cm_compare_private_data(u32 *private_data,
                                   struct ib_cm_compare_data *dst_data)
 {
-       u8 src[IB_CM_COMPARE_SIZE];
+       u32 src[IB_CM_COMPARE_SIZE];
 
        if (!dst_data)
                return 0;
 
        cm_mask_copy(src, private_data, dst_data->mask);
-       return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
+       return memcmp(src, dst_data->data, sizeof(src));
 }
 
 /*
@@ -538,7 +537,7 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 
 static struct cm_id_private * cm_find_listen(struct ib_device *device,
                                             __be64 service_id,
-                                            u8 *private_data)
+                                            u32 *private_data)
 {
        struct rb_node *node = cm.listen_service_table.rb_node;
        struct cm_id_private *cm_id_priv;
@@ -862,6 +861,7 @@ retest:
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                break;
        case IB_CM_REQ_SENT:
+       case IB_CM_MRA_REQ_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
@@ -880,7 +880,6 @@ retest:
                                       NULL, 0, NULL, 0);
                }
                break;
-       case IB_CM_MRA_REQ_RCVD:
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
@@ -953,7 +952,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
                cm_mask_copy(cm_id_priv->compare_data->data,
                             compare_data->data, compare_data->mask);
                memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
-                      IB_CM_COMPARE_SIZE);
+                      sizeof(compare_data->mask));
        }
 
        cm_id->state = IB_CM_LISTEN;
index be068f4..8b76f0e 100644 (file)
@@ -103,7 +103,7 @@ struct cm_req_msg {
        /* local ACK timeout:5, rsvd:3 */
        u8 alt_offset139;
 
-       u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE];
+       u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
 
 } __attribute__ ((packed));
 
@@ -801,7 +801,7 @@ struct cm_sidr_req_msg {
        __be16 rsvd;
        __be64 service_id;
 
-       u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE];
+       u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
 } __attribute__ ((packed));
 
 struct cm_sidr_rep_msg {
index d570030..38ffe09 100644 (file)
@@ -845,33 +845,49 @@ static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id
        listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
        ib = (struct sockaddr_ib *) &id->route.addr.src_addr;
        ib->sib_family = listen_ib->sib_family;
-       ib->sib_pkey = path->pkey;
-       ib->sib_flowinfo = path->flow_label;
-       memcpy(&ib->sib_addr, &path->sgid, 16);
+       if (path) {
+               ib->sib_pkey = path->pkey;
+               ib->sib_flowinfo = path->flow_label;
+               memcpy(&ib->sib_addr, &path->sgid, 16);
+       } else {
+               ib->sib_pkey = listen_ib->sib_pkey;
+               ib->sib_flowinfo = listen_ib->sib_flowinfo;
+               ib->sib_addr = listen_ib->sib_addr;
+       }
        ib->sib_sid = listen_ib->sib_sid;
        ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
        ib->sib_scope_id = listen_ib->sib_scope_id;
 
-       ib = (struct sockaddr_ib *) &id->route.addr.dst_addr;
-       ib->sib_family = listen_ib->sib_family;
-       ib->sib_pkey = path->pkey;
-       ib->sib_flowinfo = path->flow_label;
-       memcpy(&ib->sib_addr, &path->dgid, 16);
+       if (path) {
+               ib = (struct sockaddr_ib *) &id->route.addr.dst_addr;
+               ib->sib_family = listen_ib->sib_family;
+               ib->sib_pkey = path->pkey;
+               ib->sib_flowinfo = path->flow_label;
+               memcpy(&ib->sib_addr, &path->dgid, 16);
+       }
+}
+
+static __be16 ss_get_port(const struct sockaddr_storage *ss)
+{
+       if (ss->ss_family == AF_INET)
+               return ((struct sockaddr_in *)ss)->sin_port;
+       else if (ss->ss_family == AF_INET6)
+               return ((struct sockaddr_in6 *)ss)->sin6_port;
+       BUG();
 }
 
 static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
                              struct cma_hdr *hdr)
 {
-       struct sockaddr_in *listen4, *ip4;
+       struct sockaddr_in *ip4;
 
-       listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr;
        ip4 = (struct sockaddr_in *) &id->route.addr.src_addr;
-       ip4->sin_family = listen4->sin_family;
+       ip4->sin_family = AF_INET;
        ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
-       ip4->sin_port = listen4->sin_port;
+       ip4->sin_port = ss_get_port(&listen_id->route.addr.src_addr);
 
        ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr;
-       ip4->sin_family = listen4->sin_family;
+       ip4->sin_family = AF_INET;
        ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
        ip4->sin_port = hdr->port;
 }
@@ -879,16 +895,15 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i
 static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
                              struct cma_hdr *hdr)
 {
-       struct sockaddr_in6 *listen6, *ip6;
+       struct sockaddr_in6 *ip6;
 
-       listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr;
        ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr;
-       ip6->sin6_family = listen6->sin6_family;
+       ip6->sin6_family = AF_INET6;
        ip6->sin6_addr = hdr->dst_addr.ip6;
-       ip6->sin6_port = listen6->sin6_port;
+       ip6->sin6_port = ss_get_port(&listen_id->route.addr.src_addr);
 
        ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr;
-       ip6->sin6_family = listen6->sin6_family;
+       ip6->sin6_family = AF_INET6;
        ip6->sin6_addr = hdr->src_addr.ip6;
        ip6->sin6_port = hdr->port;
 }
@@ -898,9 +913,11 @@ static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id
 {
        struct cma_hdr *hdr;
 
-       if ((listen_id->route.addr.src_addr.ss_family == AF_IB) &&
-           (ib_event->event == IB_CM_REQ_RECEIVED)) {
-               cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
+       if (listen_id->route.addr.src_addr.ss_family == AF_IB) {
+               if (ib_event->event == IB_CM_REQ_RECEIVED)
+                       cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
+               else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
+                       cma_save_ib_info(id, listen_id, NULL);
                return 0;
        }
 
index b85ddbc..e6ffa2e 100644 (file)
@@ -33,7 +33,7 @@
 
 #include "iwpm_util.h"
 
-static const char iwpm_ulib_name[] = "iWarpPortMapperUser";
+static const char iwpm_ulib_name[IWPM_ULIBNAME_SIZE] = "iWarpPortMapperUser";
 static int iwpm_ulib_version = 3;
 static int iwpm_user_pid = IWPM_PID_UNDEFINED;
 static atomic_t echo_nlmsg_seq;
@@ -468,7 +468,8 @@ add_mapping_response_exit:
 }
 EXPORT_SYMBOL(iwpm_add_mapping_cb);
 
-/* netlink attribute policy for the response to add and query mapping request */
+/* netlink attribute policy for the response to add and query mapping request
+ * and response with remote address info */
 static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
        [IWPM_NLA_QUERY_MAPPING_SEQ]      = { .type = NLA_U32 },
        [IWPM_NLA_QUERY_LOCAL_ADDR]       = { .len = sizeof(struct sockaddr_storage) },
@@ -559,6 +560,76 @@ query_mapping_response_exit:
 }
 EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
 
+/*
+ * iwpm_remote_info_cb - Process a port mapper message, containing
+ *                       the remote connecting peer address info
+ */
+int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX];
+       struct sockaddr_storage *local_sockaddr, *remote_sockaddr;
+       struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr;
+       struct iwpm_remote_info *rem_info;
+       const char *msg_type;
+       u8 nl_client;
+       int ret = -EINVAL;
+
+       msg_type = "Remote Mapping info";
+       if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX,
+                               resp_query_policy, nltb, msg_type))
+               return ret;
+
+       nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+       if (!iwpm_valid_client(nl_client)) {
+               pr_info("%s: Invalid port mapper client = %d\n",
+                               __func__, nl_client);
+               return ret;
+       }
+       atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+       local_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+       remote_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+       mapped_loc_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
+       mapped_rem_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]);
+
+       if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family ||
+               mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) {
+               pr_info("%s: Sockaddr family doesn't match the requested one\n",
+                               __func__);
+               return ret;
+       }
+       rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC);
+       if (!rem_info) {
+               pr_err("%s: Unable to allocate a remote info\n", __func__);
+               ret = -ENOMEM;
+               return ret;
+       }
+       memcpy(&rem_info->mapped_loc_sockaddr, mapped_loc_sockaddr,
+              sizeof(struct sockaddr_storage));
+       memcpy(&rem_info->remote_sockaddr, remote_sockaddr,
+              sizeof(struct sockaddr_storage));
+       memcpy(&rem_info->mapped_rem_sockaddr, mapped_rem_sockaddr,
+              sizeof(struct sockaddr_storage));
+       rem_info->nl_client = nl_client;
+
+       iwpm_add_remote_info(rem_info);
+
+       iwpm_print_sockaddr(local_sockaddr,
+                       "remote_info: Local sockaddr:");
+       iwpm_print_sockaddr(mapped_loc_sockaddr,
+                       "remote_info: Mapped local sockaddr:");
+       iwpm_print_sockaddr(remote_sockaddr,
+                       "remote_info: Remote sockaddr:");
+       iwpm_print_sockaddr(mapped_rem_sockaddr,
+                       "remote_info: Mapped remote sockaddr:");
+       return ret;
+}
+EXPORT_SYMBOL(iwpm_remote_info_cb);
+
 /* netlink attribute policy for the received request for mapping info */
 static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
        [IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING,
index 69e9f84..a626795 100644 (file)
 
 #include "iwpm_util.h"
 
-#define IWPM_HASH_BUCKET_SIZE  512
-#define IWPM_HASH_BUCKET_MASK  (IWPM_HASH_BUCKET_SIZE - 1)
+#define IWPM_MAPINFO_HASH_SIZE 512
+#define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1)
+#define IWPM_REMINFO_HASH_SIZE 64
+#define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1)
 
 static LIST_HEAD(iwpm_nlmsg_req_list);
 static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
@@ -42,31 +44,49 @@ static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
 static struct hlist_head *iwpm_hash_bucket;
 static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
 
+static struct hlist_head *iwpm_reminfo_bucket;
+static DEFINE_SPINLOCK(iwpm_reminfo_lock);
+
 static DEFINE_MUTEX(iwpm_admin_lock);
 static struct iwpm_admin_data iwpm_admin;
 
 int iwpm_init(u8 nl_client)
 {
+       int ret = 0;
        if (iwpm_valid_client(nl_client))
                return -EINVAL;
        mutex_lock(&iwpm_admin_lock);
        if (atomic_read(&iwpm_admin.refcount) == 0) {
-               iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE *
+               iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE *
                                        sizeof(struct hlist_head), GFP_KERNEL);
                if (!iwpm_hash_bucket) {
-                       mutex_unlock(&iwpm_admin_lock);
+                       ret = -ENOMEM;
                        pr_err("%s Unable to create mapinfo hash table\n", __func__);
-                       return -ENOMEM;
+                       goto init_exit;
+               }
+               iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE *
+                                       sizeof(struct hlist_head), GFP_KERNEL);
+               if (!iwpm_reminfo_bucket) {
+                       kfree(iwpm_hash_bucket);
+                       ret = -ENOMEM;
+                       pr_err("%s Unable to create reminfo hash table\n", __func__);
+                       goto init_exit;
                }
        }
        atomic_inc(&iwpm_admin.refcount);
+init_exit:
        mutex_unlock(&iwpm_admin_lock);
-       iwpm_set_valid(nl_client, 1);
-       return 0;
+       if (!ret) {
+               iwpm_set_valid(nl_client, 1);
+               pr_debug("%s: Mapinfo and reminfo tables are created\n",
+                               __func__);
+       }
+       return ret;
 }
 EXPORT_SYMBOL(iwpm_init);
 
 static void free_hash_bucket(void);
+static void free_reminfo_bucket(void);
 
 int iwpm_exit(u8 nl_client)
 {
@@ -81,7 +101,8 @@ int iwpm_exit(u8 nl_client)
        }
        if (atomic_dec_and_test(&iwpm_admin.refcount)) {
                free_hash_bucket();
-               pr_debug("%s: Mapinfo hash table is destroyed\n", __func__);
+               free_reminfo_bucket();
+               pr_debug("%s: Resources are destroyed\n", __func__);
        }
        mutex_unlock(&iwpm_admin_lock);
        iwpm_set_valid(nl_client, 0);
@@ -89,7 +110,7 @@ int iwpm_exit(u8 nl_client)
 }
 EXPORT_SYMBOL(iwpm_exit);
 
-static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *,
+static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
                                               struct sockaddr_storage *);
 
 int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
@@ -99,9 +120,10 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
        struct hlist_head *hash_bucket_head;
        struct iwpm_mapping_info *map_info;
        unsigned long flags;
+       int ret = -EINVAL;
 
        if (!iwpm_valid_client(nl_client))
-               return -EINVAL;
+               return ret;
        map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
        if (!map_info) {
                pr_err("%s: Unable to allocate a mapping info\n", __func__);
@@ -115,13 +137,16 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
 
        spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
        if (iwpm_hash_bucket) {
-               hash_bucket_head = get_hash_bucket_head(
+               hash_bucket_head = get_mapinfo_hash_bucket(
                                        &map_info->local_sockaddr,
                                        &map_info->mapped_sockaddr);
-               hlist_add_head(&map_info->hlist_node, hash_bucket_head);
+               if (hash_bucket_head) {
+                       hlist_add_head(&map_info->hlist_node, hash_bucket_head);
+                       ret = 0;
+               }
        }
        spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
-       return 0;
+       return ret;
 }
 EXPORT_SYMBOL(iwpm_create_mapinfo);
 
@@ -136,9 +161,12 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
 
        spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
        if (iwpm_hash_bucket) {
-               hash_bucket_head = get_hash_bucket_head(
+               hash_bucket_head = get_mapinfo_hash_bucket(
                                        local_sockaddr,
                                        mapped_local_addr);
+               if (!hash_bucket_head)
+                       goto remove_mapinfo_exit;
+
                hlist_for_each_entry_safe(map_info, tmp_hlist_node,
                                        hash_bucket_head, hlist_node) {
 
@@ -152,6 +180,7 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
                        }
                }
        }
+remove_mapinfo_exit:
        spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
        return ret;
 }
@@ -166,7 +195,7 @@ static void free_hash_bucket(void)
 
        /* remove all the mapinfo data from the list */
        spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
-       for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+       for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
                hlist_for_each_entry_safe(map_info, tmp_hlist_node,
                        &iwpm_hash_bucket[i], hlist_node) {
 
@@ -180,6 +209,96 @@ static void free_hash_bucket(void)
        spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
 }
 
+static void free_reminfo_bucket(void)
+{
+       struct hlist_node *tmp_hlist_node;
+       struct iwpm_remote_info *rem_info;
+       unsigned long flags;
+       int i;
+
+       /* remove all the remote info from the list */
+       spin_lock_irqsave(&iwpm_reminfo_lock, flags);
+       for (i = 0; i < IWPM_REMINFO_HASH_SIZE; i++) {
+               hlist_for_each_entry_safe(rem_info, tmp_hlist_node,
+                       &iwpm_reminfo_bucket[i], hlist_node) {
+
+                               hlist_del_init(&rem_info->hlist_node);
+                               kfree(rem_info);
+                       }
+       }
+       /* free the hash list */
+       kfree(iwpm_reminfo_bucket);
+       iwpm_reminfo_bucket = NULL;
+       spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
+}
+
+static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage *,
+                                               struct sockaddr_storage *);
+
+void iwpm_add_remote_info(struct iwpm_remote_info *rem_info)
+{
+       struct hlist_head *hash_bucket_head;
+       unsigned long flags;
+
+       spin_lock_irqsave(&iwpm_reminfo_lock, flags);
+       if (iwpm_reminfo_bucket) {
+               hash_bucket_head = get_reminfo_hash_bucket(
+                                       &rem_info->mapped_loc_sockaddr,
+                                       &rem_info->mapped_rem_sockaddr);
+               if (hash_bucket_head)
+                       hlist_add_head(&rem_info->hlist_node, hash_bucket_head);
+       }
+       spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
+}
+
+int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
+                               struct sockaddr_storage *mapped_rem_addr,
+                               struct sockaddr_storage *remote_addr,
+                               u8 nl_client)
+{
+       struct hlist_node *tmp_hlist_node;
+       struct hlist_head *hash_bucket_head;
+       struct iwpm_remote_info *rem_info = NULL;
+       unsigned long flags;
+       int ret = -EINVAL;
+
+       if (!iwpm_valid_client(nl_client)) {
+               pr_info("%s: Invalid client = %d\n", __func__, nl_client);
+               return ret;
+       }
+       spin_lock_irqsave(&iwpm_reminfo_lock, flags);
+       if (iwpm_reminfo_bucket) {
+               hash_bucket_head = get_reminfo_hash_bucket(
+                                       mapped_loc_addr,
+                                       mapped_rem_addr);
+               if (!hash_bucket_head)
+                       goto get_remote_info_exit;
+               hlist_for_each_entry_safe(rem_info, tmp_hlist_node,
+                                       hash_bucket_head, hlist_node) {
+
+                       if (!iwpm_compare_sockaddr(&rem_info->mapped_loc_sockaddr,
+                               mapped_loc_addr) &&
+                               !iwpm_compare_sockaddr(&rem_info->mapped_rem_sockaddr,
+                               mapped_rem_addr)) {
+
+                               memcpy(remote_addr, &rem_info->remote_sockaddr,
+                                       sizeof(struct sockaddr_storage));
+                               iwpm_print_sockaddr(remote_addr,
+                                               "get_remote_info: Remote sockaddr:");
+
+                               hlist_del_init(&rem_info->hlist_node);
+                               kfree(rem_info);
+                               ret = 0;
+                               break;
+                       }
+               }
+       }
+get_remote_info_exit:
+       spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
+       return ret;
+}
+EXPORT_SYMBOL(iwpm_get_remote_info);
+
 struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
                                        u8 nl_client, gfp_t gfp)
 {
@@ -409,31 +528,54 @@ static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr)
        return hash;
 }
 
-static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage
-                                              *local_sockaddr,
-                                              struct sockaddr_storage
-                                              *mapped_sockaddr)
+static int get_hash_bucket(struct sockaddr_storage *a_sockaddr,
+                               struct sockaddr_storage *b_sockaddr, u32 *hash)
 {
-       u32 local_hash, mapped_hash, hash;
+       u32 a_hash, b_hash;
 
-       if (local_sockaddr->ss_family == AF_INET) {
-               local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr);
-               mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr);
+       if (a_sockaddr->ss_family == AF_INET) {
+               a_hash = iwpm_ipv4_jhash((struct sockaddr_in *) a_sockaddr);
+               b_hash = iwpm_ipv4_jhash((struct sockaddr_in *) b_sockaddr);
 
-       } else if (local_sockaddr->ss_family == AF_INET6) {
-               local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr);
-               mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr);
+       } else if (a_sockaddr->ss_family == AF_INET6) {
+               a_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) a_sockaddr);
+               b_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) b_sockaddr);
        } else {
                pr_err("%s: Invalid sockaddr family\n", __func__);
-               return NULL;
+               return -EINVAL;
        }
 
-       if (local_hash == mapped_hash) /* if port mapper isn't available */
-               hash = local_hash;
+       if (a_hash == b_hash) /* if port mapper isn't available */
+               *hash = a_hash;
        else
-               hash = jhash_2words(local_hash, mapped_hash, 0);
+               *hash = jhash_2words(a_hash, b_hash, 0);
+       return 0;
+}
+
+static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage
+                               *local_sockaddr, struct sockaddr_storage
+                               *mapped_sockaddr)
+{
+       u32 hash;
+       int ret;
 
-       return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK];
+       ret = get_hash_bucket(local_sockaddr, mapped_sockaddr, &hash);
+       if (ret)
+               return NULL;
+       return &iwpm_hash_bucket[hash & IWPM_MAPINFO_HASH_MASK];
+}
+
+static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage
+                               *mapped_loc_sockaddr, struct sockaddr_storage
+                               *mapped_rem_sockaddr)
+{
+       u32 hash;
+       int ret;
+
+       ret = get_hash_bucket(mapped_loc_sockaddr, mapped_rem_sockaddr, &hash);
+       if (ret)
+               return NULL;
+       return &iwpm_reminfo_bucket[hash & IWPM_REMINFO_HASH_MASK];
 }
 
 static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
@@ -512,7 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
        }
        skb_num++;
        spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
-       for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+       for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
                hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
                                     hlist_node) {
                        if (map_info->nl_client != nl_client)
@@ -595,7 +737,7 @@ int iwpm_mapinfo_available(void)
 
        spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
        if (iwpm_hash_bucket) {
-               for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+               for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
                        if (!hlist_empty(&iwpm_hash_bucket[i])) {
                                full_bucket = 1;
                                break;
index 9777c86..ee2d9ff 100644 (file)
@@ -76,6 +76,14 @@ struct iwpm_mapping_info {
        u8     nl_client;
 };
 
+struct iwpm_remote_info {
+       struct hlist_node hlist_node;
+       struct sockaddr_storage remote_sockaddr;
+       struct sockaddr_storage mapped_loc_sockaddr;
+       struct sockaddr_storage mapped_rem_sockaddr;
+       u8     nl_client;
+};
+
 struct iwpm_admin_data {
        atomic_t refcount;
        atomic_t nlmsg_seq;
@@ -128,6 +136,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
 int iwpm_get_nlmsg_seq(void);
 
 /**
+ * iwpm_add_reminfo - Add remote address info of the connecting peer
+ *                    to the remote info hash table
+ * @reminfo: The remote info to be added
+ */
+void iwpm_add_remote_info(struct iwpm_remote_info *reminfo);
+
+/**
  * iwpm_valid_client - Check if the port mapper client is valid
  * @nl_client: The index of the netlink client
  *
index 8b8cc6f..40becdb 100644 (file)
@@ -446,7 +446,6 @@ static int ib_umem_odp_map_dma_single_page(
        int remove_existing_mapping = 0;
        int ret = 0;
 
-       mutex_lock(&umem->odp_data->umem_mutex);
        /*
         * Note: we avoid writing if seq is different from the initial seq, to
         * handle case of a racing notifier. This check also allows us to bail
@@ -479,8 +478,6 @@ static int ib_umem_odp_map_dma_single_page(
        }
 
 out:
-       mutex_unlock(&umem->odp_data->umem_mutex);
-
        /* On Demand Paging - avoid pinning the page */
        if (umem->context->invalidate_range || !stored_page)
                put_page(page);
@@ -586,6 +583,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 
                bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
                user_virt += npages << PAGE_SHIFT;
+               mutex_lock(&umem->odp_data->umem_mutex);
                for (j = 0; j < npages; ++j) {
                        ret = ib_umem_odp_map_dma_single_page(
                                umem, k, base_virt_addr, local_page_list[j],
@@ -594,6 +592,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
                                break;
                        k++;
                }
+               mutex_unlock(&umem->odp_data->umem_mutex);
 
                if (ret < 0) {
                        /* Release left over pages when handling errors. */
@@ -633,12 +632,11 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
         * faults from completion. We might be racing with other
         * invalidations, so we must make sure we free each page only
         * once. */
+       mutex_lock(&umem->odp_data->umem_mutex);
        for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
                idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
-               mutex_lock(&umem->odp_data->umem_mutex);
                if (umem->odp_data->page_list[idx]) {
                        struct page *page = umem->odp_data->page_list[idx];
-                       struct page *head_page = compound_head(page);
                        dma_addr_t dma = umem->odp_data->dma_list[idx];
                        dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
 
@@ -646,7 +644,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 
                        ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE,
                                          DMA_BIDIRECTIONAL);
-                       if (dma & ODP_WRITE_ALLOWED_BIT)
+                       if (dma & ODP_WRITE_ALLOWED_BIT) {
+                               struct page *head_page = compound_head(page);
                                /*
                                 * set_page_dirty prefers being called with
                                 * the page lock. However, MMU notifiers are
@@ -657,13 +656,14 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
                                 * be removed.
                                 */
                                set_page_dirty(head_page);
+                       }
                        /* on demand pinning support */
                        if (!umem->context->invalidate_range)
                                put_page(page);
                        umem->odp_data->page_list[idx] = NULL;
                        umem->odp_data->dma_list[idx] = 0;
                }
-               mutex_unlock(&umem->odp_data->umem_mutex);
        }
+       mutex_unlock(&umem->odp_data->umem_mutex);
 }
 EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
index 57176dd..3ad8dc7 100644 (file)
@@ -583,6 +583,22 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep,
                sizeof(ep->com.mapped_remote_addr));
 }
 
+static int get_remote_addr(struct c4iw_ep *parent_ep, struct c4iw_ep *child_ep)
+{
+       int ret;
+
+       print_addr(&parent_ep->com, __func__, "get_remote_addr parent_ep ");
+       print_addr(&child_ep->com, __func__, "get_remote_addr child_ep ");
+
+       ret = iwpm_get_remote_info(&parent_ep->com.mapped_local_addr,
+                                  &child_ep->com.mapped_remote_addr,
+                                  &child_ep->com.remote_addr, RDMA_NL_C4IW);
+       if (ret)
+               PDBG("Unable to find remote peer addr info - err %d\n", ret);
+
+       return ret;
+}
+
 static void best_mtu(const unsigned short *mtus, unsigned short mtu,
                     unsigned int *idx, int use_ts, int ipv6)
 {
@@ -675,7 +691,7 @@ static int send_connect(struct c4iw_ep *ep)
        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
                opt2 |= T5_OPT_2_VALID_F;
                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
-               opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+               opt2 |= T5_ISS_F;
        }
        t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
 
@@ -2042,9 +2058,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
             status, status2errno(status));
 
        if (is_neg_adv(status)) {
-               dev_warn(&dev->rdev.lldi.pdev->dev,
-                        "Connection problems for atid %u status %u (%s)\n",
-                        atid, status, neg_adv_str(status));
+               PDBG("%s Connection problems for atid %u status %u (%s)\n",
+                    __func__, atid, status, neg_adv_str(status));
+               ep->stats.connect_neg_adv++;
+               mutex_lock(&dev->rdev.stats.lock);
+               dev->rdev.stats.neg_adv++;
+               mutex_unlock(&dev->rdev.stats.lock);
                return 0;
        }
 
@@ -2214,7 +2233,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
                u32 isn = (prandom_u32() & ~7UL) - 1;
                opt2 |= T5_OPT_2_VALID_F;
                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
-               opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+               opt2 |= T5_ISS_F;
                rpl5 = (void *)rpl;
                memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
                if (peer2peer)
@@ -2352,27 +2371,57 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
        state_set(&child_ep->com, CONNECTING);
        child_ep->com.dev = dev;
        child_ep->com.cm_id = NULL;
+
+       /*
+        * The mapped_local and mapped_remote addresses get setup with
+        * the actual 4-tuple.  The local address will be based on the
+        * actual local address of the connection, but on the port number
+        * of the parent listening endpoint.  The remote address is
+        * setup based on a query to the IWPM since we don't know what it
+        * originally was before mapping.  If no mapping was done, then
+        * mapped_remote == remote, and mapped_local == local.
+        */
        if (iptype == 4) {
                struct sockaddr_in *sin = (struct sockaddr_in *)
-                       &child_ep->com.local_addr;
+                       &child_ep->com.mapped_local_addr;
+
                sin->sin_family = PF_INET;
                sin->sin_port = local_port;
                sin->sin_addr.s_addr = *(__be32 *)local_ip;
-               sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
+
+               sin = (struct sockaddr_in *)&child_ep->com.local_addr;
+               sin->sin_family = PF_INET;
+               sin->sin_port = ((struct sockaddr_in *)
+                                &parent_ep->com.local_addr)->sin_port;
+               sin->sin_addr.s_addr = *(__be32 *)local_ip;
+
+               sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr;
                sin->sin_family = PF_INET;
                sin->sin_port = peer_port;
                sin->sin_addr.s_addr = *(__be32 *)peer_ip;
        } else {
                struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
-                       &child_ep->com.local_addr;
+                       &child_ep->com.mapped_local_addr;
+
                sin6->sin6_family = PF_INET6;
                sin6->sin6_port = local_port;
                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
-               sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
+
+               sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
+               sin6->sin6_family = PF_INET6;
+               sin6->sin6_port = ((struct sockaddr_in6 *)
+                                  &parent_ep->com.local_addr)->sin6_port;
+               memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
+
+               sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr;
                sin6->sin6_family = PF_INET6;
                sin6->sin6_port = peer_port;
                memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
        }
+       memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr,
+              sizeof(child_ep->com.remote_addr));
+       get_remote_addr(parent_ep, child_ep);
+
        c4iw_get_ep(&parent_ep->com);
        child_ep->parent_ep = parent_ep;
        child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
@@ -2520,9 +2569,13 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 
        ep = lookup_tid(t, tid);
        if (is_neg_adv(req->status)) {
-               dev_warn(&dev->rdev.lldi.pdev->dev,
-                        "Negative advice on abort - tid %u status %d (%s)\n",
-                        ep->hwtid, req->status, neg_adv_str(req->status));
+               PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
+                    __func__, ep->hwtid, req->status,
+                    neg_adv_str(req->status));
+               ep->stats.abort_neg_adv++;
+               mutex_lock(&dev->rdev.stats.lock);
+               dev->rdev.stats.neg_adv++;
+               mutex_unlock(&dev->rdev.stats.lock);
                return 0;
        }
        PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
@@ -3571,7 +3624,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
         * TP will ignore any value > 0 for MSS index.
         */
        req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
-       req->cookie = (unsigned long)skb;
+       req->cookie = (uintptr_t)skb;
 
        set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
        ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
@@ -3931,9 +3984,11 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
                return 0;
        }
        if (is_neg_adv(req->status)) {
-               dev_warn(&dev->rdev.lldi.pdev->dev,
-                        "Negative advice on abort - tid %u status %d (%s)\n",
-                        ep->hwtid, req->status, neg_adv_str(req->status));
+               PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
+                    __func__, ep->hwtid, req->status,
+                    neg_adv_str(req->status));
+               ep->stats.abort_neg_adv++;
+               dev->rdev.stats.neg_adv++;
                kfree_skb(skb);
                return 0;
        }
index ab7692a..68ddb37 100644 (file)
@@ -55,7 +55,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
                        FW_RI_RES_WR_NRES_V(1) |
                        FW_WR_COMPL_F);
        res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
-       res_wr->cookie = (unsigned long) &wr_wait;
+       res_wr->cookie = (uintptr_t)&wr_wait;
        res = res_wr->res;
        res->u.cq.restype = FW_RI_RES_TYPE_CQ;
        res->u.cq.op = FW_RI_RES_OP_RESET;
@@ -125,7 +125,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
                        FW_RI_RES_WR_NRES_V(1) |
                        FW_WR_COMPL_F);
        res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
-       res_wr->cookie = (unsigned long) &wr_wait;
+       res_wr->cookie = (uintptr_t)&wr_wait;
        res = res_wr->res;
        res->u.cq.restype = FW_RI_RES_TYPE_CQ;
        res->u.cq.op = FW_RI_RES_OP_WRITE;
@@ -156,12 +156,19 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
                goto err4;
 
        cq->gen = 1;
-       cq->gts = rdev->lldi.gts_reg;
        cq->rdev = rdev;
        if (user) {
-               cq->ugts = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
-                                       (cq->cqid << rdev->cqshift);
-               cq->ugts &= PAGE_MASK;
+               u32 off = (cq->cqid << rdev->cqshift) & PAGE_MASK;
+
+               cq->ugts = (u64)rdev->bar2_pa + off;
+       } else if (is_t4(rdev->lldi.adapter_type)) {
+               cq->gts = rdev->lldi.gts_reg;
+               cq->qid_mask = -1U;
+       } else {
+               u32 off = ((cq->cqid << rdev->cqshift) & PAGE_MASK) + 12;
+
+               cq->gts = rdev->bar2_kva + off;
+               cq->qid_mask = rdev->qpmask;
        }
        return 0;
 err4:
@@ -970,8 +977,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
        }
        PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
             __func__, chp->cq.cqid, chp, chp->cq.size,
-            chp->cq.memsize,
-            (unsigned long long) chp->cq.dma_addr);
+            chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
        return &chp->ibcq;
 err5:
        kfree(mm2);
index 8fb295e..7e895d7 100644 (file)
@@ -93,6 +93,7 @@ static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
        [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
        [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
        [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+       [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
        [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
        [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
 };
@@ -151,7 +152,7 @@ static int wr_log_show(struct seq_file *seq, void *v)
        int prev_ts_set = 0;
        int idx, end;
 
-#define ts2ns(ts) div64_ul((ts) * dev->rdev.lldi.cclk_ps, 1000)
+#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
 
        idx = atomic_read(&dev->rdev.wr_log_idx) &
                (dev->rdev.wr_log_size - 1);
@@ -489,6 +490,7 @@ static int stats_show(struct seq_file *seq, void *v)
                   dev->rdev.stats.act_ofld_conn_fails);
        seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
                   dev->rdev.stats.pas_ofld_conn_fails);
+       seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv);
        seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird);
        return 0;
 }
@@ -560,10 +562,13 @@ static int dump_ep(int id, void *p, void *data)
                cc = snprintf(epd->buf + epd->pos, space,
                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
                              "history 0x%lx hwtid %d atid %d "
+                             "conn_na %u abort_na %u "
                              "%pI4:%d/%d <-> %pI4:%d/%d\n",
                              ep, ep->com.cm_id, ep->com.qp,
                              (int)ep->com.state, ep->com.flags,
                              ep->com.history, ep->hwtid, ep->atid,
+                             ep->stats.connect_neg_adv,
+                             ep->stats.abort_neg_adv,
                              &lsin->sin_addr, ntohs(lsin->sin_port),
                              ntohs(mapped_lsin->sin_port),
                              &rsin->sin_addr, ntohs(rsin->sin_port),
@@ -581,10 +586,13 @@ static int dump_ep(int id, void *p, void *data)
                cc = snprintf(epd->buf + epd->pos, space,
                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
                              "history 0x%lx hwtid %d atid %d "
+                             "conn_na %u abort_na %u "
                              "%pI6:%d/%d <-> %pI6:%d/%d\n",
                              ep, ep->com.cm_id, ep->com.qp,
                              (int)ep->com.state, ep->com.flags,
                              ep->com.history, ep->hwtid, ep->atid,
+                             ep->stats.connect_neg_adv,
+                             ep->stats.abort_neg_adv,
                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
                              ntohs(mapped_lsin6->sin6_port),
                              &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
@@ -765,6 +773,29 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
        c4iw_init_dev_ucontext(rdev, &rdev->uctx);
 
        /*
+        * This implementation assumes udb_density == ucq_density!  Eventually
+        * we might need to support this but for now fail the open. Also the
+        * cqid and qpid range must match for now.
+        */
+       if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
+               pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
+                      pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
+                      rdev->lldi.ucq_density);
+               err = -EINVAL;
+               goto err1;
+       }
+       if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
+           rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
+               pr_err(MOD "%s: unsupported qp and cq id ranges "
+                      "qp start %u size %u cq start %u size %u\n",
+                      pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
+                      rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
+                      rdev->lldi.vr->cq.size);
+               err = -EINVAL;
+               goto err1;
+       }
+
+       /*
         * qpshift is the number of bits to shift the qpid left in order
         * to get the correct address of the doorbell for that qp.
         */
@@ -784,10 +815,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
             rdev->lldi.vr->qp.size,
             rdev->lldi.vr->cq.start,
             rdev->lldi.vr->cq.size);
-       PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
+       PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
             "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
             (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
-            (u64)pci_resource_start(rdev->lldi.pdev, 2),
+            (void *)pci_resource_start(rdev->lldi.pdev, 2),
             rdev->lldi.db_reg,
             rdev->lldi.gts_reg,
             rdev->qpshift, rdev->qpmask,
@@ -1355,7 +1386,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
                                          t4_sq_host_wq_pidx(&qp->wq),
                                          t4_sq_wq_size(&qp->wq));
                if (ret) {
-                       pr_err(KERN_ERR MOD "%s: Fatal error - "
+                       pr_err(MOD "%s: Fatal error - "
                               "DB overflow recovery failed - "
                               "error syncing SQ qid %u\n",
                               pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
@@ -1371,7 +1402,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
                                          t4_rq_wq_size(&qp->wq));
 
                if (ret) {
-                       pr_err(KERN_ERR MOD "%s: Fatal error - "
+                       pr_err(MOD "%s: Fatal error - "
                               "DB overflow recovery failed - "
                               "error syncing RQ qid %u\n",
                               pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
index d87e165..97bb555 100644 (file)
@@ -137,6 +137,7 @@ struct c4iw_stats {
        u64  tcam_full;
        u64  act_ofld_conn_fails;
        u64  pas_ofld_conn_fails;
+       u64  neg_adv;
 };
 
 struct c4iw_hw_queue {
@@ -814,6 +815,11 @@ struct c4iw_listen_ep {
        int backlog;
 };
 
+struct c4iw_ep_stats {
+       unsigned connect_neg_adv;
+       unsigned abort_neg_adv;
+};
+
 struct c4iw_ep {
        struct c4iw_ep_common com;
        struct c4iw_ep *parent_ep;
@@ -846,6 +852,7 @@ struct c4iw_ep {
        unsigned int retry_count;
        int snd_win;
        int rcv_win;
+       struct c4iw_ep_stats stats;
 };
 
 static inline void print_addr(struct c4iw_ep_common *epc, const char *func,
index 3ef0cf9..cff815b 100644 (file)
@@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
                if (i == (num_wqe-1)) {
                        req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
                                                    FW_WR_COMPL_F);
-                       req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait;
+                       req->wr.wr_lo = (__force __be64)&wr_wait;
                } else
                        req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR));
                req->wr.wr_mid = cpu_to_be32(
@@ -676,12 +676,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
        mhp->attr.zbva = 0;
        mhp->attr.va_fbo = 0;
        mhp->attr.page_size = 0;
-       mhp->attr.len = ~0UL;
+       mhp->attr.len = ~0ULL;
        mhp->attr.pbl_size = 0;
 
        ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid,
                              FW_RI_STAG_NSMR, mhp->attr.perms,
-                             mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0);
+                             mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0);
        if (ret)
                goto err1;
 
index 15cae5a..389ced3 100644 (file)
@@ -275,7 +275,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
                        FW_RI_RES_WR_NRES_V(2) |
                        FW_WR_COMPL_F);
        res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
-       res_wr->cookie = (unsigned long) &wr_wait;
+       res_wr->cookie = (uintptr_t)&wr_wait;
        res = res_wr->res;
        res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
        res->u.sqrq.op = FW_RI_RES_OP_WRITE;
@@ -1209,7 +1209,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
        wqe->flowid_len16 = cpu_to_be32(
                FW_WR_FLOWID_V(ep->hwtid) |
                FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
-       wqe->cookie = (unsigned long) &ep->com.wr_wait;
+       wqe->cookie = (uintptr_t)&ep->com.wr_wait;
 
        wqe->u.fini.type = FW_RI_TYPE_FINI;
        ret = c4iw_ofld_send(&rhp->rdev, skb);
@@ -1279,7 +1279,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
                FW_WR_FLOWID_V(qhp->ep->hwtid) |
                FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
 
-       wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait;
+       wqe->cookie = (uintptr_t)&qhp->ep->com.wr_wait;
 
        wqe->u.init.type = FW_RI_TYPE_INIT;
        wqe->u.init.mpareqbit_p2ptype =
@@ -1766,11 +1766,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
                mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
                insert_mmap(ucontext, mm2);
                mm3->key = uresp.sq_db_gts_key;
-               mm3->addr = (__force unsigned long) qhp->wq.sq.udb;
+               mm3->addr = (__force unsigned long)qhp->wq.sq.udb;
                mm3->len = PAGE_SIZE;
                insert_mmap(ucontext, mm3);
                mm4->key = uresp.rq_db_gts_key;
-               mm4->addr = (__force unsigned long) qhp->wq.rq.udb;
+               mm4->addr = (__force unsigned long)qhp->wq.rq.udb;
                mm4->len = PAGE_SIZE;
                insert_mmap(ucontext, mm4);
                if (mm5) {
index 871cdca..7f2a6c2 100644 (file)
@@ -539,6 +539,7 @@ struct t4_cq {
        size_t memsize;
        __be64 bits_type_ts;
        u32 cqid;
+       u32 qid_mask;
        int vector;
        u16 size; /* including status page */
        u16 cidx;
@@ -563,12 +564,12 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se)
        set_bit(CQ_ARMED, &cq->flags);
        while (cq->cidx_inc > CIDXINC_M) {
                val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7) |
-                     INGRESSQID_V(cq->cqid);
+                     INGRESSQID_V(cq->cqid & cq->qid_mask);
                writel(val, cq->gts);
                cq->cidx_inc -= CIDXINC_M;
        }
        val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6) |
-             INGRESSQID_V(cq->cqid);
+             INGRESSQID_V(cq->cqid & cq->qid_mask);
        writel(val, cq->gts);
        cq->cidx_inc = 0;
        return 0;
@@ -601,7 +602,7 @@ static inline void t4_hwcq_consume(struct t4_cq *cq)
                u32 val;
 
                val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7) |
-                     INGRESSQID_V(cq->cqid);
+                     INGRESSQID_V(cq->cqid & cq->qid_mask);
                writel(val, cq->gts);
                cq->cidx_inc = 0;
        }
index 5e53327..343e8da 100644 (file)
@@ -848,6 +848,8 @@ enum {                     /* TCP congestion control algorithms */
 #define CONG_CNTRL_V(x) ((x) << CONG_CNTRL_S)
 #define CONG_CNTRL_G(x) (((x) >> CONG_CNTRL_S) & CONG_CNTRL_M)
 
-#define CONG_CNTRL_VALID   (1 << 18)
+#define T5_ISS_S    18
+#define T5_ISS_V(x) ((x) << T5_ISS_S)
+#define T5_ISS_F    T5_ISS_V(1U)
 
 #endif /* _T4FW_RI_API_H_ */
index 120aedf..cec1815 100644 (file)
@@ -77,7 +77,7 @@ int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                return -EINVAL;
        }
 
-       memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+       memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
 
        subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
        interface_id = be64_to_cpu(my_gid.global.interface_id);
@@ -114,7 +114,7 @@ int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                return -EINVAL;
        }
 
-       memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+       memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
 
        subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
        interface_id = be64_to_cpu(my_gid.global.interface_id);
index 57070c5..cc64400 100644 (file)
@@ -1569,8 +1569,7 @@ static void reset_gids_task(struct work_struct *work)
                               MLX4_CMD_TIME_CLASS_B,
                               MLX4_CMD_WRAPPED);
                if (err)
-                       pr_warn(KERN_WARNING
-                               "set port %d command failed\n", gw->port);
+                       pr_warn("set port %d command failed\n", gw->port);
        }
 
        mlx4_free_cmd_mailbox(dev, mailbox);
index 4d7024b..d35f62d 100644 (file)
@@ -1392,7 +1392,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
 
        if (ah->ah_flags & IB_AH_GRH) {
                if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) {
-                       pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n",
+                       pr_err("sgid_index (%u) too large. max is %d\n",
                               ah->grh.sgid_index, gen->port[port - 1].gid_table_len);
                        return -EINVAL;
                }
index 3b2a6dc..9f9d5c5 100644 (file)
@@ -116,6 +116,7 @@ static struct ibnl_client_cbs nes_nl_cb_table[] = {
        [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
        [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
        [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+       [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
        [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
        [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
        [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
index 6f09a72..72b4341 100644 (file)
@@ -596,27 +596,52 @@ static void nes_form_reg_msg(struct nes_vnic *nesvnic,
        memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE);
 }
 
+static void record_sockaddr_info(struct sockaddr_storage *addr_info,
+                                       nes_addr_t *ip_addr, u16 *port_num)
+{
+       struct sockaddr_in *in_addr = (struct sockaddr_in *)addr_info;
+
+       if (in_addr->sin_family == AF_INET) {
+               *ip_addr = ntohl(in_addr->sin_addr.s_addr);
+               *port_num = ntohs(in_addr->sin_port);
+       }
+}
+
 /*
  * nes_record_pm_msg - Save the received mapping info
  */
 static void nes_record_pm_msg(struct nes_cm_info *cm_info,
                        struct iwpm_sa_data *pm_msg)
 {
-       struct sockaddr_in *mapped_loc_addr =
-                       (struct sockaddr_in *)&pm_msg->mapped_loc_addr;
-       struct sockaddr_in *mapped_rem_addr =
-                       (struct sockaddr_in *)&pm_msg->mapped_rem_addr;
-
-       if (mapped_loc_addr->sin_family == AF_INET) {
-               cm_info->mapped_loc_addr =
-                       ntohl(mapped_loc_addr->sin_addr.s_addr);
-               cm_info->mapped_loc_port = ntohs(mapped_loc_addr->sin_port);
-       }
-       if (mapped_rem_addr->sin_family == AF_INET) {
-               cm_info->mapped_rem_addr =
-                       ntohl(mapped_rem_addr->sin_addr.s_addr);
-               cm_info->mapped_rem_port = ntohs(mapped_rem_addr->sin_port);
-       }
+       record_sockaddr_info(&pm_msg->mapped_loc_addr,
+               &cm_info->mapped_loc_addr, &cm_info->mapped_loc_port);
+
+       record_sockaddr_info(&pm_msg->mapped_rem_addr,
+               &cm_info->mapped_rem_addr, &cm_info->mapped_rem_port);
+}
+
+/*
+ * nes_get_reminfo - Get the address info of the remote connecting peer
+ */
+static int nes_get_remote_addr(struct nes_cm_node *cm_node)
+{
+       struct sockaddr_storage mapped_loc_addr, mapped_rem_addr;
+       struct sockaddr_storage remote_addr;
+       int ret;
+
+       nes_create_sockaddr(htonl(cm_node->mapped_loc_addr),
+                       htons(cm_node->mapped_loc_port), &mapped_loc_addr);
+       nes_create_sockaddr(htonl(cm_node->mapped_rem_addr),
+                       htons(cm_node->mapped_rem_port), &mapped_rem_addr);
+
+       ret = iwpm_get_remote_info(&mapped_loc_addr, &mapped_rem_addr,
+                               &remote_addr, RDMA_NL_NES);
+       if (ret)
+               nes_debug(NES_DBG_CM, "Unable to find remote peer address info\n");
+       else
+               record_sockaddr_info(&remote_addr, &cm_node->rem_addr,
+                               &cm_node->rem_port);
+       return ret;
 }
 
 /**
@@ -1566,9 +1591,14 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
                return NULL;
 
        /* set our node specific transport info */
-       cm_node->loc_addr = cm_info->loc_addr;
+       if (listener) {
+               cm_node->loc_addr = listener->loc_addr;
+               cm_node->loc_port = listener->loc_port;
+       } else {
+               cm_node->loc_addr = cm_info->loc_addr;
+               cm_node->loc_port = cm_info->loc_port;
+       }
        cm_node->rem_addr = cm_info->rem_addr;
-       cm_node->loc_port = cm_info->loc_port;
        cm_node->rem_port = cm_info->rem_port;
 
        cm_node->mapped_loc_addr = cm_info->mapped_loc_addr;
@@ -2151,6 +2181,7 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                cm_node->state = NES_CM_STATE_ESTABLISHED;
                if (datasize) {
                        cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+                       nes_get_remote_addr(cm_node);
                        handle_rcv_mpa(cm_node, skb);
                } else { /* rcvd ACK only */
                        dev_kfree_skb_any(skb);
index c9780d9..b396344 100644 (file)
@@ -40,7 +40,7 @@
 #include <be_roce.h>
 #include "ocrdma_sli.h"
 
-#define OCRDMA_ROCE_DRV_VERSION "10.4.205.0u"
+#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0"
 
 #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
 #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -515,6 +515,8 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
        memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
        if (rdma_is_multicast_addr(&in6))
                rdma_get_mcast_mac(&in6, mac_addr);
+       else if (rdma_link_local_addr(&in6))
+               rdma_get_ll_mac(&in6, mac_addr);
        else
                memcpy(mac_addr, ah_attr->dmac, ETH_ALEN);
        return 0;
index d812904..f5a5ea8 100644 (file)
@@ -56,7 +56,13 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
        vlan_tag = attr->vlan_id;
        if (!vlan_tag || (vlan_tag > 0xFFF))
                vlan_tag = dev->pvid;
-       if (vlan_tag && (vlan_tag < 0x1000)) {
+       if (vlan_tag || dev->pfc_state) {
+               if (!vlan_tag) {
+                       pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
+                               dev->id);
+                       pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
+                               dev->id);
+               }
                eth.eth_type = cpu_to_be16(0x8100);
                eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
                vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT;
@@ -121,7 +127,9 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
                goto av_conf_err;
        }
 
-       if (pd->uctx) {
+       if ((pd->uctx) &&
+           (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
+           (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
                status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
                                         attr->dmac, &attr->vlan_id);
                if (status) {
index 0c9e959..47615ff 100644 (file)
@@ -933,12 +933,18 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle)
        struct ocrdma_eqe eqe;
        struct ocrdma_eqe *ptr;
        u16 cq_id;
+       u8 mcode;
        int budget = eq->cq_cnt;
 
        do {
                ptr = ocrdma_get_eqe(eq);
                eqe = *ptr;
                ocrdma_le32_to_cpu(&eqe, sizeof(eqe));
+               mcode = (eqe.id_valid & OCRDMA_EQE_MAJOR_CODE_MASK)
+                               >> OCRDMA_EQE_MAJOR_CODE_SHIFT;
+               if (mcode == OCRDMA_MAJOR_CODE_SENTINAL)
+                       pr_err("EQ full on eqid = 0x%x, eqe = 0x%x\n",
+                              eq->q.id, eqe.id_valid);
                if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0)
                        break;
 
@@ -1434,27 +1440,30 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev)
        struct ocrdma_alloc_pd_range_rsp *rsp;
 
        /* Pre allocate the DPP PDs */
-       cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD_RANGE, sizeof(*cmd));
-       if (!cmd)
-               return -ENOMEM;
-       cmd->pd_count = dev->attr.max_dpp_pds;
-       cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP;
-       status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
-       if (status)
-               goto mbx_err;
-       rsp = (struct ocrdma_alloc_pd_range_rsp *)cmd;
-
-       if ((rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) && rsp->pd_count) {
-               dev->pd_mgr->dpp_page_index = rsp->dpp_page_pdid >>
-                               OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT;
-               dev->pd_mgr->pd_dpp_start = rsp->dpp_page_pdid &
-                               OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK;
-               dev->pd_mgr->max_dpp_pd = rsp->pd_count;
-               pd_bitmap_size = BITS_TO_LONGS(rsp->pd_count) * sizeof(long);
-               dev->pd_mgr->pd_dpp_bitmap = kzalloc(pd_bitmap_size,
-                                                    GFP_KERNEL);
+       if (dev->attr.max_dpp_pds) {
+               cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD_RANGE,
+                                         sizeof(*cmd));
+               if (!cmd)
+                       return -ENOMEM;
+               cmd->pd_count = dev->attr.max_dpp_pds;
+               cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP;
+               status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+               rsp = (struct ocrdma_alloc_pd_range_rsp *)cmd;
+
+               if (!status && (rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) &&
+                   rsp->pd_count) {
+                       dev->pd_mgr->dpp_page_index = rsp->dpp_page_pdid >>
+                                       OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT;
+                       dev->pd_mgr->pd_dpp_start = rsp->dpp_page_pdid &
+                                       OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK;
+                       dev->pd_mgr->max_dpp_pd = rsp->pd_count;
+                       pd_bitmap_size =
+                               BITS_TO_LONGS(rsp->pd_count) * sizeof(long);
+                       dev->pd_mgr->pd_dpp_bitmap = kzalloc(pd_bitmap_size,
+                                                            GFP_KERNEL);
+               }
+               kfree(cmd);
        }
-       kfree(cmd);
 
        cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD_RANGE, sizeof(*cmd));
        if (!cmd)
@@ -1462,10 +1471,8 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev)
 
        cmd->pd_count = dev->attr.max_pd - dev->attr.max_dpp_pds;
        status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
-       if (status)
-               goto mbx_err;
        rsp = (struct ocrdma_alloc_pd_range_rsp *)cmd;
-       if (rsp->pd_count) {
+       if (!status && rsp->pd_count) {
                dev->pd_mgr->pd_norm_start = rsp->dpp_page_pdid &
                                        OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK;
                dev->pd_mgr->max_normal_pd = rsp->pd_count;
@@ -1473,15 +1480,13 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev)
                dev->pd_mgr->pd_norm_bitmap = kzalloc(pd_bitmap_size,
                                                      GFP_KERNEL);
        }
+       kfree(cmd);
 
        if (dev->pd_mgr->pd_norm_bitmap || dev->pd_mgr->pd_dpp_bitmap) {
                /* Enable PD resource manager */
                dev->pd_mgr->pd_prealloc_valid = true;
-       } else {
-               return -ENOMEM;
+               return 0;
        }
-mbx_err:
-       kfree(cmd);
        return status;
 }
 
@@ -2406,7 +2411,7 @@ int ocrdma_mbx_query_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
        struct ocrdma_query_qp *cmd;
        struct ocrdma_query_qp_rsp *rsp;
 
-       cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*cmd));
+       cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*rsp));
        if (!cmd)
                return status;
        cmd->qp_id = qp->id;
@@ -2428,7 +2433,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
        int status;
        struct ib_ah_attr *ah_attr = &attrs->ah_attr;
        union ib_gid sgid, zgid;
-       u32 vlan_id;
+       u32 vlan_id = 0xFFFF;
        u8 mac_addr[6];
        struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
 
@@ -2468,12 +2473,22 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
        cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
        if (attr_mask & IB_QP_VID) {
                vlan_id = attrs->vlan_id;
+       } else if (dev->pfc_state) {
+               vlan_id = 0;
+               pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
+                       dev->id);
+               pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
+                       dev->id);
+       }
+
+       if (vlan_id < 0x1000) {
                cmd->params.vlan_dmac_b4_to_b5 |=
                    vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
                cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
                cmd->params.rnt_rc_sl_fl |=
                        (dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT;
        }
+
        return 0;
 }
 
@@ -2519,8 +2534,10 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
                cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID;
        }
        if (attr_mask & IB_QP_PATH_MTU) {
-               if (attrs->path_mtu < IB_MTU_256 ||
+               if (attrs->path_mtu < IB_MTU_512 ||
                    attrs->path_mtu > IB_MTU_4096) {
+                       pr_err("ocrdma%d: IB MTU %d is not supported\n",
+                              dev->id, ib_mtu_enum_to_int(attrs->path_mtu));
                        status = -EINVAL;
                        goto pmtu_err;
                }
@@ -3147,9 +3164,9 @@ void ocrdma_cleanup_hw(struct ocrdma_dev *dev)
        ocrdma_free_pd_pool(dev);
        ocrdma_mbx_delete_ah_tbl(dev);
 
-       /* cleanup the eqs */
-       ocrdma_destroy_eqs(dev);
-
        /* cleanup the control path */
        ocrdma_destroy_mq(dev);
+
+       /* cleanup the eqs */
+       ocrdma_destroy_eqs(dev);
 }
index 243c87c..02ad0ae 100644 (file)
@@ -1176,6 +1176,8 @@ struct ocrdma_query_qp_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
        struct ocrdma_qp_params params;
+       u32 dpp_credits_cqid;
+       u32 rbq_id;
 };
 
 enum {
@@ -1624,12 +1626,19 @@ struct ocrdma_delete_ah_tbl_rsp {
 enum {
        OCRDMA_EQE_VALID_SHIFT          = 0,
        OCRDMA_EQE_VALID_MASK           = BIT(0),
+       OCRDMA_EQE_MAJOR_CODE_MASK      = 0x0E,
+       OCRDMA_EQE_MAJOR_CODE_SHIFT     = 0x01,
        OCRDMA_EQE_FOR_CQE_MASK         = 0xFFFE,
        OCRDMA_EQE_RESOURCE_ID_SHIFT    = 16,
        OCRDMA_EQE_RESOURCE_ID_MASK     = 0xFFFF <<
                                OCRDMA_EQE_RESOURCE_ID_SHIFT,
 };
 
+enum major_code {
+       OCRDMA_MAJOR_CODE_COMPLETION    = 0x00,
+       OCRDMA_MAJOR_CODE_SENTINAL      = 0x01
+};
+
 struct ocrdma_eqe {
        u32 id_valid;
 };
index 8771755..9dcb660 100644 (file)
@@ -365,7 +365,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
        if (!pd)
                return ERR_PTR(-ENOMEM);
 
-       if (udata && uctx) {
+       if (udata && uctx && dev->attr.max_dpp_pds) {
                pd->dpp_enabled =
                        ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
                pd->num_dpp_qp =
@@ -1721,18 +1721,20 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
        struct ocrdma_qp *qp;
        struct ocrdma_dev *dev;
        struct ib_qp_attr attrs;
-       int attr_mask = IB_QP_STATE;
+       int attr_mask;
        unsigned long flags;
 
        qp = get_ocrdma_qp(ibqp);
        dev = get_ocrdma_dev(ibqp->device);
 
-       attrs.qp_state = IB_QPS_ERR;
        pd = qp->pd;
 
        /* change the QP state to ERROR */
-       _ocrdma_modify_qp(ibqp, &attrs, attr_mask);
-
+       if (qp->state != OCRDMA_QPS_RST) {
+               attrs.qp_state = IB_QPS_ERR;
+               attr_mask = IB_QP_STATE;
+               _ocrdma_modify_qp(ibqp, &attrs, attr_mask);
+       }
        /* ensure that CQEs for newly created QP (whose id may be same with
         * one which just getting destroyed are same), dont get
         * discarded until the old CQEs are discarded.
index ffd48bf..7df16f7 100644 (file)
@@ -903,7 +903,7 @@ struct qib_devdata {
        /* PCI Device ID (here for NodeInfo) */
        u16 deviceid;
        /* for write combining settings */
-       unsigned long wc_cookie;
+       int wc_cookie;
        unsigned long wc_base;
        unsigned long wc_len;
 
@@ -1136,7 +1136,6 @@ extern struct qib_devdata *qib_lookup(int unit);
 extern u32 qib_cpulist_count;
 extern unsigned long *qib_cpulist;
 
-extern unsigned qib_wc_pat;
 extern unsigned qib_cc_table_size;
 int qib_init(struct qib_devdata *, int);
 int init_chip_wc_pat(struct qib_devdata *dd, u32);
index 9ea6c44..7258818 100644 (file)
@@ -835,7 +835,8 @@ static int mmap_piobufs(struct vm_area_struct *vma,
        vma->vm_flags &= ~VM_MAYREAD;
        vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
 
-       if (qib_wc_pat)
+       /* We used PAT if wc_cookie == 0 */
+       if (!dd->wc_cookie)
                vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 
        ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
index 0d2ba59..4b92780 100644 (file)
@@ -3315,11 +3315,9 @@ static int init_6120_variables(struct qib_devdata *dd)
        qib_6120_config_ctxts(dd);
        qib_set_ctxtcnt(dd);
 
-       if (qib_wc_pat) {
-               ret = init_chip_wc_pat(dd, 0);
-               if (ret)
-                       goto bail;
-       }
+       ret = init_chip_wc_pat(dd, 0);
+       if (ret)
+               goto bail;
        set_6120_baseaddrs(dd); /* set chip access pointers now */
 
        ret = 0;
index 22affda..00b2af2 100644 (file)
@@ -4126,11 +4126,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
        qib_7220_config_ctxts(dd);
        qib_set_ctxtcnt(dd);  /* needed for PAT setup */
 
-       if (qib_wc_pat) {
-               ret = init_chip_wc_pat(dd, 0);
-               if (ret)
-                       goto bail;
-       }
+       ret = init_chip_wc_pat(dd, 0);
+       if (ret)
+               goto bail;
        set_7220_baseaddrs(dd); /* set chip access pointers now */
 
        ret = 0;
index ef97b71..f32b462 100644 (file)
@@ -6429,6 +6429,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
        unsigned features, pidx, sbufcnt;
        int ret, mtu;
        u32 sbufs, updthresh;
+       resource_size_t vl15off;
 
        /* pport structs are contiguous, allocated after devdata */
        ppd = (struct qib_pportdata *)(dd + 1);
@@ -6677,29 +6678,27 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
        qib_7322_config_ctxts(dd);
        qib_set_ctxtcnt(dd);
 
-       if (qib_wc_pat) {
-               resource_size_t vl15off;
-               /*
-                * We do not set WC on the VL15 buffers to avoid
-                * a rare problem with unaligned writes from
-                * interrupt-flushed store buffers, so we need
-                * to map those separately here.  We can't solve
-                * this for the rarely used mtrr case.
-                */
-               ret = init_chip_wc_pat(dd, 0);
-               if (ret)
-                       goto bail;
+       /*
+        * We do not set WC on the VL15 buffers to avoid
+        * a rare problem with unaligned writes from
+        * interrupt-flushed store buffers, so we need
+        * to map those separately here.  We can't solve
+        * this for the rarely used mtrr case.
+        */
+       ret = init_chip_wc_pat(dd, 0);
+       if (ret)
+               goto bail;
 
-               /* vl15 buffers start just after the 4k buffers */
-               vl15off = dd->physaddr + (dd->piobufbase >> 32) +
-                       dd->piobcnt4k * dd->align4k;
-               dd->piovl15base = ioremap_nocache(vl15off,
-                                                 NUM_VL15_BUFS * dd->align4k);
-               if (!dd->piovl15base) {
-                       ret = -ENOMEM;
-                       goto bail;
-               }
+       /* vl15 buffers start just after the 4k buffers */
+       vl15off = dd->physaddr + (dd->piobufbase >> 32) +
+                 dd->piobcnt4k * dd->align4k;
+       dd->piovl15base = ioremap_nocache(vl15off,
+                                         NUM_VL15_BUFS * dd->align4k);
+       if (!dd->piovl15base) {
+               ret = -ENOMEM;
+               goto bail;
        }
+
        qib_7322_set_baseaddrs(dd); /* set chip access pointers now */
 
        ret = 0;
index 2ee3695..7e00470 100644 (file)
@@ -91,15 +91,6 @@ MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
 unsigned qib_cc_table_size;
 module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
-/*
- * qib_wc_pat parameter:
- *      0 is WC via MTRR
- *      1 is WC via PAT
- *      If PAT initialization fails, code reverts back to MTRR
- */
-unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
-module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
-MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");
 
 static void verify_interrupt(unsigned long);
 
@@ -1377,8 +1368,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
                spin_unlock(&dd->pport[pidx].cc_shadow_lock);
        }
 
-       if (!qib_wc_pat)
-               qib_disable_wc(dd);
+       qib_disable_wc(dd);
 
        if (dd->pioavailregs_dma) {
                dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
@@ -1547,14 +1537,12 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto bail;
        }
 
-       if (!qib_wc_pat) {
-               ret = qib_enable_wc(dd);
-               if (ret) {
-                       qib_dev_err(dd,
-                               "Write combining not enabled (err %d): performance may be poor\n",
-                               -ret);
-                       ret = 0;
-               }
+       ret = qib_enable_wc(dd);
+       if (ret) {
+               qib_dev_err(dd,
+                       "Write combining not enabled (err %d): performance may be poor\n",
+                       -ret);
+               ret = 0;
        }
 
        qib_verify_pioperf(dd);
index 81b225f..edd0ddb 100644 (file)
@@ -116,21 +116,10 @@ int qib_enable_wc(struct qib_devdata *dd)
        }
 
        if (!ret) {
-               int cookie;
-
-               cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
-               if (cookie < 0) {
-                       {
-                               qib_devinfo(dd->pcidev,
-                                        "mtrr_add()  WC for PIO bufs failed (%d)\n",
-                                        cookie);
-                               ret = -EINVAL;
-                       }
-               } else {
-                       dd->wc_cookie = cookie;
-                       dd->wc_base = (unsigned long) pioaddr;
-                       dd->wc_len = (unsigned long) piolen;
-               }
+               dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
+               if (dd->wc_cookie < 0)
+                       /* use error from routine */
+                       ret = dd->wc_cookie;
        }
 
        return ret;
@@ -142,18 +131,7 @@ int qib_enable_wc(struct qib_devdata *dd)
  */
 void qib_disable_wc(struct qib_devdata *dd)
 {
-       if (dd->wc_cookie) {
-               int r;
-
-               r = mtrr_del(dd->wc_cookie, dd->wc_base,
-                            dd->wc_len);
-               if (r < 0)
-                       qib_devinfo(dd->pcidev,
-                                "mtrr_del(%lx, %lx, %lx) failed: %d\n",
-                                dd->wc_cookie, dd->wc_base,
-                                dd->wc_len, r);
-               dd->wc_cookie = 0; /* even on failure */
-       }
+       arch_phys_wc_del(dd->wc_cookie);
 }
 
 /**
index 56959ad..cf32a77 100644 (file)
@@ -386,8 +386,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
                                           rx->rx_ring[i].mapping,
                                           GFP_KERNEL)) {
                        ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
-                               ret = -ENOMEM;
-                               goto err_count;
+                       ret = -ENOMEM;
+                       goto err_count;
                }
                ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
                if (ret) {
index 327529e..575a072 100644 (file)
@@ -65,6 +65,8 @@ static int
 isert_rdma_accept(struct isert_conn *isert_conn);
 struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np);
 
+static void isert_release_work(struct work_struct *work);
+
 static inline bool
 isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
 {
@@ -547,11 +549,11 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
        return 0;
 
 err_prot_mr:
-       ib_dereg_mr(desc->pi_ctx->prot_mr);
+       ib_dereg_mr(pi_ctx->prot_mr);
 err_prot_frpl:
-       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+       ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
 err_pi_ctx:
-       kfree(desc->pi_ctx);
+       kfree(pi_ctx);
 
        return ret;
 }
@@ -648,6 +650,7 @@ isert_init_conn(struct isert_conn *isert_conn)
        mutex_init(&isert_conn->mutex);
        spin_lock_init(&isert_conn->pool_lock);
        INIT_LIST_HEAD(&isert_conn->fr_pool);
+       INIT_WORK(&isert_conn->release_work, isert_release_work);
 }
 
 static void
@@ -925,6 +928,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
 {
        struct isert_np *isert_np = cma_id->context;
        struct isert_conn *isert_conn;
+       bool terminating = false;
 
        if (isert_np->np_cm_id == cma_id)
                return isert_np_cma_handler(cma_id->context, event);
@@ -932,12 +936,25 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
        isert_conn = cma_id->qp->qp_context;
 
        mutex_lock(&isert_conn->mutex);
+       terminating = (isert_conn->state == ISER_CONN_TERMINATING);
        isert_conn_terminate(isert_conn);
        mutex_unlock(&isert_conn->mutex);
 
        isert_info("conn %p completing wait\n", isert_conn);
        complete(&isert_conn->wait);
 
+       if (terminating)
+               goto out;
+
+       mutex_lock(&isert_np->np_accept_mutex);
+       if (!list_empty(&isert_conn->accept_node)) {
+               list_del_init(&isert_conn->accept_node);
+               isert_put_conn(isert_conn);
+               queue_work(isert_release_wq, &isert_conn->release_work);
+       }
+       mutex_unlock(&isert_np->np_accept_mutex);
+
+out:
        return 0;
 }
 
@@ -2380,7 +2397,6 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
        page_off = offset % PAGE_SIZE;
 
        send_wr->sg_list = ib_sge;
-       send_wr->num_sge = sg_nents;
        send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
        /*
         * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
@@ -2400,14 +2416,17 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
                          ib_sge->addr, ib_sge->length, ib_sge->lkey);
                page_off = 0;
                data_left -= ib_sge->length;
+               if (!data_left)
+                       break;
                ib_sge++;
                isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
        }
 
+       send_wr->num_sge = ++i;
        isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
                  send_wr->sg_list, send_wr->num_sge);
 
-       return sg_nents;
+       return send_wr->num_sge;
 }
 
 static int
@@ -3366,7 +3385,6 @@ static void isert_wait_conn(struct iscsi_conn *conn)
        isert_wait4flush(isert_conn);
        isert_wait4logout(isert_conn);
 
-       INIT_WORK(&isert_conn->release_work, isert_release_work);
        queue_work(isert_release_wq, &isert_conn->release_work);
 }
 
@@ -3374,6 +3392,7 @@ static void isert_free_conn(struct iscsi_conn *conn)
 {
        struct isert_conn *isert_conn = conn->context;
 
+       isert_wait4flush(isert_conn);
        isert_put_conn(isert_conn);
 }
 
index f362883..1d247bc 100644 (file)
@@ -747,6 +747,63 @@ static void joydev_cleanup(struct joydev *joydev)
                input_close_device(handle);
 }
 
+static bool joydev_dev_is_absolute_mouse(struct input_dev *dev)
+{
+       DECLARE_BITMAP(jd_scratch, KEY_CNT);
+
+       BUILD_BUG_ON(ABS_CNT > KEY_CNT || EV_CNT > KEY_CNT);
+
+       /*
+        * Virtualization (VMware, etc) and remote management (HP
+        * ILO2) solutions use absolute coordinates for their virtual
+        * pointing devices so that there is one-to-one relationship
+        * between pointer position on the host screen and virtual
+        * guest screen, and so their mice use ABS_X, ABS_Y and 3
+        * primary button events. This clashes with what joydev
+        * considers to be joysticks (a device with at minimum ABS_X
+        * axis).
+        *
+        * Here we are trying to separate absolute mice from
+        * joysticks. A device is, for joystick detection purposes,
+        * considered to be an absolute mouse if the following is
+        * true:
+        *
+        * 1) Event types are exactly EV_ABS, EV_KEY and EV_SYN.
+        * 2) Absolute events are exactly ABS_X and ABS_Y.
+        * 3) Keys are exactly BTN_LEFT, BTN_RIGHT and BTN_MIDDLE.
+        * 4) Device is not on "Amiga" bus.
+        */
+
+       bitmap_zero(jd_scratch, EV_CNT);
+       __set_bit(EV_ABS, jd_scratch);
+       __set_bit(EV_KEY, jd_scratch);
+       __set_bit(EV_SYN, jd_scratch);
+       if (!bitmap_equal(jd_scratch, dev->evbit, EV_CNT))
+               return false;
+
+       bitmap_zero(jd_scratch, ABS_CNT);
+       __set_bit(ABS_X, jd_scratch);
+       __set_bit(ABS_Y, jd_scratch);
+       if (!bitmap_equal(dev->absbit, jd_scratch, ABS_CNT))
+               return false;
+
+       bitmap_zero(jd_scratch, KEY_CNT);
+       __set_bit(BTN_LEFT, jd_scratch);
+       __set_bit(BTN_RIGHT, jd_scratch);
+       __set_bit(BTN_MIDDLE, jd_scratch);
+
+       if (!bitmap_equal(dev->keybit, jd_scratch, KEY_CNT))
+               return false;
+
+       /*
+        * Amiga joystick (amijoy) historically uses left/middle/right
+        * button events.
+        */
+       if (dev->id.bustype == BUS_AMIGA)
+               return false;
+
+       return true;
+}
 
 static bool joydev_match(struct input_handler *handler, struct input_dev *dev)
 {
@@ -758,6 +815,10 @@ static bool joydev_match(struct input_handler *handler, struct input_dev *dev)
        if (test_bit(EV_KEY, dev->evbit) && test_bit(BTN_DIGI, dev->keybit))
                return false;
 
+       /* Avoid absolute mice */
+       if (joydev_dev_is_absolute_mouse(dev))
+               return false;
+
        return true;
 }
 
index 7462d2f..d7820d1 100644 (file)
@@ -156,7 +156,7 @@ config MOUSE_PS2_VMMOUSE
          Say Y here if you are running under control of VMware hypervisor
          (ESXi, Workstation or Fusion). Also make sure that when you enable
          this option, you remove the xf86-input-vmmouse user-space driver
-         or upgrade it to at least xf86-input-vmmouse 13.0.1, which doesn't
+         or upgrade it to at least xf86-input-vmmouse 13.1.0, which doesn't
          load in the presence of an in-kernel vmmouse driver.
 
          If unsure, say N.
index e6708f6..a353b7d 100644 (file)
@@ -941,6 +941,11 @@ static void alps_get_finger_coordinate_v7(struct input_mt_pos *mt,
        case V7_PACKET_ID_TWO:
                mt[1].x &= ~0x000F;
                mt[1].y |= 0x000F;
+               /* Detect false-postive touches where x & y report max value */
+               if (mt[1].y == 0x7ff && mt[1].x == 0xff0) {
+                       mt[1].x = 0;
+                       /* y gets set to 0 at the end of this function */
+               }
                break;
 
        case V7_PACKET_ID_MULTI:
@@ -1058,9 +1063,8 @@ static void alps_process_trackstick_packet_v7(struct psmouse *psmouse)
        right = (packet[1] & 0x02) >> 1;
        middle = (packet[1] & 0x04) >> 2;
 
-       /* Divide 2 since trackpoint's speed is too fast */
-       input_report_rel(dev2, REL_X, (char)x / 2);
-       input_report_rel(dev2, REL_Y, -((char)y / 2));
+       input_report_rel(dev2, REL_X, (char)x);
+       input_report_rel(dev2, REL_Y, -((char)y));
 
        input_report_key(dev2, BTN_LEFT, left);
        input_report_key(dev2, BTN_RIGHT, right);
index 991dc6b..ce3d400 100644 (file)
@@ -315,7 +315,7 @@ static void elantech_report_semi_mt_data(struct input_dev *dev,
                                         unsigned int x2, unsigned int y2)
 {
        elantech_set_slot(dev, 0, num_fingers != 0, x1, y1);
-       elantech_set_slot(dev, 1, num_fingers == 2, x2, y2);
+       elantech_set_slot(dev, 1, num_fingers >= 2, x2, y2);
 }
 
 /*
@@ -1376,10 +1376,11 @@ static bool elantech_is_signature_valid(const unsigned char *param)
                return true;
 
        /*
-        * Some models have a revision higher then 20. Meaning param[2] may
-        * be 10 or 20, skip the rates check for these.
+        * Some hw_version >= 4 models have a revision higher then 20. Meaning
+        * that param[2] may be 10 or 20, skip the rates check for these.
         */
-       if (param[0] == 0x46 && (param[1] & 0xef) == 0x0f && param[2] < 40)
+       if ((param[0] & 0x0f) >= 0x06 && (param[1] & 0xaf) == 0x0f &&
+           param[2] < 40)
                return true;
 
        for (i = 0; i < ARRAY_SIZE(rates); i++)
@@ -1555,6 +1556,7 @@ static int elantech_set_properties(struct elantech_data *etd)
                case 9:
                case 10:
                case 13:
+               case 14:
                        etd->hw_version = 4;
                        break;
                default:
index 630af73..35c8d0c 100644 (file)
@@ -151,6 +151,11 @@ static const struct min_max_quirk min_max_pnpid_table[] = {
                1024, 5112, 2024, 4832
        },
        {
+               (const char * const []){"LEN2000", NULL},
+               {ANY_BOARD_ID, ANY_BOARD_ID},
+               1024, 5113, 2021, 4832
+       },
+       {
                (const char * const []){"LEN2001", NULL},
                {ANY_BOARD_ID, ANY_BOARD_ID},
                1024, 5022, 2508, 4832
@@ -191,7 +196,7 @@ static const char * const topbuttonpad_pnp_ids[] = {
        "LEN0045",
        "LEN0047",
        "LEN0049",
-       "LEN2000",
+       "LEN2000", /* S540 */
        "LEN2001", /* Edge E431 */
        "LEN2002", /* Edge E531 */
        "LEN2003",
index 2d5ff86..e4c3125 100644 (file)
@@ -164,7 +164,7 @@ static irqreturn_t stmpe_ts_handler(int irq, void *data)
                        STMPE_TSC_CTRL_TSC_EN, STMPE_TSC_CTRL_TSC_EN);
 
        /* start polling for touch_det to detect release */
-       schedule_delayed_work(&ts->work, HZ / 50);
+       schedule_delayed_work(&ts->work, msecs_to_jiffies(50));
 
        return IRQ_HANDLED;
 }
index aecb9ad..642f4a5 100644 (file)
@@ -187,7 +187,7 @@ static int sx8654_probe(struct i2c_client *client,
                return -ENOMEM;
 
        input = devm_input_allocate_device(&client->dev);
-       if (!sx8654)
+       if (!input)
                return -ENOMEM;
 
        input->name = "SX8654 I2C Touchscreen";
index e43d489..e1c7e9e 100644 (file)
@@ -2930,6 +2930,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
        size      = PAGE_ALIGN(size);
        dma_mask  = dev->coherent_dma_mask;
        flag     &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+       flag     |= __GFP_ZERO;
 
        page = alloc_pages(flag | __GFP_NOWARN,  get_order(size));
        if (!page) {
index a1cbba9..3465faf 100644 (file)
@@ -266,6 +266,7 @@ static void put_pasid_state(struct pasid_state *pasid_state)
 
 static void put_pasid_state_wait(struct pasid_state *pasid_state)
 {
+       atomic_dec(&pasid_state->count);
        wait_event(pasid_state->wq, !atomic_read(&pasid_state->count));
        free_pasid_state(pasid_state);
 }
index 9f7e1d3..66a803b 100644 (file)
 #define RESUME_TERMINATE               (1 << 0)
 
 #define TTBCR2_SEP_SHIFT               15
-#define TTBCR2_SEP_MASK                        0x7
-
-#define TTBCR2_ADDR_32                 0
-#define TTBCR2_ADDR_36                 1
-#define TTBCR2_ADDR_40                 2
-#define TTBCR2_ADDR_42                 3
-#define TTBCR2_ADDR_44                 4
-#define TTBCR2_ADDR_48                 5
+#define TTBCR2_SEP_UPSTREAM            (0x7 << TTBCR2_SEP_SHIFT)
 
 #define TTBRn_HI_ASID_SHIFT            16
 
@@ -793,26 +786,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
                if (smmu->version > ARM_SMMU_V1) {
                        reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
-                       switch (smmu->va_size) {
-                       case 32:
-                               reg |= (TTBCR2_ADDR_32 << TTBCR2_SEP_SHIFT);
-                               break;
-                       case 36:
-                               reg |= (TTBCR2_ADDR_36 << TTBCR2_SEP_SHIFT);
-                               break;
-                       case 40:
-                               reg |= (TTBCR2_ADDR_40 << TTBCR2_SEP_SHIFT);
-                               break;
-                       case 42:
-                               reg |= (TTBCR2_ADDR_42 << TTBCR2_SEP_SHIFT);
-                               break;
-                       case 44:
-                               reg |= (TTBCR2_ADDR_44 << TTBCR2_SEP_SHIFT);
-                               break;
-                       case 48:
-                               reg |= (TTBCR2_ADDR_48 << TTBCR2_SEP_SHIFT);
-                               break;
-                       }
+                       reg |= TTBCR2_SEP_UPSTREAM;
                        writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2);
                }
        } else {
index 68d43be..5ecfaf2 100644 (file)
@@ -422,6 +422,14 @@ static int dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
+static int intel_iommu_ecs = 1;
+
+/* We only actually use ECS when PASID support (on the new bit 40)
+ * is also advertised. Some early implementations â€” the ones with
+ * PASID support on bit 28 â€” have issues even when we *only* use
+ * extended root/context tables. */
+#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
+                           ecap_pasid(iommu->ecap))
 
 int intel_iommu_gfx_mapped;
 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -465,6 +473,10 @@ static int __init intel_iommu_setup(char *str)
                        printk(KERN_INFO
                                "Intel-IOMMU: disable supported super page\n");
                        intel_iommu_superpage = 0;
+               } else if (!strncmp(str, "ecs_off", 7)) {
+                       printk(KERN_INFO
+                               "Intel-IOMMU: disable extended context table support\n");
+                       intel_iommu_ecs = 0;
                }
 
                str += strcspn(str, ",");
@@ -669,7 +681,7 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu
        struct context_entry *context;
        u64 *entry;
 
-       if (ecap_ecs(iommu->ecap)) {
+       if (ecs_enabled(iommu)) {
                if (devfn >= 0x80) {
                        devfn -= 0x80;
                        entry = &root->hi;
@@ -696,6 +708,11 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu
        return &context[devfn];
 }
 
+static int iommu_dummy(struct device *dev)
+{
+       return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+}
+
 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 {
        struct dmar_drhd_unit *drhd = NULL;
@@ -705,6 +722,9 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
        u16 segment = 0;
        int i;
 
+       if (iommu_dummy(dev))
+               return NULL;
+
        if (dev_is_pci(dev)) {
                pdev = to_pci_dev(dev);
                segment = pci_domain_nr(pdev->bus);
@@ -798,7 +818,7 @@ static void free_context_table(struct intel_iommu *iommu)
                if (context)
                        free_pgtable_page(context);
 
-               if (!ecap_ecs(iommu->ecap))
+               if (!ecs_enabled(iommu))
                        continue;
 
                context = iommu_context_addr(iommu, i, 0x80, 0);
@@ -1133,7 +1153,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
        unsigned long flag;
 
        addr = virt_to_phys(iommu->root_entry);
-       if (ecap_ecs(iommu->ecap))
+       if (ecs_enabled(iommu))
                addr |= DMA_RTADDR_RTT;
 
        raw_spin_lock_irqsave(&iommu->register_lock, flag);
@@ -2969,11 +2989,6 @@ static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
        return __get_valid_domain_for_dev(dev);
 }
 
-static int iommu_dummy(struct device *dev)
-{
-       return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
-}
-
 /* Check if the dev needs to go through non-identity map and unmap process.*/
 static int iommu_no_mapping(struct device *dev)
 {
index 4015560..cab2145 100644 (file)
@@ -1004,20 +1004,18 @@ static int rk_iommu_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_OF
 static const struct of_device_id rk_iommu_dt_ids[] = {
        { .compatible = "rockchip,iommu" },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
-#endif
 
 static struct platform_driver rk_iommu_driver = {
        .probe = rk_iommu_probe,
        .remove = rk_iommu_remove,
        .driver = {
                   .name = "rk_iommu",
-                  .of_match_table = of_match_ptr(rk_iommu_dt_ids),
+                  .of_match_table = rk_iommu_dt_ids,
        },
 };
 
index 9687f8a..1b7e155 100644 (file)
@@ -828,7 +828,14 @@ static int its_alloc_tables(struct its_node *its)
                        u64 typer = readq_relaxed(its->base + GITS_TYPER);
                        u32 ids = GITS_TYPER_DEVBITS(typer);
 
-                       order = get_order((1UL << ids) * entry_size);
+                       /*
+                        * 'order' was initialized earlier to the default page
+                        * granule of the the ITS.  We can't have an allocation
+                        * smaller than that.  If the requested allocation
+                        * is smaller, round up to the default page granule.
+                        */
+                       order = max(get_order((1UL << ids) * entry_size),
+                                   order);
                        if (order >= MAX_ORDER) {
                                order = MAX_ORDER - 1;
                                pr_warn("%s: Device Table too large, reduce its page order to %u\n",
index 7b315e3..01999d7 100644 (file)
@@ -82,19 +82,6 @@ static DEFINE_RAW_SPINLOCK(irq_controller_lock);
 #define NR_GIC_CPU_IF 8
 static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly;
 
-/*
- * Supported arch specific GIC irq extension.
- * Default make them NULL.
- */
-struct irq_chip gic_arch_extn = {
-       .irq_eoi        = NULL,
-       .irq_mask       = NULL,
-       .irq_unmask     = NULL,
-       .irq_retrigger  = NULL,
-       .irq_set_type   = NULL,
-       .irq_set_wake   = NULL,
-};
-
 #ifndef MAX_GIC_NR
 #define MAX_GIC_NR     1
 #endif
@@ -167,34 +154,16 @@ static int gic_peek_irq(struct irq_data *d, u32 offset)
 
 static void gic_mask_irq(struct irq_data *d)
 {
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&irq_controller_lock, flags);
        gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR);
-       if (gic_arch_extn.irq_mask)
-               gic_arch_extn.irq_mask(d);
-       raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 
 static void gic_unmask_irq(struct irq_data *d)
 {
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&irq_controller_lock, flags);
-       if (gic_arch_extn.irq_unmask)
-               gic_arch_extn.irq_unmask(d);
        gic_poke_irq(d, GIC_DIST_ENABLE_SET);
-       raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 
 static void gic_eoi_irq(struct irq_data *d)
 {
-       if (gic_arch_extn.irq_eoi) {
-               raw_spin_lock(&irq_controller_lock);
-               gic_arch_extn.irq_eoi(d);
-               raw_spin_unlock(&irq_controller_lock);
-       }
-
        writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
 }
 
@@ -251,8 +220,6 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 {
        void __iomem *base = gic_dist_base(d);
        unsigned int gicirq = gic_irq(d);
-       unsigned long flags;
-       int ret;
 
        /* Interrupt configuration for SGIs can't be changed */
        if (gicirq < 16)
@@ -263,25 +230,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
                            type != IRQ_TYPE_EDGE_RISING)
                return -EINVAL;
 
-       raw_spin_lock_irqsave(&irq_controller_lock, flags);
-
-       if (gic_arch_extn.irq_set_type)
-               gic_arch_extn.irq_set_type(d, type);
-
-       ret = gic_configure_irq(gicirq, type, base, NULL);
-
-       raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
-
-       return ret;
-}
-
-static int gic_retrigger(struct irq_data *d)
-{
-       if (gic_arch_extn.irq_retrigger)
-               return gic_arch_extn.irq_retrigger(d);
-
-       /* the genirq layer expects 0 if we can't retrigger in hardware */
-       return 0;
+       return gic_configure_irq(gicirq, type, base, NULL);
 }
 
 #ifdef CONFIG_SMP
@@ -312,21 +261,6 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 }
 #endif
 
-#ifdef CONFIG_PM
-static int gic_set_wake(struct irq_data *d, unsigned int on)
-{
-       int ret = -ENXIO;
-
-       if (gic_arch_extn.irq_set_wake)
-               ret = gic_arch_extn.irq_set_wake(d, on);
-
-       return ret;
-}
-
-#else
-#define gic_set_wake   NULL
-#endif
-
 static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 {
        u32 irqstat, irqnr;
@@ -385,11 +319,9 @@ static struct irq_chip gic_chip = {
        .irq_unmask             = gic_unmask_irq,
        .irq_eoi                = gic_eoi_irq,
        .irq_set_type           = gic_set_type,
-       .irq_retrigger          = gic_retrigger,
 #ifdef CONFIG_SMP
        .irq_set_affinity       = gic_set_affinity,
 #endif
-       .irq_set_wake           = gic_set_wake,
        .irq_get_irqchip_state  = gic_irq_get_irqchip_state,
        .irq_set_irqchip_state  = gic_irq_set_irqchip_state,
 };
@@ -1055,7 +987,6 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
                set_handle_irq(gic_handle_irq);
        }
 
-       gic_chip.flags |= gic_arch_extn.flags;
        gic_dist_init(gic);
        gic_cpu_init(gic);
        gic_pm_init(gic);
index 57f09cb..269c235 100644 (file)
@@ -271,7 +271,7 @@ int gic_get_c0_fdc_int(void)
                                  GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_FDC));
 }
 
-static void gic_handle_shared_int(void)
+static void gic_handle_shared_int(bool chained)
 {
        unsigned int i, intr, virq;
        unsigned long *pcpu_mask;
@@ -299,7 +299,10 @@ static void gic_handle_shared_int(void)
        while (intr != gic_shared_intrs) {
                virq = irq_linear_revmap(gic_irq_domain,
                                         GIC_SHARED_TO_HWIRQ(intr));
-               do_IRQ(virq);
+               if (chained)
+                       generic_handle_irq(virq);
+               else
+                       do_IRQ(virq);
 
                /* go to next pending bit */
                bitmap_clear(pending, intr, 1);
@@ -431,7 +434,7 @@ static struct irq_chip gic_edge_irq_controller = {
 #endif
 };
 
-static void gic_handle_local_int(void)
+static void gic_handle_local_int(bool chained)
 {
        unsigned long pending, masked;
        unsigned int intr, virq;
@@ -445,7 +448,10 @@ static void gic_handle_local_int(void)
        while (intr != GIC_NUM_LOCAL_INTRS) {
                virq = irq_linear_revmap(gic_irq_domain,
                                         GIC_LOCAL_TO_HWIRQ(intr));
-               do_IRQ(virq);
+               if (chained)
+                       generic_handle_irq(virq);
+               else
+                       do_IRQ(virq);
 
                /* go to next pending bit */
                bitmap_clear(&pending, intr, 1);
@@ -509,13 +515,14 @@ static struct irq_chip gic_all_vpes_local_irq_controller = {
 
 static void __gic_irq_dispatch(void)
 {
-       gic_handle_local_int();
-       gic_handle_shared_int();
+       gic_handle_local_int(false);
+       gic_handle_shared_int(false);
 }
 
 static void gic_irq_dispatch(unsigned int irq, struct irq_desc *desc)
 {
-       __gic_irq_dispatch();
+       gic_handle_local_int(true);
+       gic_handle_shared_int(true);
 }
 
 #ifdef CONFIG_MIPS_GIC_IPI
index 4a9ce5b..6b2b582 100644 (file)
@@ -104,7 +104,7 @@ static int sunxi_sc_nmi_set_type(struct irq_data *data, unsigned int flow_type)
        irqd_set_trigger_type(data, flow_type);
        irq_setup_alt_chip(data, flow_type);
 
-       for (i = 0; i <= gc->num_ct; i++, ct++)
+       for (i = 0; i < gc->num_ct; i++, ct++)
                if (ct->type & flow_type)
                        ctrl_off = ct->regs.type;
 
index 51c485d..f67bbd8 100644 (file)
@@ -264,7 +264,7 @@ static int tegra_ictlr_domain_alloc(struct irq_domain *domain,
 
                irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
                                              &tegra_ictlr_chip,
-                                             &info->base[ictlr]);
+                                             info->base[ictlr]);
        }
 
        parent_args = *args;
index 7dc93aa..312ffd3 100644 (file)
@@ -173,7 +173,7 @@ static void unmap_switcher(void)
 bool lguest_address_ok(const struct lguest *lg,
                       unsigned long addr, unsigned long len)
 {
-       return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr);
+       return addr+len <= lg->pfn_limit * PAGE_SIZE && (addr+len >= addr);
 }
 
 /*
index 2bc56e2..135a090 100644 (file)
@@ -177,11 +177,16 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde
         * nr_pending is 0 and In_sync is clear, the entries we return will
         * still be in the same position on the list when we re-enter
         * list_for_each_entry_continue_rcu.
+        *
+        * Note that if entered with 'rdev == NULL' to start at the
+        * beginning, we temporarily assign 'rdev' to an address which
+        * isn't really an rdev, but which can be used by
+        * list_for_each_entry_continue_rcu() to find the first entry.
         */
        rcu_read_lock();
        if (rdev == NULL)
                /* start at the beginning */
-               rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set);
+               rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
        else {
                /* release the previous rdev and start from there. */
                rdev_dec_pending(rdev, mddev);
index 9eeea19..5503e43 100644 (file)
@@ -925,10 +925,11 @@ static int crypt_convert(struct crypt_config *cc,
 
                switch (r) {
                /* async */
-               case -EINPROGRESS:
                case -EBUSY:
                        wait_for_completion(&ctx->restart);
                        reinit_completion(&ctx->restart);
+                       /* fall through*/
+               case -EINPROGRESS:
                        ctx->req = NULL;
                        ctx->cc_sector++;
                        continue;
@@ -1345,8 +1346,10 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
        struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx);
        struct crypt_config *cc = io->cc;
 
-       if (error == -EINPROGRESS)
+       if (error == -EINPROGRESS) {
+               complete(&ctx->restart);
                return;
+       }
 
        if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post)
                error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq);
@@ -1357,15 +1360,12 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
        crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
 
        if (!atomic_dec_and_test(&ctx->cc_pending))
-               goto done;
+               return;
 
        if (bio_data_dir(io->base_bio) == READ)
                kcryptd_crypt_read_done(io);
        else
                kcryptd_crypt_write_io_submit(io, 1);
-done:
-       if (!completion_done(&ctx->restart))
-               complete(&ctx->restart);
 }
 
 static void kcryptd_crypt(struct work_struct *work)
index 6395347..eff7bdd 100644 (file)
@@ -429,9 +429,11 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
                /* blk-mq request-based interface */
                *__clone = blk_get_request(bdev_get_queue(bdev),
                                           rq_data_dir(rq), GFP_ATOMIC);
-               if (IS_ERR(*__clone))
+               if (IS_ERR(*__clone)) {
                        /* ENOMEM, requeue */
+                       clear_mapinfo(m, map_context);
                        return r;
+               }
                (*__clone)->bio = (*__clone)->biotail = NULL;
                (*__clone)->rq_disk = bdev->bd_disk;
                (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
index d9b00b8..16ba55a 100644 (file)
@@ -820,6 +820,12 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
 }
 EXPORT_SYMBOL(dm_consume_args);
 
+static bool __table_type_request_based(unsigned table_type)
+{
+       return (table_type == DM_TYPE_REQUEST_BASED ||
+               table_type == DM_TYPE_MQ_REQUEST_BASED);
+}
+
 static int dm_table_set_type(struct dm_table *t)
 {
        unsigned i;
@@ -852,8 +858,7 @@ static int dm_table_set_type(struct dm_table *t)
                 * Determine the type from the live device.
                 * Default to bio-based if device is new.
                 */
-               if (live_md_type == DM_TYPE_REQUEST_BASED ||
-                   live_md_type == DM_TYPE_MQ_REQUEST_BASED)
+               if (__table_type_request_based(live_md_type))
                        request_based = 1;
                else
                        bio_based = 1;
@@ -903,7 +908,7 @@ static int dm_table_set_type(struct dm_table *t)
                        }
                t->type = DM_TYPE_MQ_REQUEST_BASED;
 
-       } else if (hybrid && list_empty(devices) && live_md_type != DM_TYPE_NONE) {
+       } else if (list_empty(devices) && __table_type_request_based(live_md_type)) {
                /* inherit live MD type */
                t->type = live_md_type;
 
@@ -925,10 +930,7 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
 
 bool dm_table_request_based(struct dm_table *t)
 {
-       unsigned table_type = dm_table_get_type(t);
-
-       return (table_type == DM_TYPE_REQUEST_BASED ||
-               table_type == DM_TYPE_MQ_REQUEST_BASED);
+       return __table_type_request_based(dm_table_get_type(t));
 }
 
 bool dm_table_mq_request_based(struct dm_table *t)
index a930b72..2caf492 100644 (file)
@@ -1082,13 +1082,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
        dm_put(md);
 }
 
-static void free_rq_clone(struct request *clone, bool must_be_mapped)
+static void free_rq_clone(struct request *clone)
 {
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
 
-       WARN_ON_ONCE(must_be_mapped && !clone->q);
-
        blk_rq_unprep_clone(clone);
 
        if (md->type == DM_TYPE_MQ_REQUEST_BASED)
@@ -1132,7 +1130,7 @@ static void dm_end_request(struct request *clone, int error)
                        rq->sense_len = clone->sense_len;
        }
 
-       free_rq_clone(clone, true);
+       free_rq_clone(clone);
        if (!rq->q->mq_ops)
                blk_end_request_all(rq, error);
        else
@@ -1151,7 +1149,7 @@ static void dm_unprep_request(struct request *rq)
        }
 
        if (clone)
-               free_rq_clone(clone, false);
+               free_rq_clone(clone);
 }
 
 /*
@@ -1164,6 +1162,7 @@ static void old_requeue_request(struct request *rq)
 
        spin_lock_irqsave(q->queue_lock, flags);
        blk_requeue_request(q, rq);
+       blk_run_queue_async(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -1724,8 +1723,7 @@ static int dm_merge_bvec(struct request_queue *q,
        struct mapped_device *md = q->queuedata;
        struct dm_table *map = dm_get_live_table_fast(md);
        struct dm_target *ti;
-       sector_t max_sectors;
-       int max_size = 0;
+       sector_t max_sectors, max_size = 0;
 
        if (unlikely(!map))
                goto out;
@@ -1740,8 +1738,16 @@ static int dm_merge_bvec(struct request_queue *q,
        max_sectors = min(max_io_len(bvm->bi_sector, ti),
                          (sector_t) queue_max_sectors(q));
        max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-       if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
-               max_size = 0;
+
+       /*
+        * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
+        * to the targets' merge function since it holds sectors not bytes).
+        * Just doing this as an interim fix for stable@ because the more
+        * comprehensive cleanup of switching to sector_t will impact every
+        * DM target that implements a ->merge hook.
+        */
+       if (max_size > INT_MAX)
+               max_size = INT_MAX;
 
        /*
         * merge_bvec_fn() returns number of bytes
@@ -1749,7 +1755,7 @@ static int dm_merge_bvec(struct request_queue *q,
         * max is precomputed maximal io size
         */
        if (max_size && ti->type->merge)
-               max_size = ti->type->merge(ti, bvm, biovec, max_size);
+               max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
        /*
         * If the target doesn't support merge method and some of the devices
         * provided their merge_bvec method (we know this by looking for the
@@ -1971,8 +1977,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
                        dm_kill_unmapped_request(rq, r);
                        return r;
                }
-               if (IS_ERR(clone))
-                       return DM_MAPIO_REQUEUE;
+               if (r != DM_MAPIO_REMAPPED)
+                       return r;
                if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
                        /* -ENOMEM */
                        ti->type->release_clone_rq(clone);
@@ -2753,13 +2759,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
                /* clone request is allocated at the end of the pdu */
                tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
-               if (!clone_rq(rq, md, tio, GFP_ATOMIC))
-                       return BLK_MQ_RQ_QUEUE_BUSY;
+               (void) clone_rq(rq, md, tio, GFP_ATOMIC);
                queue_kthread_work(&md->kworker, &tio->work);
        } else {
                /* Direct call is fine since .queue_rq allows allocations */
-               if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-                       dm_requeue_unmapped_original_request(md, rq);
+               if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+                       /* Undo dm_start_request() before requeuing */
+                       rq_completed(md, rq_data_dir(rq), false);
+                       return BLK_MQ_RQ_QUEUE_BUSY;
+               }
        }
 
        return BLK_MQ_RQ_QUEUE_OK;
index d4f31e1..4dbed4a 100644 (file)
@@ -3834,7 +3834,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
                                err = -EBUSY;
                }
                spin_unlock(&mddev->lock);
-               return err;
+               return err ?: len;
        }
        err = mddev_lock(mddev);
        if (err)
@@ -4211,34 +4211,36 @@ action_store(struct mddev *mddev, const char *page, size_t len)
        if (!mddev->pers || !mddev->pers->sync_request)
                return -EINVAL;
 
-       if (cmd_match(page, "frozen"))
-               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-       else
-               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 
        if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
-               flush_workqueue(md_misc_wq);
-               if (mddev->sync_thread) {
-                       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                       if (mddev_lock(mddev) == 0) {
+               if (cmd_match(page, "frozen"))
+                       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+               else
+                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+               if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
+                   mddev_lock(mddev) == 0) {
+                       flush_workqueue(md_misc_wq);
+                       if (mddev->sync_thread) {
+                               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                                md_reap_sync_thread(mddev);
-                               mddev_unlock(mddev);
                        }
+                       mddev_unlock(mddev);
                }
        } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
                   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
                return -EBUSY;
        else if (cmd_match(page, "resync"))
-               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        else if (cmd_match(page, "recover")) {
+               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        } else if (cmd_match(page, "reshape")) {
                int err;
                if (mddev->pers->start_reshape == NULL)
                        return -EINVAL;
                err = mddev_lock(mddev);
                if (!err) {
+                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                        err = mddev->pers->start_reshape(mddev);
                        mddev_unlock(mddev);
                }
@@ -4250,6 +4252,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
                        set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                else if (!cmd_match(page, "repair"))
                        return -EINVAL;
+               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
                set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
        }
@@ -4818,12 +4821,12 @@ static void md_free(struct kobject *ko)
        if (mddev->sysfs_state)
                sysfs_put(mddev->sysfs_state);
 
+       if (mddev->queue)
+               blk_cleanup_queue(mddev->queue);
        if (mddev->gendisk) {
                del_gendisk(mddev->gendisk);
                put_disk(mddev->gendisk);
        }
-       if (mddev->queue)
-               blk_cleanup_queue(mddev->queue);
 
        kfree(mddev);
 }
@@ -8259,6 +8262,7 @@ void md_reap_sync_thread(struct mddev *mddev)
        if (mddev_is_clustered(mddev))
                md_cluster_ops->metadata_update_finish(mddev);
        clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+       clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
        clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
        clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
        clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
index 2cb59a6..efb654e 100644 (file)
@@ -188,8 +188,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
                }
                dev[j] = rdev1;
 
-               disk_stack_limits(mddev->gendisk, rdev1->bdev,
-                                 rdev1->data_offset << 9);
+               if (mddev->queue)
+                       disk_stack_limits(mddev->gendisk, rdev1->bdev,
+                                         rdev1->data_offset << 9);
 
                if (rdev1->bdev->bd_disk->queue->merge_bvec_fn)
                        conf->has_merge_bvec = 1;
@@ -523,6 +524,9 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                         ? (sector & (chunk_sects-1))
                         : sector_div(sector, chunk_sects));
 
+               /* Restore due to sector_div */
+               sector = bio->bi_iter.bi_sector;
+
                if (sectors < bio_sectors(bio)) {
                        split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
                        bio_chain(split, bio);
@@ -530,7 +534,6 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                        split = bio;
                }
 
-               sector = bio->bi_iter.bi_sector;
                zone = find_zone(mddev->private, &sector);
                tmp_dev = map_sector(mddev, zone, sector, &sector);
                split->bi_bdev = tmp_dev->bdev;
index e793ab6..f55c3f3 100644 (file)
@@ -4156,6 +4156,7 @@ static int raid10_start_reshape(struct mddev *mddev)
 
        clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
        clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+       clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
        set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 
index 77dfd72..b6793d2 100644 (file)
@@ -749,6 +749,7 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 static bool stripe_can_batch(struct stripe_head *sh)
 {
        return test_bit(STRIPE_BATCH_READY, &sh->state) &&
+               !test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
                is_full_stripe_write(sh);
 }
 
@@ -837,6 +838,15 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
                    < IO_THRESHOLD)
                        md_wakeup_thread(conf->mddev->thread);
 
+       if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) {
+               int seq = sh->bm_seq;
+               if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) &&
+                   sh->batch_head->bm_seq > seq)
+                       seq = sh->batch_head->bm_seq;
+               set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state);
+               sh->batch_head->bm_seq = seq;
+       }
+
        atomic_inc(&sh->count);
 unlock_out:
        unlock_two_stripes(head, sh);
@@ -1078,9 +1088,6 @@ again:
                        pr_debug("skip op %ld on disc %d for sector %llu\n",
                                bi->bi_rw, i, (unsigned long long)sh->sector);
                        clear_bit(R5_LOCKED, &sh->dev[i].flags);
-                       if (sh->batch_head)
-                               set_bit(STRIPE_BATCH_ERR,
-                                       &sh->batch_head->state);
                        set_bit(STRIPE_HANDLE, &sh->state);
                }
 
@@ -1825,7 +1832,7 @@ again:
        } else
                init_async_submit(&submit, 0, tx, NULL, NULL,
                                  to_addr_conv(sh, percpu, j));
-       async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
+       tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
        if (!last_stripe) {
                j++;
                sh = list_first_entry(&sh->batch_list, struct stripe_head,
@@ -1971,17 +1978,30 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
        put_cpu();
 }
 
+static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
+{
+       struct stripe_head *sh;
+
+       sh = kmem_cache_zalloc(sc, gfp);
+       if (sh) {
+               spin_lock_init(&sh->stripe_lock);
+               spin_lock_init(&sh->batch_lock);
+               INIT_LIST_HEAD(&sh->batch_list);
+               INIT_LIST_HEAD(&sh->lru);
+               atomic_set(&sh->count, 1);
+       }
+       return sh;
+}
 static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 {
        struct stripe_head *sh;
-       sh = kmem_cache_zalloc(conf->slab_cache, gfp);
+
+       sh = alloc_stripe(conf->slab_cache, gfp);
        if (!sh)
                return 0;
 
        sh->raid_conf = conf;
 
-       spin_lock_init(&sh->stripe_lock);
-
        if (grow_buffers(sh, gfp)) {
                shrink_buffers(sh);
                kmem_cache_free(conf->slab_cache, sh);
@@ -1990,13 +2010,8 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
        sh->hash_lock_index =
                conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
        /* we just created an active stripe so... */
-       atomic_set(&sh->count, 1);
        atomic_inc(&conf->active_stripes);
-       INIT_LIST_HEAD(&sh->lru);
 
-       spin_lock_init(&sh->batch_lock);
-       INIT_LIST_HEAD(&sh->batch_list);
-       sh->batch_head = NULL;
        release_stripe(sh);
        conf->max_nr_stripes++;
        return 1;
@@ -2060,6 +2075,35 @@ static struct flex_array *scribble_alloc(int num, int cnt, gfp_t flags)
        return ret;
 }
 
+static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
+{
+       unsigned long cpu;
+       int err = 0;
+
+       mddev_suspend(conf->mddev);
+       get_online_cpus();
+       for_each_present_cpu(cpu) {
+               struct raid5_percpu *percpu;
+               struct flex_array *scribble;
+
+               percpu = per_cpu_ptr(conf->percpu, cpu);
+               scribble = scribble_alloc(new_disks,
+                                         new_sectors / STRIPE_SECTORS,
+                                         GFP_NOIO);
+
+               if (scribble) {
+                       flex_array_free(percpu->scribble);
+                       percpu->scribble = scribble;
+               } else {
+                       err = -ENOMEM;
+                       break;
+               }
+       }
+       put_online_cpus();
+       mddev_resume(conf->mddev);
+       return err;
+}
+
 static int resize_stripes(struct r5conf *conf, int newsize)
 {
        /* Make all the stripes able to hold 'newsize' devices.
@@ -2088,7 +2132,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        struct stripe_head *osh, *nsh;
        LIST_HEAD(newstripes);
        struct disk_info *ndisks;
-       unsigned long cpu;
        int err;
        struct kmem_cache *sc;
        int i;
@@ -2109,13 +2152,11 @@ static int resize_stripes(struct r5conf *conf, int newsize)
                return -ENOMEM;
 
        for (i = conf->max_nr_stripes; i; i--) {
-               nsh = kmem_cache_zalloc(sc, GFP_KERNEL);
+               nsh = alloc_stripe(sc, GFP_KERNEL);
                if (!nsh)
                        break;
 
                nsh->raid_conf = conf;
-               spin_lock_init(&nsh->stripe_lock);
-
                list_add(&nsh->lru, &newstripes);
        }
        if (i) {
@@ -2142,13 +2183,11 @@ static int resize_stripes(struct r5conf *conf, int newsize)
                                    lock_device_hash_lock(conf, hash));
                osh = get_free_stripe(conf, hash);
                unlock_device_hash_lock(conf, hash);
-               atomic_set(&nsh->count, 1);
+
                for(i=0; i<conf->pool_size; i++) {
                        nsh->dev[i].page = osh->dev[i].page;
                        nsh->dev[i].orig_page = osh->dev[i].page;
                }
-               for( ; i<newsize; i++)
-                       nsh->dev[i].page = NULL;
                nsh->hash_lock_index = hash;
                kmem_cache_free(conf->slab_cache, osh);
                cnt++;
@@ -2174,25 +2213,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        } else
                err = -ENOMEM;
 
-       get_online_cpus();
-       for_each_present_cpu(cpu) {
-               struct raid5_percpu *percpu;
-               struct flex_array *scribble;
-
-               percpu = per_cpu_ptr(conf->percpu, cpu);
-               scribble = scribble_alloc(newsize, conf->chunk_sectors /
-                       STRIPE_SECTORS, GFP_NOIO);
-
-               if (scribble) {
-                       flex_array_free(percpu->scribble);
-                       percpu->scribble = scribble;
-               } else {
-                       err = -ENOMEM;
-                       break;
-               }
-       }
-       put_online_cpus();
-
        /* Step 4, return new stripes to service */
        while(!list_empty(&newstripes)) {
                nsh = list_entry(newstripes.next, struct stripe_head, lru);
@@ -2212,7 +2232,8 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 
        conf->slab_cache = sc;
        conf->active_name = 1-conf->active_name;
-       conf->pool_size = newsize;
+       if (!err)
+               conf->pool_size = newsize;
        return err;
 }
 
@@ -2434,7 +2455,7 @@ static void raid5_end_write_request(struct bio *bi, int error)
        }
        rdev_dec_pending(rdev, conf->mddev);
 
-       if (sh->batch_head && !uptodate)
+       if (sh->batch_head && !uptodate && !replacement)
                set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
        if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
@@ -2976,14 +2997,32 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
        pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
                (unsigned long long)(*bip)->bi_iter.bi_sector,
                (unsigned long long)sh->sector, dd_idx);
-       spin_unlock_irq(&sh->stripe_lock);
 
        if (conf->mddev->bitmap && firstwrite) {
+               /* Cannot hold spinlock over bitmap_startwrite,
+                * but must ensure this isn't added to a batch until
+                * we have added to the bitmap and set bm_seq.
+                * So set STRIPE_BITMAP_PENDING to prevent
+                * batching.
+                * If multiple add_stripe_bio() calls race here they
+                * much all set STRIPE_BITMAP_PENDING.  So only the first one
+                * to complete "bitmap_startwrite" gets to set
+                * STRIPE_BIT_DELAY.  This is important as once a stripe
+                * is added to a batch, STRIPE_BIT_DELAY cannot be changed
+                * any more.
+                */
+               set_bit(STRIPE_BITMAP_PENDING, &sh->state);
+               spin_unlock_irq(&sh->stripe_lock);
                bitmap_startwrite(conf->mddev->bitmap, sh->sector,
                                  STRIPE_SECTORS, 0);
-               sh->bm_seq = conf->seq_flush+1;
-               set_bit(STRIPE_BIT_DELAY, &sh->state);
+               spin_lock_irq(&sh->stripe_lock);
+               clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
+               if (!sh->batch_head) {
+                       sh->bm_seq = conf->seq_flush+1;
+                       set_bit(STRIPE_BIT_DELAY, &sh->state);
+               }
        }
+       spin_unlock_irq(&sh->stripe_lock);
 
        if (stripe_can_batch(sh))
                stripe_add_to_batch_list(conf, sh);
@@ -3278,7 +3317,9 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
                /* reconstruct-write isn't being forced */
                return 0;
        for (i = 0; i < s->failed; i++) {
-               if (!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
+               if (s->failed_num[i] != sh->pd_idx &&
+                   s->failed_num[i] != sh->qd_idx &&
+                   !test_bit(R5_UPTODATE, &fdev[i]->flags) &&
                    !test_bit(R5_OVERWRITE, &fdev[i]->flags))
                        return 1;
        }
@@ -3298,6 +3339,7 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
                 */
                BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
                BUG_ON(test_bit(R5_Wantread, &dev->flags));
+               BUG_ON(sh->batch_head);
                if ((s->uptodate == disks - 1) &&
                    (s->failed && (disk_idx == s->failed_num[0] ||
                                   disk_idx == s->failed_num[1]))) {
@@ -3366,7 +3408,6 @@ static void handle_stripe_fill(struct stripe_head *sh,
 {
        int i;
 
-       BUG_ON(sh->batch_head);
        /* look for blocks to read/compute, skip this if a compute
         * is already in flight, or if the stripe contents are in the
         * midst of changing due to a write
@@ -3379,6 +3420,8 @@ static void handle_stripe_fill(struct stripe_head *sh,
        set_bit(STRIPE_HANDLE, &sh->state);
 }
 
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+                                   unsigned long handle_flags);
 /* handle_stripe_clean_event
  * any written block on an uptodate or failed drive can be returned.
  * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
@@ -3392,7 +3435,6 @@ static void handle_stripe_clean_event(struct r5conf *conf,
        int discard_pending = 0;
        struct stripe_head *head_sh = sh;
        bool do_endio = false;
-       int wakeup_nr = 0;
 
        for (i = disks; i--; )
                if (sh->dev[i].written) {
@@ -3481,44 +3523,8 @@ unhash:
                if (atomic_dec_and_test(&conf->pending_full_writes))
                        md_wakeup_thread(conf->mddev->thread);
 
-       if (!head_sh->batch_head || !do_endio)
-               return;
-       for (i = 0; i < head_sh->disks; i++) {
-               if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
-                       wakeup_nr++;
-       }
-       while (!list_empty(&head_sh->batch_list)) {
-               int i;
-               sh = list_first_entry(&head_sh->batch_list,
-                                     struct stripe_head, batch_list);
-               list_del_init(&sh->batch_list);
-
-               set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
-                             head_sh->state & ~((1 << STRIPE_ACTIVE) |
-                                                (1 << STRIPE_PREREAD_ACTIVE) |
-                                                STRIPE_EXPAND_SYNC_FLAG));
-               sh->check_state = head_sh->check_state;
-               sh->reconstruct_state = head_sh->reconstruct_state;
-               for (i = 0; i < sh->disks; i++) {
-                       if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
-                               wakeup_nr++;
-                       sh->dev[i].flags = head_sh->dev[i].flags;
-               }
-
-               spin_lock_irq(&sh->stripe_lock);
-               sh->batch_head = NULL;
-               spin_unlock_irq(&sh->stripe_lock);
-               if (sh->state & STRIPE_EXPAND_SYNC_FLAG)
-                       set_bit(STRIPE_HANDLE, &sh->state);
-               release_stripe(sh);
-       }
-
-       spin_lock_irq(&head_sh->stripe_lock);
-       head_sh->batch_head = NULL;
-       spin_unlock_irq(&head_sh->stripe_lock);
-       wake_up_nr(&conf->wait_for_overlap, wakeup_nr);
-       if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG)
-               set_bit(STRIPE_HANDLE, &head_sh->state);
+       if (head_sh->batch_head && do_endio)
+               break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
 }
 
 static void handle_stripe_dirtying(struct r5conf *conf,
@@ -4159,9 +4165,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 
 static int clear_batch_ready(struct stripe_head *sh)
 {
+       /* Return '1' if this is a member of batch, or
+        * '0' if it is a lone stripe or a head which can now be
+        * handled.
+        */
        struct stripe_head *tmp;
        if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
-               return 0;
+               return (sh->batch_head && sh->batch_head != sh);
        spin_lock(&sh->stripe_lock);
        if (!sh->batch_head) {
                spin_unlock(&sh->stripe_lock);
@@ -4189,46 +4199,65 @@ static int clear_batch_ready(struct stripe_head *sh)
        return 0;
 }
 
-static void check_break_stripe_batch_list(struct stripe_head *sh)
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+                                   unsigned long handle_flags)
 {
-       struct stripe_head *head_sh, *next;
+       struct stripe_head *sh, *next;
        int i;
+       int do_wakeup = 0;
 
-       if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
-               return;
+       list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {
 
-       head_sh = sh;
-       do {
-               sh = list_first_entry(&sh->batch_list,
-                                     struct stripe_head, batch_list);
-               BUG_ON(sh == head_sh);
-       } while (!test_bit(STRIPE_DEGRADED, &sh->state));
-
-       while (sh != head_sh) {
-               next = list_first_entry(&sh->batch_list,
-                                       struct stripe_head, batch_list);
                list_del_init(&sh->batch_list);
 
-               set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
-                             head_sh->state & ~((1 << STRIPE_ACTIVE) |
-                                                (1 << STRIPE_PREREAD_ACTIVE) |
-                                                (1 << STRIPE_DEGRADED) |
-                                                STRIPE_EXPAND_SYNC_FLAG));
+               WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
+                                         (1 << STRIPE_SYNCING) |
+                                         (1 << STRIPE_REPLACED) |
+                                         (1 << STRIPE_PREREAD_ACTIVE) |
+                                         (1 << STRIPE_DELAYED) |
+                                         (1 << STRIPE_BIT_DELAY) |
+                                         (1 << STRIPE_FULL_WRITE) |
+                                         (1 << STRIPE_BIOFILL_RUN) |
+                                         (1 << STRIPE_COMPUTE_RUN)  |
+                                         (1 << STRIPE_OPS_REQ_PENDING) |
+                                         (1 << STRIPE_DISCARD) |
+                                         (1 << STRIPE_BATCH_READY) |
+                                         (1 << STRIPE_BATCH_ERR) |
+                                         (1 << STRIPE_BITMAP_PENDING)));
+               WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
+                                             (1 << STRIPE_REPLACED)));
+
+               set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
+                                           (1 << STRIPE_DEGRADED)),
+                             head_sh->state & (1 << STRIPE_INSYNC));
+
                sh->check_state = head_sh->check_state;
                sh->reconstruct_state = head_sh->reconstruct_state;
-               for (i = 0; i < sh->disks; i++)
+               for (i = 0; i < sh->disks; i++) {
+                       if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+                               do_wakeup = 1;
                        sh->dev[i].flags = head_sh->dev[i].flags &
                                (~((1 << R5_WriteError) | (1 << R5_Overlap)));
-
+               }
                spin_lock_irq(&sh->stripe_lock);
                sh->batch_head = NULL;
                spin_unlock_irq(&sh->stripe_lock);
-
-               set_bit(STRIPE_HANDLE, &sh->state);
+               if (handle_flags == 0 ||
+                   sh->state & handle_flags)
+                       set_bit(STRIPE_HANDLE, &sh->state);
                release_stripe(sh);
-
-               sh = next;
        }
+       spin_lock_irq(&head_sh->stripe_lock);
+       head_sh->batch_head = NULL;
+       spin_unlock_irq(&head_sh->stripe_lock);
+       for (i = 0; i < head_sh->disks; i++)
+               if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
+                       do_wakeup = 1;
+       if (head_sh->state & handle_flags)
+               set_bit(STRIPE_HANDLE, &head_sh->state);
+
+       if (do_wakeup)
+               wake_up(&head_sh->raid_conf->wait_for_overlap);
 }
 
 static void handle_stripe(struct stripe_head *sh)
@@ -4253,7 +4282,8 @@ static void handle_stripe(struct stripe_head *sh)
                return;
        }
 
-       check_break_stripe_batch_list(sh);
+       if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
+               break_stripe_batch_list(sh, 0);
 
        if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
                spin_lock(&sh->stripe_lock);
@@ -4307,6 +4337,7 @@ static void handle_stripe(struct stripe_head *sh)
        if (s.failed > conf->max_degraded) {
                sh->check_state = 0;
                sh->reconstruct_state = 0;
+               break_stripe_batch_list(sh, 0);
                if (s.to_read+s.to_write+s.written)
                        handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
                if (s.syncing + s.replacing)
@@ -6221,8 +6252,11 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
                percpu->spare_page = alloc_page(GFP_KERNEL);
        if (!percpu->scribble)
                percpu->scribble = scribble_alloc(max(conf->raid_disks,
-                       conf->previous_raid_disks), conf->chunk_sectors /
-                       STRIPE_SECTORS, GFP_KERNEL);
+                                                     conf->previous_raid_disks),
+                                                 max(conf->chunk_sectors,
+                                                     conf->prev_chunk_sectors)
+                                                  / STRIPE_SECTORS,
+                                                 GFP_KERNEL);
 
        if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) {
                free_scratch_buffer(conf, percpu);
@@ -7198,6 +7232,15 @@ static int check_reshape(struct mddev *mddev)
        if (!check_stripe_cache(mddev))
                return -ENOSPC;
 
+       if (mddev->new_chunk_sectors > mddev->chunk_sectors ||
+           mddev->delta_disks > 0)
+               if (resize_chunks(conf,
+                                 conf->previous_raid_disks
+                                 + max(0, mddev->delta_disks),
+                                 max(mddev->new_chunk_sectors,
+                                     mddev->chunk_sectors)
+                           ) < 0)
+                       return -ENOMEM;
        return resize_stripes(conf, (conf->previous_raid_disks
                                     + mddev->delta_disks));
 }
@@ -7311,6 +7354,7 @@ static int raid5_start_reshape(struct mddev *mddev)
 
        clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
        clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+       clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
        set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
        mddev->sync_thread = md_register_thread(md_do_sync, mddev,
index 7dc0dd8..896d603 100644 (file)
@@ -337,9 +337,12 @@ enum {
        STRIPE_ON_RELEASE_LIST,
        STRIPE_BATCH_READY,
        STRIPE_BATCH_ERR,
+       STRIPE_BITMAP_PENDING,  /* Being added to bitmap, don't add
+                                * to batch yet.
+                                */
 };
 
-#define STRIPE_EXPAND_SYNC_FLAG \
+#define STRIPE_EXPAND_SYNC_FLAGS \
        ((1 << STRIPE_EXPAND_SOURCE) |\
        (1 << STRIPE_EXPAND_READY) |\
        (1 << STRIPE_EXPANDING) |\
index 3ef0f90..1570992 100644 (file)
@@ -97,6 +97,7 @@ config MEDIA_CONTROLLER
 config MEDIA_CONTROLLER_DVB
        bool "Enable Media controller for DVB"
        depends on MEDIA_CONTROLLER
+       depends on BROKEN
        ---help---
          Enable the media controller API support for DVB.
 
index ae498b5..46e3840 100644 (file)
@@ -433,6 +433,10 @@ EXPORT_SYMBOL_GPL(da9052_adc_read_temp);
 static const struct mfd_cell da9052_subdev_info[] = {
        {
                .name = "da9052-regulator",
+               .id = 0,
+       },
+       {
+               .name = "da9052-regulator",
                .id = 1,
        },
        {
@@ -484,10 +488,6 @@ static const struct mfd_cell da9052_subdev_info[] = {
                .id = 13,
        },
        {
-               .name = "da9052-regulator",
-               .id = 14,
-       },
-       {
                .name = "da9052-onkey",
        },
        {
index 2c25271..60f7141 100644 (file)
@@ -1029,6 +1029,18 @@ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type)
        md->reset_done &= ~type;
 }
 
+int mmc_access_rpmb(struct mmc_queue *mq)
+{
+       struct mmc_blk_data *md = mq->data;
+       /*
+        * If this is a RPMB partition access, return ture
+        */
+       if (md && md->part_type == EXT_CSD_PART_CONFIG_ACC_RPMB)
+               return true;
+
+       return false;
+}
+
 static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 {
        struct mmc_blk_data *md = mq->data;
index 236d194..8efa368 100644 (file)
@@ -38,7 +38,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
                return BLKPREP_KILL;
        }
 
-       if (mq && mmc_card_removed(mq->card))
+       if (mq && (mmc_card_removed(mq->card) || mmc_access_rpmb(mq)))
                return BLKPREP_KILL;
 
        req->cmd_flags |= REQ_DONTPREP;
index 5752d50..99e6521 100644 (file)
@@ -73,4 +73,6 @@ extern void mmc_queue_bounce_post(struct mmc_queue_req *);
 extern int mmc_packed_init(struct mmc_queue *, struct mmc_card *);
 extern void mmc_packed_clean(struct mmc_queue *);
 
+extern int mmc_access_rpmb(struct mmc_queue *);
+
 #endif
index c296bc0..92e7671 100644 (file)
@@ -2651,6 +2651,7 @@ int mmc_pm_notify(struct notifier_block *notify_block,
        switch (mode) {
        case PM_HIBERNATION_PREPARE:
        case PM_SUSPEND_PREPARE:
+       case PM_RESTORE_PREPARE:
                spin_lock_irqsave(&host->lock, flags);
                host->rescan_disable = 1;
                spin_unlock_irqrestore(&host->lock, flags);
index 03d7c75..9a39e0b 100644 (file)
@@ -1304,7 +1304,7 @@ static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
        if (ios->clock) {
                unsigned int clock_min = ~0U;
-               u32 clkdiv;
+               int clkdiv;
 
                spin_lock_bh(&host->lock);
                if (!host->mode_reg) {
@@ -1328,7 +1328,12 @@ static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
                /* Calculate clock divider */
                if (host->caps.has_odd_clk_div) {
                        clkdiv = DIV_ROUND_UP(host->bus_hz, clock_min) - 2;
-                       if (clkdiv > 511) {
+                       if (clkdiv < 0) {
+                               dev_warn(&mmc->class_dev,
+                                        "clock %u too fast; using %lu\n",
+                                        clock_min, host->bus_hz / 2);
+                               clkdiv = 0;
+                       } else if (clkdiv > 511) {
                                dev_warn(&mmc->class_dev,
                                         "clock %u too slow; using %lu\n",
                                         clock_min, host->bus_hz / (511 + 2));
index 38b2926..5f5adaf 100644 (file)
@@ -589,9 +589,11 @@ static int dw_mci_idmac_init(struct dw_mci *host)
                host->ring_size = PAGE_SIZE / sizeof(struct idmac_desc);
 
                /* Forward link the descriptor list */
-               for (i = 0, p = host->sg_cpu; i < host->ring_size - 1; i++, p++)
+               for (i = 0, p = host->sg_cpu; i < host->ring_size - 1; i++, p++) {
                        p->des3 = cpu_to_le32(host->sg_dma +
                                        (sizeof(struct idmac_desc) * (i + 1)));
+                       p->des1 = 0;
+               }
 
                /* Set the last descriptor as the end-of-ring descriptor */
                p->des3 = cpu_to_le32(host->sg_dma);
@@ -1300,7 +1302,8 @@ static int dw_mci_get_cd(struct mmc_host *mmc)
        int gpio_cd = mmc_gpio_get_cd(mmc);
 
        /* Use platform get_cd function, else try onboard card detect */
-       if (brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION)
+       if ((brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION) ||
+           (mmc->caps & MMC_CAP_NONREMOVABLE))
                present = 1;
        else if (!IS_ERR_VALUE(gpio_cd))
                present = gpio_cd;
index 2b6ef6b..7eff087 100644 (file)
@@ -1408,7 +1408,7 @@ static int sh_mmcif_probe(struct platform_device *pdev)
        host            = mmc_priv(mmc);
        host->mmc       = mmc;
        host->addr      = reg;
-       host->timeout   = msecs_to_jiffies(1000);
+       host->timeout   = msecs_to_jiffies(10000);
        host->ccs_enable = !pd || !pd->ccs_unsupported;
        host->clk_ctrl2_enable = pd && pd->clk_ctrl2_present;
 
index 7c8b169..3af137f 100644 (file)
@@ -223,7 +223,7 @@ static int m25p_probe(struct spi_device *spi)
         */
        if (data && data->type)
                flash_name = data->type;
-       else if (!strcmp(spi->modalias, "nor-jedec"))
+       else if (!strcmp(spi->modalias, "spi-nor"))
                flash_name = NULL; /* auto-detect */
        else
                flash_name = spi->modalias;
@@ -255,7 +255,7 @@ static int m25p_remove(struct spi_device *spi)
  * since most of these flash are compatible to some extent, and their
  * differences can often be differentiated by the JEDEC read-ID command, we
  * encourage new users to add support to the spi-nor library, and simply bind
- * against a generic string here (e.g., "nor-jedec").
+ * against a generic string here (e.g., "jedec,spi-nor").
  *
  * Many flash names are kept here in this list (as well as in spi-nor.c) to
  * keep them available as module aliases for existing platforms.
@@ -305,7 +305,7 @@ static const struct spi_device_id m25p_ids[] = {
         * Generic support for SPI NOR that can be identified by the JEDEC READ
         * ID opcode (0x9F). Use this, if possible.
         */
-       {"nor-jedec"},
+       {"spi-nor"},
        { },
 };
 MODULE_DEVICE_TABLE(spi, m25p_ids);
index a3196b7..58df07a 100644 (file)
@@ -191,9 +191,11 @@ static int __init mtd_readtest_init(void)
                                err = ret;
                }
 
-               err = mtdtest_relax();
-               if (err)
+               ret = mtdtest_relax();
+               if (ret) {
+                       err = ret;
                        goto out;
+               }
        }
 
        if (err)
index db2c05b..c9eb78f 100644 (file)
@@ -310,6 +310,8 @@ static void ubiblock_do_work(struct work_struct *work)
        blk_rq_map_sg(req->q, req, pdu->usgl.sg);
 
        ret = ubiblock_read(pdu);
+       rq_flush_dcache_pages(req);
+
        blk_mq_end_request(req, ret);
 }
 
index 4df2894..e8d3c1d 100644 (file)
@@ -624,7 +624,7 @@ int __bond_opt_set(struct bonding *bond,
 out:
        if (ret)
                bond_opt_error_interpret(bond, opt, ret, val);
-       else
+       else if (bond->dev->reg_state == NETREG_REGISTERED)
                call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev);
 
        return ret;
index 6bddfe0..fc55e8e 100644 (file)
@@ -509,10 +509,11 @@ static int xcan_rx(struct net_device *ndev)
                        cf->can_id |= CAN_RTR_FLAG;
        }
 
-       if (!(id_xcan & XCAN_IDR_SRR_MASK)) {
-               data[0] = priv->read_reg(priv, XCAN_RXFIFO_DW1_OFFSET);
-               data[1] = priv->read_reg(priv, XCAN_RXFIFO_DW2_OFFSET);
+       /* DW1/DW2 must always be read to remove message from RXFIFO */
+       data[0] = priv->read_reg(priv, XCAN_RXFIFO_DW1_OFFSET);
+       data[1] = priv->read_reg(priv, XCAN_RXFIFO_DW2_OFFSET);
 
+       if (!(cf->can_id & CAN_RTR_FLAG)) {
                /* Change Xilinx CAN data format to socketCAN data format */
                if (cf->can_dlc > 0)
                        *(__be32 *)(cf->data) = cpu_to_be32(data[0]);
index af639ab..cf309aa 100644 (file)
@@ -1469,6 +1469,9 @@ static void __exit mv88e6xxx_cleanup(void)
 #if IS_ENABLED(CONFIG_NET_DSA_MV88E6171)
        unregister_switch_driver(&mv88e6171_switch_driver);
 #endif
+#if IS_ENABLED(CONFIG_NET_DSA_MV88E6352)
+       unregister_switch_driver(&mv88e6352_switch_driver);
+#endif
 #if IS_ENABLED(CONFIG_NET_DSA_MV88E6123_61_65)
        unregister_switch_driver(&mv88e6123_61_65_switch_driver);
 #endif
index 089c269..4269160 100644 (file)
@@ -180,6 +180,7 @@ config SUNLANCE
 config AMD_XGBE
        tristate "AMD 10GbE Ethernet driver"
        depends on (OF_NET || ACPI) && HAS_IOMEM && HAS_DMA
+       depends on ARM64 || COMPILE_TEST
        select PHYLIB
        select AMD_XGBE_PHY
        select BITREVERSE
index db84ddc..9fd6c69 100644 (file)
@@ -423,7 +423,7 @@ static void xgbe_tx_timer(unsigned long data)
        if (napi_schedule_prep(napi)) {
                /* Disable Tx and Rx interrupts */
                if (pdata->per_channel_irq)
-                       disable_irq(channel->dma_irq);
+                       disable_irq_nosync(channel->dma_irq);
                else
                        xgbe_disable_rx_tx_ints(pdata);
 
index f4054d2..19e38af 100644 (file)
@@ -1,6 +1,7 @@
 config NET_XGENE
        tristate "APM X-Gene SoC Ethernet Driver"
        depends on HAS_DMA
+       depends on ARCH_XGENE || COMPILE_TEST
        select PHYLIB
        help
          This is the Ethernet driver for the on-chip ethernet interface on the
index 77363d6..a3b1c07 100644 (file)
@@ -2464,6 +2464,7 @@ err_out_powerdown:
        ssb_bus_may_powerdown(sdev->bus);
 
 err_out_free_dev:
+       netif_napi_del(&bp->napi);
        free_netdev(dev);
 
 out:
@@ -2480,6 +2481,7 @@ static void b44_remove_one(struct ssb_device *sdev)
                b44_unregister_phy_one(bp);
        ssb_device_disable(sdev, 0);
        ssb_bus_may_powerdown(sdev->bus);
+       netif_napi_del(&bp->napi);
        free_netdev(dev);
        ssb_pcihost_set_power_state(sdev, PCI_D3hot);
        ssb_set_drvdata(sdev, NULL);
index a3b0f7a..1f82a04 100644 (file)
@@ -1774,7 +1774,7 @@ struct bnx2x {
        int                     stats_state;
 
        /* used for synchronization of concurrent threads statistics handling */
-       struct mutex            stats_lock;
+       struct semaphore        stats_lock;
 
        /* used by dmae command loader */
        struct dmae_command     stats_dmae;
index a8bb8f6..ec56a9b 100644 (file)
@@ -4786,6 +4786,11 @@ int bnx2x_change_mtu(struct net_device *dev, int new_mtu)
 {
        struct bnx2x *bp = netdev_priv(dev);
 
+       if (pci_num_vf(bp->pdev)) {
+               DP(BNX2X_MSG_IOV, "VFs are enabled, can not change MTU\n");
+               return -EPERM;
+       }
+
        if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
                BNX2X_ERR("Can't perform change MTU during parity recovery\n");
                return -EAGAIN;
@@ -4938,11 +4943,6 @@ int bnx2x_resume(struct pci_dev *pdev)
        }
        bp = netdev_priv(dev);
 
-       if (pci_num_vf(bp->pdev)) {
-               DP(BNX2X_MSG_IOV, "VFs are enabled, can not change MTU\n");
-               return -EPERM;
-       }
-
        if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
                BNX2X_ERR("Handling parity error recovery. Try again later\n");
                return -EAGAIN;
index 556dcc1..33501bc 100644 (file)
@@ -12054,7 +12054,7 @@ static int bnx2x_init_bp(struct bnx2x *bp)
        mutex_init(&bp->port.phy_mutex);
        mutex_init(&bp->fw_mb_mutex);
        mutex_init(&bp->drv_info_mutex);
-       mutex_init(&bp->stats_lock);
+       sema_init(&bp->stats_lock, 1);
        bp->drv_info_mng_owner = false;
 
        INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task);
@@ -13371,8 +13371,13 @@ static int bnx2x_init_one(struct pci_dev *pdev,
        /* Management FW 'remembers' living interfaces. Allow it some time
         * to forget previously living interfaces, allowing a proper re-load.
         */
-       if (is_kdump_kernel())
-               msleep(5000);
+       if (is_kdump_kernel()) {
+               ktime_t now = ktime_get_boottime();
+               ktime_t fw_ready_time = ktime_set(5, 0);
+
+               if (ktime_before(now, fw_ready_time))
+                       msleep(ktime_ms_delta(fw_ready_time, now));
+       }
 
        /* An estimated maximum supported CoS number according to the chip
         * version.
@@ -13685,9 +13690,10 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
        cancel_delayed_work_sync(&bp->sp_task);
        cancel_delayed_work_sync(&bp->period_task);
 
-       mutex_lock(&bp->stats_lock);
-       bp->stats_state = STATS_STATE_DISABLED;
-       mutex_unlock(&bp->stats_lock);
+       if (!down_timeout(&bp->stats_lock, HZ / 10)) {
+               bp->stats_state = STATS_STATE_DISABLED;
+               up(&bp->stats_lock);
+       }
 
        bnx2x_save_statistics(bp);
 
index 266b055..69d699f 100644 (file)
@@ -1372,19 +1372,23 @@ void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event)
         * that context in case someone is in the middle of a transition.
         * For other events, wait a bit until lock is taken.
         */
-       if (!mutex_trylock(&bp->stats_lock)) {
+       if (down_trylock(&bp->stats_lock)) {
                if (event == STATS_EVENT_UPDATE)
                        return;
 
                DP(BNX2X_MSG_STATS,
                   "Unlikely stats' lock contention [event %d]\n", event);
-               mutex_lock(&bp->stats_lock);
+               if (unlikely(down_timeout(&bp->stats_lock, HZ / 10))) {
+                       BNX2X_ERR("Failed to take stats lock [event %d]\n",
+                                 event);
+                       return;
+               }
        }
 
        bnx2x_stats_stm[state][event].action(bp);
        bp->stats_state = bnx2x_stats_stm[state][event].next_state;
 
-       mutex_unlock(&bp->stats_lock);
+       up(&bp->stats_lock);
 
        if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp))
                DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n",
@@ -1970,7 +1974,11 @@ int bnx2x_stats_safe_exec(struct bnx2x *bp,
        /* Wait for statistics to end [while blocking further requests],
         * then run supplied function 'safely'.
         */
-       mutex_lock(&bp->stats_lock);
+       rc = down_timeout(&bp->stats_lock, HZ / 10);
+       if (unlikely(rc)) {
+               BNX2X_ERR("Failed to take statistics lock for safe execution\n");
+               goto out_no_lock;
+       }
 
        bnx2x_stats_comp(bp);
        while (bp->stats_pending && cnt--)
@@ -1988,7 +1996,7 @@ out:
        /* No need to restart statistics - if they're enabled, the timer
         * will restart the statistics.
         */
-       mutex_unlock(&bp->stats_lock);
-
+       up(&bp->stats_lock);
+out_no_lock:
        return rc;
 }
index e7651b3..420949c 100644 (file)
@@ -299,9 +299,6 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
                        phy_name = "external RGMII (no delay)";
                else
                        phy_name = "external RGMII (TX delay)";
-               reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
-               reg |= RGMII_MODE_EN | id_mode_dis;
-               bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
                bcmgenet_sys_writel(priv,
                                    PORT_MODE_EXT_GPHY, SYS_PORT_CTRL);
                break;
@@ -310,6 +307,15 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
                return -EINVAL;
        }
 
+       /* This is an external PHY (xMII), so we need to enable the RGMII
+        * block for the interface to work
+        */
+       if (priv->ext_phy) {
+               reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
+               reg |= RGMII_MODE_EN | id_mode_dis;
+               bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
+       }
+
        if (init)
                dev_info(kdev, "configuring instance for %s\n", phy_name);
 
index 594a2ab..68f3c13 100644 (file)
@@ -2414,7 +2414,7 @@ bfa_ioc_boot(struct bfa_ioc *ioc, enum bfi_fwboot_type boot_type,
        if (status == BFA_STATUS_OK)
                bfa_ioc_lpu_start(ioc);
        else
-               bfa_nw_iocpf_timeout(ioc);
+               bfa_fsm_send_event(&ioc->iocpf, IOCPF_E_TIMEOUT);
 
        return status;
 }
@@ -3029,7 +3029,7 @@ bfa_ioc_poll_fwinit(struct bfa_ioc *ioc)
        }
 
        if (ioc->iocpf.poll_time >= BFA_IOC_TOV) {
-               bfa_nw_iocpf_timeout(ioc);
+               bfa_fsm_send_event(&ioc->iocpf, IOCPF_E_TIMEOUT);
        } else {
                ioc->iocpf.poll_time += BFA_IOC_POLL_TOV;
                mod_timer(&ioc->iocpf_timer, jiffies +
index 37072a8..caae6cb 100644 (file)
@@ -3701,10 +3701,6 @@ bnad_pci_probe(struct pci_dev *pdev,
        setup_timer(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout,
                                ((unsigned long)bnad));
 
-       /* Now start the timer before calling IOC */
-       mod_timer(&bnad->bna.ioceth.ioc.iocpf_timer,
-                 jiffies + msecs_to_jiffies(BNA_IOC_TIMER_FREQ));
-
        /*
         * Start the chip
         * If the call back comes with error, we bail out.
index ebf462d..badea36 100644 (file)
@@ -30,6 +30,7 @@ cna_read_firmware(struct pci_dev *pdev, u32 **bfi_image,
                        u32 *bfi_image_size, char *fw_name)
 {
        const struct firmware *fw;
+       u32 n;
 
        if (request_firmware(&fw, fw_name, &pdev->dev)) {
                pr_alert("Can't locate firmware %s\n", fw_name);
@@ -40,6 +41,12 @@ cna_read_firmware(struct pci_dev *pdev, u32 **bfi_image,
        *bfi_image_size = fw->size/sizeof(u32);
        bfi_fw = fw;
 
+       /* Convert loaded firmware to host order as it is stored in file
+        * as sequence of LE32 integers.
+        */
+       for (n = 0; n < *bfi_image_size; n++)
+               le32_to_cpus(*bfi_image + n);
+
        return *bfi_image;
 error:
        return NULL;
index 4104d49..fc646a4 100644 (file)
@@ -350,6 +350,9 @@ static int macb_mii_probe(struct net_device *dev)
        else
                phydev->supported &= PHY_BASIC_FEATURES;
 
+       if (bp->caps & MACB_CAPS_NO_GIGABIT_HALF)
+               phydev->supported &= ~SUPPORTED_1000baseT_Half;
+
        phydev->advertising = phydev->supported;
 
        bp->link = 0;
@@ -981,7 +984,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
        struct macb_queue *queue = dev_id;
        struct macb *bp = queue->bp;
        struct net_device *dev = bp->dev;
-       u32 status;
+       u32 status, ctrl;
 
        status = queue_readl(queue, ISR);
 
@@ -1037,6 +1040,21 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
                 * add that if/when we get our hands on a full-blown MII PHY.
                 */
 
+               /* There is a hardware issue under heavy load where DMA can
+                * stop, this causes endless "used buffer descriptor read"
+                * interrupts but it can be cleared by re-enabling RX. See
+                * the at91 manual, section 41.3.1 or the Zynq manual
+                * section 16.7.4 for details.
+                */
+               if (status & MACB_BIT(RXUBR)) {
+                       ctrl = macb_readl(bp, NCR);
+                       macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE));
+                       macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
+
+                       if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+                               macb_writel(bp, ISR, MACB_BIT(RXUBR));
+               }
+
                if (status & MACB_BIT(ISR_ROVR)) {
                        /* We missed at least one packet */
                        if (macb_is_gem(bp))
@@ -2684,6 +2702,14 @@ static const struct macb_config emac_config = {
        .init = at91ether_init,
 };
 
+static const struct macb_config zynq_config = {
+       .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE |
+               MACB_CAPS_NO_GIGABIT_HALF,
+       .dma_burst_length = 16,
+       .clk_init = macb_clk_init,
+       .init = macb_init,
+};
+
 static const struct of_device_id macb_dt_ids[] = {
        { .compatible = "cdns,at32ap7000-macb" },
        { .compatible = "cdns,at91sam9260-macb", .data = &at91sam9260_config },
@@ -2694,6 +2720,7 @@ static const struct of_device_id macb_dt_ids[] = {
        { .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config },
        { .compatible = "cdns,at91rm9200-emac", .data = &emac_config },
        { .compatible = "cdns,emac", .data = &emac_config },
+       { .compatible = "cdns,zynq-gem", .data = &zynq_config },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, macb_dt_ids);
index eb7d76f..24b1d9b 100644 (file)
 #define MACB_CAPS_ISR_CLEAR_ON_WRITE           0x00000001
 #define MACB_CAPS_USRIO_HAS_CLKEN              0x00000002
 #define MACB_CAPS_USRIO_DEFAULT_IS_MII         0x00000004
+#define MACB_CAPS_NO_GIGABIT_HALF              0x00000008
 #define MACB_CAPS_FIFO_MODE                    0x10000000
 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE       0x20000000
 #define MACB_CAPS_SG_DISABLED                  0x40000000
index 28d9ca6..68d47b1 100644 (file)
@@ -131,8 +131,15 @@ static void enic_get_drvinfo(struct net_device *netdev,
 {
        struct enic *enic = netdev_priv(netdev);
        struct vnic_devcmd_fw_info *fw_info;
+       int err;
 
-       enic_dev_fw_info(enic, &fw_info);
+       err = enic_dev_fw_info(enic, &fw_info);
+       /* return only when pci_zalloc_consistent fails in vnic_dev_fw_info
+        * For other failures, like devcmd failure, we return previously
+        * recorded info.
+        */
+       if (err == -ENOMEM)
+               return;
 
        strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
        strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
@@ -181,8 +188,15 @@ static void enic_get_ethtool_stats(struct net_device *netdev,
        struct enic *enic = netdev_priv(netdev);
        struct vnic_stats *vstats;
        unsigned int i;
-
-       enic_dev_stats_dump(enic, &vstats);
+       int err;
+
+       err = enic_dev_stats_dump(enic, &vstats);
+       /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump
+        * For other failures, like devcmd failure, we return previously
+        * recorded stats.
+        */
+       if (err == -ENOMEM)
+               return;
 
        for (i = 0; i < enic_n_tx_stats; i++)
                *(data++) = ((u64 *)&vstats->tx)[enic_tx_stats[i].index];
index 204bd18..eadae1b 100644 (file)
@@ -615,8 +615,15 @@ static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev,
 {
        struct enic *enic = netdev_priv(netdev);
        struct vnic_stats *stats;
+       int err;
 
-       enic_dev_stats_dump(enic, &stats);
+       err = enic_dev_stats_dump(enic, &stats);
+       /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump
+        * For other failures, like devcmd failure, we return previously
+        * recorded stats.
+        */
+       if (err == -ENOMEM)
+               return net_stats;
 
        net_stats->tx_packets = stats->tx.tx_frames_ok;
        net_stats->tx_bytes = stats->tx.tx_bytes_ok;
@@ -1407,6 +1414,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
                 */
                enic_calc_int_moderation(enic, &enic->rq[rq]);
 
+       enic_poll_unlock_napi(&enic->rq[rq]);
        if (work_done < work_to_do) {
 
                /* Some work done, but not enough to stay in polling,
@@ -1418,7 +1426,6 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
                        enic_set_int_moderation(enic, &enic->rq[rq]);
                vnic_intr_unmask(&enic->intr[intr]);
        }
-       enic_poll_unlock_napi(&enic->rq[rq]);
 
        return work_done;
 }
index 36a2ed6..c4b2183 100644 (file)
@@ -188,16 +188,15 @@ void vnic_rq_clean(struct vnic_rq *rq,
        struct vnic_rq_buf *buf;
        u32 fetch_index;
        unsigned int count = rq->ring.desc_count;
+       int i;
 
        buf = rq->to_clean;
 
-       while (vnic_rq_desc_used(rq) > 0) {
-
+       for (i = 0; i < rq->ring.desc_count; i++) {
                (*buf_clean)(rq, buf);
-
-               buf = rq->to_clean = buf->next;
-               rq->ring.desc_avail++;
+               buf = buf->next;
        }
+       rq->ring.desc_avail = rq->ring.desc_count - 1;
 
        /* Use current fetch_index as the ring starting point */
        fetch_index = ioread32(&rq->ctrl->fetch_index);
index fb140fa..c5e1d0a 100644 (file)
@@ -1720,9 +1720,9 @@ int be_cmd_get_regs(struct be_adapter *adapter, u32 buf_len, void *buf)
        total_size = buf_len;
 
        get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60*1024;
-       get_fat_cmd.va = pci_alloc_consistent(adapter->pdev,
-                                             get_fat_cmd.size,
-                                             &get_fat_cmd.dma);
+       get_fat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+                                            get_fat_cmd.size,
+                                            &get_fat_cmd.dma, GFP_ATOMIC);
        if (!get_fat_cmd.va) {
                dev_err(&adapter->pdev->dev,
                        "Memory allocation failure while reading FAT data\n");
@@ -1767,8 +1767,8 @@ int be_cmd_get_regs(struct be_adapter *adapter, u32 buf_len, void *buf)
                log_offset += buf_size;
        }
 err:
-       pci_free_consistent(adapter->pdev, get_fat_cmd.size,
-                           get_fat_cmd.va, get_fat_cmd.dma);
+       dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size,
+                         get_fat_cmd.va, get_fat_cmd.dma);
        spin_unlock_bh(&adapter->mcc_lock);
        return status;
 }
@@ -2215,12 +2215,12 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
                return -EINVAL;
 
        cmd.size = sizeof(struct be_cmd_resp_port_type);
-       cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+       cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+                                    GFP_ATOMIC);
        if (!cmd.va) {
                dev_err(&adapter->pdev->dev, "Memory allocation failed\n");
                return -ENOMEM;
        }
-       memset(cmd.va, 0, cmd.size);
 
        spin_lock_bh(&adapter->mcc_lock);
 
@@ -2245,7 +2245,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
        }
 err:
        spin_unlock_bh(&adapter->mcc_lock);
-       pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+       dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
        return status;
 }
 
@@ -2720,7 +2720,8 @@ int be_cmd_get_phy_info(struct be_adapter *adapter)
                goto err;
        }
        cmd.size = sizeof(struct be_cmd_req_get_phy_info);
-       cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+       cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+                                    GFP_ATOMIC);
        if (!cmd.va) {
                dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
                status = -ENOMEM;
@@ -2754,7 +2755,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter)
                                BE_SUPPORTED_SPEED_1GBPS;
                }
        }
-       pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+       dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 err:
        spin_unlock_bh(&adapter->mcc_lock);
        return status;
@@ -2805,8 +2806,9 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter)
 
        memset(&attribs_cmd, 0, sizeof(struct be_dma_mem));
        attribs_cmd.size = sizeof(struct be_cmd_resp_cntl_attribs);
-       attribs_cmd.va = pci_alloc_consistent(adapter->pdev, attribs_cmd.size,
-                                             &attribs_cmd.dma);
+       attribs_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+                                            attribs_cmd.size,
+                                            &attribs_cmd.dma, GFP_ATOMIC);
        if (!attribs_cmd.va) {
                dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
                status = -ENOMEM;
@@ -2833,8 +2835,8 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter)
 err:
        mutex_unlock(&adapter->mbox_lock);
        if (attribs_cmd.va)
-               pci_free_consistent(adapter->pdev, attribs_cmd.size,
-                                   attribs_cmd.va, attribs_cmd.dma);
+               dma_free_coherent(&adapter->pdev->dev, attribs_cmd.size,
+                                 attribs_cmd.va, attribs_cmd.dma);
        return status;
 }
 
@@ -2972,9 +2974,10 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac,
 
        memset(&get_mac_list_cmd, 0, sizeof(struct be_dma_mem));
        get_mac_list_cmd.size = sizeof(struct be_cmd_resp_get_mac_list);
-       get_mac_list_cmd.va = pci_alloc_consistent(adapter->pdev,
-                                                  get_mac_list_cmd.size,
-                                                  &get_mac_list_cmd.dma);
+       get_mac_list_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+                                                 get_mac_list_cmd.size,
+                                                 &get_mac_list_cmd.dma,
+                                                 GFP_ATOMIC);
 
        if (!get_mac_list_cmd.va) {
                dev_err(&adapter->pdev->dev,
@@ -3047,8 +3050,8 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac,
 
 out:
        spin_unlock_bh(&adapter->mcc_lock);
-       pci_free_consistent(adapter->pdev, get_mac_list_cmd.size,
-                           get_mac_list_cmd.va, get_mac_list_cmd.dma);
+       dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size,
+                         get_mac_list_cmd.va, get_mac_list_cmd.dma);
        return status;
 }
 
@@ -3101,8 +3104,8 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array,
 
        memset(&cmd, 0, sizeof(struct be_dma_mem));
        cmd.size = sizeof(struct be_cmd_req_set_mac_list);
-       cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size,
-                                   &cmd.dma, GFP_KERNEL);
+       cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+                                    GFP_KERNEL);
        if (!cmd.va)
                return -ENOMEM;
 
@@ -3291,7 +3294,8 @@ int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter)
 
        memset(&cmd, 0, sizeof(struct be_dma_mem));
        cmd.size = sizeof(struct be_cmd_resp_acpi_wol_magic_config_v1);
-       cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+       cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+                                    GFP_ATOMIC);
        if (!cmd.va) {
                dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
                status = -ENOMEM;
@@ -3326,7 +3330,8 @@ int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter)
 err:
        mutex_unlock(&adapter->mbox_lock);
        if (cmd.va)
-               pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+               dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+                                 cmd.dma);
        return status;
 
 }
@@ -3340,8 +3345,9 @@ int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level)
 
        memset(&extfat_cmd, 0, sizeof(struct be_dma_mem));
        extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps);
-       extfat_cmd.va = pci_alloc_consistent(adapter->pdev, extfat_cmd.size,
-                                            &extfat_cmd.dma);
+       extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+                                           extfat_cmd.size, &extfat_cmd.dma,
+                                           GFP_ATOMIC);
        if (!extfat_cmd.va)
                return -ENOMEM;
 
@@ -3363,8 +3369,8 @@ int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level)
 
        status = be_cmd_set_ext_fat_capabilites(adapter, &extfat_cmd, cfgs);
 err:
-       pci_free_consistent(adapter->pdev, extfat_cmd.size, extfat_cmd.va,
-                           extfat_cmd.dma);
+       dma_free_coherent(&adapter->pdev->dev, extfat_cmd.size, extfat_cmd.va,
+                         extfat_cmd.dma);
        return status;
 }
 
@@ -3377,8 +3383,9 @@ int be_cmd_get_fw_log_level(struct be_adapter *adapter)
 
        memset(&extfat_cmd, 0, sizeof(struct be_dma_mem));
        extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps);
-       extfat_cmd.va = pci_alloc_consistent(adapter->pdev, extfat_cmd.size,
-                                            &extfat_cmd.dma);
+       extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+                                           extfat_cmd.size, &extfat_cmd.dma,
+                                           GFP_ATOMIC);
 
        if (!extfat_cmd.va) {
                dev_err(&adapter->pdev->dev, "%s: Memory allocation failure\n",
@@ -3396,8 +3403,8 @@ int be_cmd_get_fw_log_level(struct be_adapter *adapter)
                                level = cfgs->module[0].trace_lvl[j].dbg_lvl;
                }
        }
-       pci_free_consistent(adapter->pdev, extfat_cmd.size, extfat_cmd.va,
-                           extfat_cmd.dma);
+       dma_free_coherent(&adapter->pdev->dev, extfat_cmd.size, extfat_cmd.va,
+                         extfat_cmd.dma);
 err:
        return level;
 }
@@ -3595,7 +3602,8 @@ int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res)
 
        memset(&cmd, 0, sizeof(struct be_dma_mem));
        cmd.size = sizeof(struct be_cmd_resp_get_func_config);
-       cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+       cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+                                    GFP_ATOMIC);
        if (!cmd.va) {
                dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
                status = -ENOMEM;
@@ -3635,7 +3643,8 @@ int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res)
 err:
        mutex_unlock(&adapter->mbox_lock);
        if (cmd.va)
-               pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+               dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+                                 cmd.dma);
        return status;
 }
 
@@ -3656,7 +3665,8 @@ int be_cmd_get_profile_config(struct be_adapter *adapter,
 
        memset(&cmd, 0, sizeof(struct be_dma_mem));
        cmd.size = sizeof(struct be_cmd_resp_get_profile_config);
-       cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+       cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+                                    GFP_ATOMIC);
        if (!cmd.va)
                return -ENOMEM;
 
@@ -3702,7 +3712,8 @@ int be_cmd_get_profile_config(struct be_adapter *adapter,
                res->vf_if_cap_flags = vf_res->cap_flags;
 err:
        if (cmd.va)
-               pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+               dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+                                 cmd.dma);
        return status;
 }
 
@@ -3717,7 +3728,8 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc,
 
        memset(&cmd, 0, sizeof(struct be_dma_mem));
        cmd.size = sizeof(struct be_cmd_req_set_profile_config);
-       cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+       cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+                                    GFP_ATOMIC);
        if (!cmd.va)
                return -ENOMEM;
 
@@ -3733,7 +3745,8 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc,
        status = be_cmd_notify_wait(adapter, &wrb);
 
        if (cmd.va)
-               pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+               dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+                                 cmd.dma);
        return status;
 }
 
index b765c24..2835dee 100644 (file)
@@ -264,8 +264,8 @@ static int lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name,
        int status = 0;
 
        read_cmd.size = LANCER_READ_FILE_CHUNK;
-       read_cmd.va = pci_alloc_consistent(adapter->pdev, read_cmd.size,
-                                          &read_cmd.dma);
+       read_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, read_cmd.size,
+                                         &read_cmd.dma, GFP_ATOMIC);
 
        if (!read_cmd.va) {
                dev_err(&adapter->pdev->dev,
@@ -289,8 +289,8 @@ static int lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name,
                        break;
                }
        }
-       pci_free_consistent(adapter->pdev, read_cmd.size, read_cmd.va,
-                           read_cmd.dma);
+       dma_free_coherent(&adapter->pdev->dev, read_cmd.size, read_cmd.va,
+                         read_cmd.dma);
 
        return status;
 }
@@ -818,8 +818,9 @@ static int be_test_ddr_dma(struct be_adapter *adapter)
        };
 
        ddrdma_cmd.size = sizeof(struct be_cmd_req_ddrdma_test);
-       ddrdma_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, ddrdma_cmd.size,
-                                          &ddrdma_cmd.dma, GFP_KERNEL);
+       ddrdma_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+                                           ddrdma_cmd.size, &ddrdma_cmd.dma,
+                                           GFP_KERNEL);
        if (!ddrdma_cmd.va)
                return -ENOMEM;
 
@@ -941,8 +942,9 @@ static int be_read_eeprom(struct net_device *netdev,
 
        memset(&eeprom_cmd, 0, sizeof(struct be_dma_mem));
        eeprom_cmd.size = sizeof(struct be_cmd_req_seeprom_read);
-       eeprom_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, eeprom_cmd.size,
-                                          &eeprom_cmd.dma, GFP_KERNEL);
+       eeprom_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+                                           eeprom_cmd.size, &eeprom_cmd.dma,
+                                           GFP_KERNEL);
 
        if (!eeprom_cmd.va)
                return -ENOMEM;
index a6dcbf8..e43cc8a 100644 (file)
@@ -2358,11 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter)
                                    adapter->cfg_num_qs);
 
        for_all_evt_queues(adapter, eqo, i) {
+               int numa_node = dev_to_node(&adapter->pdev->dev);
                if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
                        return -ENOMEM;
-               cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev),
-                                           eqo->affinity_mask);
-
+               cpumask_set_cpu(cpumask_local_spread(i, numa_node),
+                               eqo->affinity_mask);
                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
                               BE_NAPI_WEIGHT);
                napi_hash_add(&eqo->napi);
@@ -4605,8 +4605,8 @@ static int lancer_fw_download(struct be_adapter *adapter,
 
        flash_cmd.size = sizeof(struct lancer_cmd_req_write_object)
                                + LANCER_FW_DOWNLOAD_CHUNK;
-       flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size,
-                                         &flash_cmd.dma, GFP_KERNEL);
+       flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size,
+                                          &flash_cmd.dma, GFP_KERNEL);
        if (!flash_cmd.va)
                return -ENOMEM;
 
@@ -4739,8 +4739,8 @@ static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw)
        }
 
        flash_cmd.size = sizeof(struct be_cmd_write_flashrom);
-       flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size, &flash_cmd.dma,
-                                         GFP_KERNEL);
+       flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size, &flash_cmd.dma,
+                                          GFP_KERNEL);
        if (!flash_cmd.va)
                return -ENOMEM;
 
@@ -5291,16 +5291,15 @@ static int be_drv_init(struct be_adapter *adapter)
        int status = 0;
 
        mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
-       mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
-                                               &mbox_mem_alloc->dma,
-                                               GFP_KERNEL);
+       mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
+                                                &mbox_mem_alloc->dma,
+                                                GFP_KERNEL);
        if (!mbox_mem_alloc->va)
                return -ENOMEM;
 
        mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
        mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
        mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
-       memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox));
 
        rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
        rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
index de79193..b9df0cb 100644 (file)
@@ -2084,12 +2084,8 @@ static void emac_ethtool_get_pauseparam(struct net_device *ndev,
 
 static int emac_get_regs_len(struct emac_instance *dev)
 {
-       if (emac_has_feature(dev, EMAC_FTR_EMAC4))
-               return sizeof(struct emac_ethtool_regs_subhdr) +
-                       EMAC4_ETHTOOL_REGS_SIZE(dev);
-       else
                return sizeof(struct emac_ethtool_regs_subhdr) +
-                       EMAC_ETHTOOL_REGS_SIZE(dev);
+                       sizeof(struct emac_regs);
 }
 
 static int emac_ethtool_get_regs_len(struct net_device *ndev)
@@ -2114,15 +2110,15 @@ static void *emac_dump_regs(struct emac_instance *dev, void *buf)
        struct emac_ethtool_regs_subhdr *hdr = buf;
 
        hdr->index = dev->cell_index;
-       if (emac_has_feature(dev, EMAC_FTR_EMAC4)) {
+       if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) {
+               hdr->version = EMAC4SYNC_ETHTOOL_REGS_VER;
+       } else if (emac_has_feature(dev, EMAC_FTR_EMAC4)) {
                hdr->version = EMAC4_ETHTOOL_REGS_VER;
-               memcpy_fromio(hdr + 1, dev->emacp, EMAC4_ETHTOOL_REGS_SIZE(dev));
-               return (void *)(hdr + 1) + EMAC4_ETHTOOL_REGS_SIZE(dev);
        } else {
                hdr->version = EMAC_ETHTOOL_REGS_VER;
-               memcpy_fromio(hdr + 1, dev->emacp, EMAC_ETHTOOL_REGS_SIZE(dev));
-               return (void *)(hdr + 1) + EMAC_ETHTOOL_REGS_SIZE(dev);
        }
+       memcpy_fromio(hdr + 1, dev->emacp, sizeof(struct emac_regs));
+       return (void *)(hdr + 1) + sizeof(struct emac_regs);
 }
 
 static void emac_ethtool_get_regs(struct net_device *ndev,
index 67f342a..28df374 100644 (file)
@@ -461,10 +461,7 @@ struct emac_ethtool_regs_subhdr {
 };
 
 #define EMAC_ETHTOOL_REGS_VER          0
-#define EMAC_ETHTOOL_REGS_SIZE(dev)    ((dev)->rsrc_regs.end - \
-                                        (dev)->rsrc_regs.start + 1)
-#define EMAC4_ETHTOOL_REGS_VER         1
-#define EMAC4_ETHTOOL_REGS_SIZE(dev)   ((dev)->rsrc_regs.end - \
-                                        (dev)->rsrc_regs.start + 1)
+#define EMAC4_ETHTOOL_REGS_VER         1
+#define EMAC4SYNC_ETHTOOL_REGS_VER     2
 
 #endif /* __IBM_NEWEMAC_CORE_H */
index 5d9ceb1..0abc942 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/ptp_classify.h>
 #include <linux/mii.h>
 #include <linux/mdio.h>
+#include <linux/pm_qos.h>
 #include "hw.h"
 
 struct e1000_info;
index 1b0661e..c754b20 100644 (file)
@@ -610,7 +610,7 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
        unsigned int total_bytes = 0, total_packets = 0;
        u16 cleaned_count = fm10k_desc_unused(rx_ring);
 
-       do {
+       while (likely(total_packets < budget)) {
                union fm10k_rx_desc *rx_desc;
 
                /* return some buffers to hardware, one at a time is too slow */
@@ -659,7 +659,7 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
 
                /* update budget accounting */
                total_packets++;
-       } while (likely(total_packets < budget));
+       }
 
        /* place incomplete frames back on ring for completion */
        rx_ring->skb = skb;
index 33c35d3..5d47307 100644 (file)
@@ -317,6 +317,7 @@ struct i40e_pf {
 #endif
 #define I40E_FLAG_PORT_ID_VALID                (u64)(1 << 28)
 #define I40E_FLAG_DCB_CAPABLE                  (u64)(1 << 29)
+#define I40E_FLAG_VEB_MODE_ENABLED             BIT_ULL(40)
 
        /* tracks features that get auto disabled by errors */
        u64 auto_disable_flags;
index 34170ea..da0faf4 100644 (file)
@@ -1021,6 +1021,15 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
                        goto command_write_done;
                }
 
+               /* By default we are in VEPA mode, if this is the first VF/VMDq
+                * VSI to be added switch to VEB mode.
+                */
+               if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+                       pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+                       i40e_do_reset_safe(pf,
+                                          BIT_ULL(__I40E_PF_RESET_REQUESTED));
+               }
+
                vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
                if (vsi)
                        dev_info(&pf->pdev->dev, "added VSI %d to relay %d\n",
index a54c144..5b5bea1 100644 (file)
@@ -6097,6 +6097,10 @@ static int i40e_reconstitute_veb(struct i40e_veb *veb)
        if (ret)
                goto end_reconstitute;
 
+       if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
+               veb->bridge_mode = BRIDGE_MODE_VEB;
+       else
+               veb->bridge_mode = BRIDGE_MODE_VEPA;
        i40e_config_bridge_mode(veb);
 
        /* create the remaining VSIs attached to this VEB */
@@ -8031,7 +8035,12 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
                } else if (mode != veb->bridge_mode) {
                        /* Existing HW bridge but different mode needs reset */
                        veb->bridge_mode = mode;
-                       i40e_do_reset(pf, (1 << __I40E_PF_RESET_REQUESTED));
+                       /* TODO: If no VFs or VMDq VSIs, disallow VEB mode */
+                       if (mode == BRIDGE_MODE_VEB)
+                               pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+                       else
+                               pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+                       i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
                        break;
                }
        }
@@ -8343,11 +8352,12 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
                ctxt.uplink_seid = vsi->uplink_seid;
                ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
                ctxt.flags = I40E_AQ_VSI_TYPE_PF;
-               if (i40e_is_vsi_uplink_mode_veb(vsi)) {
+               if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) &&
+                   (i40e_is_vsi_uplink_mode_veb(vsi))) {
                        ctxt.info.valid_sections |=
-                               cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+                            cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
                        ctxt.info.switch_id =
-                               cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+                          cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
                }
                i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
                break;
@@ -8746,6 +8756,14 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
                                         __func__);
                                return NULL;
                        }
+                       /* We come up by default in VEPA mode if SRIOV is not
+                        * already enabled, in which case we can't force VEPA
+                        * mode.
+                        */
+                       if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+                               veb->bridge_mode = BRIDGE_MODE_VEPA;
+                               pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+                       }
                        i40e_config_bridge_mode(veb);
                }
                for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
@@ -9856,6 +9874,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_switch_setup;
        }
 
+#ifdef CONFIG_PCI_IOV
+       /* prep for VF support */
+       if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
+           (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+           !test_bit(__I40E_BAD_EEPROM, &pf->state)) {
+               if (pci_num_vf(pdev))
+                       pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+       }
+#endif
        err = i40e_setup_pf_switch(pf, false);
        if (err) {
                dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
index 4bd3a80..9d95042 100644 (file)
@@ -2410,14 +2410,12 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
  * @skb:      send buffer
  * @tx_flags: collected send information
- * @hdr_len:  size of the packet header
  *
  * Note: Our HW can't scatter-gather more than 8 fragments to build
  * a packet on the wire and so we need to figure out the cases where we
  * need to linearize the skb.
  **/
-static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
-                              const u8 hdr_len)
+static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
 {
        struct skb_frag_struct *frag;
        bool linearize = false;
@@ -2429,7 +2427,7 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
        gso_segs = skb_shinfo(skb)->gso_segs;
 
        if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
-               u16 j = 1;
+               u16 j = 0;
 
                if (num_frags < (I40E_MAX_BUFFER_TXD))
                        goto linearize_chk_done;
@@ -2440,21 +2438,18 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
                        goto linearize_chk_done;
                }
                frag = &skb_shinfo(skb)->frags[0];
-               size = hdr_len;
                /* we might still have more fragments per segment */
                do {
                        size += skb_frag_size(frag);
                        frag++; j++;
+                       if ((size >= skb_shinfo(skb)->gso_size) &&
+                           (j < I40E_MAX_BUFFER_TXD)) {
+                               size = (size % skb_shinfo(skb)->gso_size);
+                               j = (size) ? 1 : 0;
+                       }
                        if (j == I40E_MAX_BUFFER_TXD) {
-                               if (size < skb_shinfo(skb)->gso_size) {
-                                       linearize = true;
-                                       break;
-                               }
-                               j = 1;
-                               size -= skb_shinfo(skb)->gso_size;
-                               if (size)
-                                       j++;
-                               size += hdr_len;
+                               linearize = true;
+                               break;
                        }
                        num_frags--;
                } while (num_frags);
@@ -2724,7 +2719,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
        if (tsyn)
                tx_flags |= I40E_TX_FLAGS_TSYN;
 
-       if (i40e_chk_linearize(skb, tx_flags, hdr_len))
+       if (i40e_chk_linearize(skb, tx_flags))
                if (skb_linearize(skb))
                        goto out_drop;
 
index 78d1c4f..4e9376d 100644 (file)
@@ -1018,11 +1018,19 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
        struct i40e_pf *pf = pci_get_drvdata(pdev);
 
-       if (num_vfs)
+       if (num_vfs) {
+               if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+                       pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+                       i40e_do_reset_safe(pf,
+                                          BIT_ULL(__I40E_PF_RESET_REQUESTED));
+               }
                return i40e_pci_sriov_enable(pdev, num_vfs);
+       }
 
        if (!pci_vfs_assigned(pf->pdev)) {
                i40e_free_vfs(pf);
+               pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+               i40e_do_reset_safe(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
        } else {
                dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
                return -EINVAL;
index b077e02..458fbb4 100644 (file)
@@ -1619,14 +1619,12 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
  * @skb:      send buffer
  * @tx_flags: collected send information
- * @hdr_len:  size of the packet header
  *
  * Note: Our HW can't scatter-gather more than 8 fragments to build
  * a packet on the wire and so we need to figure out the cases where we
  * need to linearize the skb.
  **/
-static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
-                              const u8 hdr_len)
+static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
 {
        struct skb_frag_struct *frag;
        bool linearize = false;
@@ -1638,7 +1636,7 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
        gso_segs = skb_shinfo(skb)->gso_segs;
 
        if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
-               u16 j = 1;
+               u16 j = 0;
 
                if (num_frags < (I40E_MAX_BUFFER_TXD))
                        goto linearize_chk_done;
@@ -1649,21 +1647,18 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
                        goto linearize_chk_done;
                }
                frag = &skb_shinfo(skb)->frags[0];
-               size = hdr_len;
                /* we might still have more fragments per segment */
                do {
                        size += skb_frag_size(frag);
                        frag++; j++;
+                       if ((size >= skb_shinfo(skb)->gso_size) &&
+                           (j < I40E_MAX_BUFFER_TXD)) {
+                               size = (size % skb_shinfo(skb)->gso_size);
+                               j = (size) ? 1 : 0;
+                       }
                        if (j == I40E_MAX_BUFFER_TXD) {
-                               if (size < skb_shinfo(skb)->gso_size) {
-                                       linearize = true;
-                                       break;
-                               }
-                               j = 1;
-                               size -= skb_shinfo(skb)->gso_size;
-                               if (size)
-                                       j++;
-                               size += hdr_len;
+                               linearize = true;
+                               break;
                        }
                        num_frags--;
                } while (num_frags);
@@ -1950,7 +1945,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
        else if (tso)
                tx_flags |= I40E_TX_FLAGS_TSO;
 
-       if (i40e_chk_linearize(skb, tx_flags, hdr_len))
+       if (i40e_chk_linearize(skb, tx_flags))
                if (skb_linearize(skb))
                        goto out_drop;
 
index 8457d03..a0a9b1f 100644 (file)
@@ -1036,7 +1036,7 @@ static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx)
                adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
 
        if (q_vector->rx.ring)
-               adapter->tx_ring[q_vector->rx.ring->queue_index] = NULL;
+               adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
 
        netif_napi_del(&q_vector->napi);
 
@@ -1207,6 +1207,8 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
        q_vector = adapter->q_vector[v_idx];
        if (!q_vector)
                q_vector = kzalloc(size, GFP_KERNEL);
+       else
+               memset(q_vector, 0, size);
        if (!q_vector)
                return -ENOMEM;
 
index e3b9b63..c3a9392 100644 (file)
@@ -538,8 +538,8 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
                        igb->perout[i].start.tv_nsec = rq->perout.start.nsec;
                        igb->perout[i].period.tv_sec = ts.tv_sec;
                        igb->perout[i].period.tv_nsec = ts.tv_nsec;
-                       wr32(trgttiml, rq->perout.start.sec);
-                       wr32(trgttimh, rq->perout.start.nsec);
+                       wr32(trgttimh, rq->perout.start.sec);
+                       wr32(trgttiml, rq->perout.start.nsec);
                        tsauxc |= tsauxc_mask;
                        tsim |= tsim_mask;
                } else {
index a16d267..e71cdde 100644 (file)
@@ -3612,7 +3612,7 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
        u8 *dst_mac = skb_header_pointer(skb, 0, 0, NULL);
 
        if (!dst_mac || is_link_local_ether_addr(dst_mac)) {
-               dev_kfree_skb(skb);
+               dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
        }
 
index 4f7dc04..529ef05 100644 (file)
@@ -714,8 +714,13 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
                                         msecs_to_jiffies(timeout))) {
                mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
                          op);
-               err = -EIO;
-               goto out_reset;
+               if (op == MLX4_CMD_NOP) {
+                       err = -EBUSY;
+                       goto out;
+               } else {
+                       err = -EIO;
+                       goto out_reset;
+               }
        }
 
        err = context->result;
index 32f5ec7..cf467a9 100644 (file)
@@ -1501,17 +1501,13 @@ static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
 {
        struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
        int numa_node = priv->mdev->dev->numa_node;
-       int ret = 0;
 
        if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))
                return -ENOMEM;
 
-       ret = cpumask_set_cpu_local_first(ring_idx, numa_node,
-                                         ring->affinity_mask);
-       if (ret)
-               free_cpumask_var(ring->affinity_mask);
-
-       return ret;
+       cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node),
+                       ring->affinity_mask);
+       return 0;
 }
 
 static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
index 54f0e5a..0a56f01 100644 (file)
@@ -139,7 +139,7 @@ static unsigned long en_stats_adder(__be64 *start, __be64 *next, int num)
        int i;
        int offset = next - start;
 
-       for (i = 0; i <= num; i++) {
+       for (i = 0; i < num; i++) {
                ret += be64_to_cpu(*curr);
                curr += offset;
        }
index f7bf312..7bed3a8 100644 (file)
@@ -144,9 +144,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
        ring->queue_index = queue_index;
 
        if (queue_index < priv->num_tx_rings_p_up)
-               cpumask_set_cpu_local_first(queue_index,
-                                           priv->mdev->dev->numa_node,
-                                           &ring->affinity_mask);
+               cpumask_set_cpu(cpumask_local_spread(queue_index,
+                                                    priv->mdev->dev->numa_node),
+                               &ring->affinity_mask);
 
        *pring = ring;
        return 0;
index c7f28bf..bafe218 100644 (file)
@@ -2845,7 +2845,7 @@ int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
 {
        int err;
        int eqn = vhcr->in_modifier;
-       int res_id = (slave << 8) | eqn;
+       int res_id = (slave << 10) | eqn;
        struct mlx4_eq_context *eqc = inbox->buf;
        int mtt_base = eq_get_mtt_addr(eqc) / dev->caps.mtt_entry_sz;
        int mtt_size = eq_get_mtt_size(eqc);
@@ -3051,7 +3051,7 @@ int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
                          struct mlx4_cmd_info *cmd)
 {
        int eqn = vhcr->in_modifier;
-       int res_id = eqn | (slave << 8);
+       int res_id = eqn | (slave << 10);
        struct res_eq *eq;
        int err;
 
@@ -3108,7 +3108,7 @@ int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe)
                return 0;
 
        mutex_lock(&priv->mfunc.master.gen_eqe_mutex[slave]);
-       res_id = (slave << 8) | event_eq->eqn;
+       res_id = (slave << 10) | event_eq->eqn;
        err = get_res(dev, slave, res_id, RES_EQ, &req);
        if (err)
                goto unlock;
@@ -3131,7 +3131,7 @@ int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe)
 
        memcpy(mailbox->buf, (u8 *) eqe, 28);
 
-       in_modifier = (slave & 0xff) | ((event_eq->eqn & 0xff) << 16);
+       in_modifier = (slave & 0xff) | ((event_eq->eqn & 0x3ff) << 16);
 
        err = mlx4_cmd(dev, mailbox->dma, in_modifier, 0,
                       MLX4_CMD_GEN_EQE, MLX4_CMD_TIME_CLASS_B,
@@ -3157,7 +3157,7 @@ int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
                          struct mlx4_cmd_info *cmd)
 {
        int eqn = vhcr->in_modifier;
-       int res_id = eqn | (slave << 8);
+       int res_id = eqn | (slave << 10);
        struct res_eq *eq;
        int err;
 
@@ -3187,7 +3187,7 @@ int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
        int cqn = vhcr->in_modifier;
        struct mlx4_cq_context *cqc = inbox->buf;
        int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
-       struct res_cq *cq;
+       struct res_cq *cq = NULL;
        struct res_mtt *mtt;
 
        err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq);
@@ -3223,7 +3223,7 @@ int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
 {
        int err;
        int cqn = vhcr->in_modifier;
-       struct res_cq *cq;
+       struct res_cq *cq = NULL;
 
        err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_ALLOCATED, &cq);
        if (err)
@@ -3362,7 +3362,7 @@ int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
        int err;
        int srqn = vhcr->in_modifier;
        struct res_mtt *mtt;
-       struct res_srq *srq;
+       struct res_srq *srq = NULL;
        struct mlx4_srq_context *srqc = inbox->buf;
        int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz;
 
@@ -3406,7 +3406,7 @@ int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
 {
        int err;
        int srqn = vhcr->in_modifier;
-       struct res_srq *srq;
+       struct res_srq *srq = NULL;
 
        err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_ALLOCATED, &srq);
        if (err)
@@ -4714,13 +4714,13 @@ static void rem_slave_eqs(struct mlx4_dev *dev, int slave)
                                        break;
 
                                case RES_EQ_HW:
-                                       err = mlx4_cmd(dev, slave, eqn & 0xff,
+                                       err = mlx4_cmd(dev, slave, eqn & 0x3ff,
                                                       1, MLX4_CMD_HW2SW_EQ,
                                                       MLX4_CMD_TIME_CLASS_A,
                                                       MLX4_CMD_NATIVE);
                                        if (err)
                                                mlx4_dbg(dev, "rem_slave_eqs: failed to move slave %d eqs %d to SW ownership\n",
-                                                        slave, eqn);
+                                                        slave, eqn & 0x3ff);
                                        atomic_dec(&eq->mtt->ref_count);
                                        state = RES_EQ_RESERVED;
                                        break;
index 8da7c3f..7b43a3b 100644 (file)
@@ -1764,7 +1764,7 @@ int netxen_process_cmd_ring(struct netxen_adapter *adapter)
        int done = 0;
        struct nx_host_tx_ring *tx_ring = adapter->tx_ring;
 
-       if (!spin_trylock(&adapter->tx_clean_lock))
+       if (!spin_trylock_bh(&adapter->tx_clean_lock))
                return 1;
 
        sw_consumer = tx_ring->sw_consumer;
@@ -1819,7 +1819,7 @@ int netxen_process_cmd_ring(struct netxen_adapter *adapter)
         */
        hw_consumer = le32_to_cpu(*(tx_ring->hw_consumer));
        done = (sw_consumer == hw_consumer);
-       spin_unlock(&adapter->tx_clean_lock);
+       spin_unlock_bh(&adapter->tx_clean_lock);
 
        return done;
 }
index e0c31e3..6409a06 100644 (file)
@@ -3025,9 +3025,9 @@ netxen_sysfs_read_dimm(struct file *filp, struct kobject *kobj,
        u8 dw, rows, cols, banks, ranks;
        u32 val;
 
-       if (size != sizeof(struct netxen_dimm_cfg)) {
+       if (size < attr->size) {
                netdev_err(netdev, "Invalid size\n");
-               return -1;
+               return -EINVAL;
        }
 
        memset(&dimm, 0, sizeof(struct netxen_dimm_cfg));
@@ -3137,7 +3137,7 @@ out:
 
 static struct bin_attribute bin_attr_dimm = {
        .attr = { .name = "dimm", .mode = (S_IRUGO | S_IWUSR) },
-       .size = 0,
+       .size = sizeof(struct netxen_dimm_cfg),
        .read = netxen_sysfs_read_dimm,
 };
 
index f66641d..6af028d 100644 (file)
@@ -912,6 +912,8 @@ qca_spi_probe(struct spi_device *spi_device)
        qca->spi_dev = spi_device;
        qca->legacy_mode = legacy_mode;
 
+       spi_set_drvdata(spi_device, qcaspi_devs);
+
        mac = of_get_mac_address(spi_device->dev.of_node);
 
        if (mac)
@@ -944,8 +946,6 @@ qca_spi_probe(struct spi_device *spi_device)
                return -EFAULT;
        }
 
-       spi_set_drvdata(spi_device, qcaspi_devs);
-
        qcaspi_init_device_debugfs(qca);
 
        return 0;
index c70ab40..3df51fa 100644 (file)
@@ -6884,7 +6884,7 @@ static void r8169_csum_workaround(struct rtl8169_private *tp,
                        rtl8169_start_xmit(nskb, tp->dev);
                } while (segs);
 
-               dev_kfree_skb(skb);
+               dev_consume_skb_any(skb);
        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
                if (skb_checksum_help(skb) < 0)
                        goto drop;
@@ -6896,7 +6896,7 @@ static void r8169_csum_workaround(struct rtl8169_private *tp,
 drop:
                stats = &tp->dev->stats;
                stats->tx_dropped++;
-               dev_kfree_skb(skb);
+               dev_kfree_skb_any(skb);
        }
 }
 
index ec25153..cf98cc9 100644 (file)
@@ -2921,10 +2921,11 @@ static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port,
        struct neighbour *n = __ipv4_neigh_lookup(dev, (__force u32)ip_addr);
        int err = 0;
 
-       if (!n)
+       if (!n) {
                n = neigh_create(&arp_tbl, &ip_addr, dev);
-       if (!n)
-               return -ENOMEM;
+               if (IS_ERR(n))
+                       return IS_ERR(n);
+       }
 
        /* If the neigh is already resolved, then go ahead and
         * install the entry, otherwise start the ARP process to
@@ -2936,6 +2937,7 @@ static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port,
        else
                neigh_event_send(n, NULL);
 
+       neigh_release(n);
        return err;
 }
 
index 4b00545..65944dd 100644 (file)
@@ -1304,7 +1304,7 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
                        if (!cpumask_test_cpu(cpu, thread_mask)) {
                                ++count;
                                cpumask_or(thread_mask, thread_mask,
-                                          topology_thread_cpumask(cpu));
+                                          topology_sibling_cpumask(cpu));
                        }
                }
 
index c0ad95d..809ea46 100644 (file)
@@ -224,12 +224,17 @@ static void efx_unmap_rx_buffer(struct efx_nic *efx,
        }
 }
 
-static void efx_free_rx_buffer(struct efx_rx_buffer *rx_buf)
+static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
+                               struct efx_rx_buffer *rx_buf,
+                               unsigned int num_bufs)
 {
-       if (rx_buf->page) {
-               put_page(rx_buf->page);
-               rx_buf->page = NULL;
-       }
+       do {
+               if (rx_buf->page) {
+                       put_page(rx_buf->page);
+                       rx_buf->page = NULL;
+               }
+               rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+       } while (--num_bufs);
 }
 
 /* Attempt to recycle the page if there is an RX recycle ring; the page can
@@ -278,7 +283,7 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
        /* If this is the last buffer in a page, unmap and free it. */
        if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
                efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
-               efx_free_rx_buffer(rx_buf);
+               efx_free_rx_buffers(rx_queue, rx_buf, 1);
        }
        rx_buf->page = NULL;
 }
@@ -304,10 +309,7 @@ static void efx_discard_rx_packet(struct efx_channel *channel,
 
        efx_recycle_rx_pages(channel, rx_buf, n_frags);
 
-       do {
-               efx_free_rx_buffer(rx_buf);
-               rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
-       } while (--n_frags);
+       efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
 }
 
 /**
@@ -431,11 +433,10 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
 
        skb = napi_get_frags(napi);
        if (unlikely(!skb)) {
-               while (n_frags--) {
-                       put_page(rx_buf->page);
-                       rx_buf->page = NULL;
-                       rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
-               }
+               struct efx_rx_queue *rx_queue;
+
+               rx_queue = efx_channel_get_rx_queue(channel);
+               efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
                return;
        }
 
@@ -622,7 +623,10 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
 
        skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
        if (unlikely(skb == NULL)) {
-               efx_free_rx_buffer(rx_buf);
+               struct efx_rx_queue *rx_queue;
+
+               rx_queue = efx_channel_get_rx_queue(channel);
+               efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
                return;
        }
        skb_record_rx_queue(skb, channel->rx_queue.core_index);
@@ -661,8 +665,12 @@ void __efx_rx_packet(struct efx_channel *channel)
         * loopback layer, and free the rx_buf here
         */
        if (unlikely(efx->loopback_selftest)) {
+               struct efx_rx_queue *rx_queue;
+
                efx_loopback_rx_packet(efx, eh, rx_buf->len);
-               efx_free_rx_buffer(rx_buf);
+               rx_queue = efx_channel_get_rx_queue(channel);
+               efx_free_rx_buffers(rx_queue, rx_buf,
+                                   channel->rx_pkt_n_frags);
                goto out;
        }
 
index 14b363a..630f0b7 100644 (file)
@@ -2238,9 +2238,10 @@ static int smc_drv_probe(struct platform_device *pdev)
        const struct of_device_id *match = NULL;
        struct smc_local *lp;
        struct net_device *ndev;
-       struct resource *res, *ires;
+       struct resource *res;
        unsigned int __iomem *addr;
        unsigned long irq_flags = SMC_IRQ_FLAGS;
+       unsigned long irq_resflags;
        int ret;
 
        ndev = alloc_etherdev(sizeof(struct smc_local));
@@ -2332,16 +2333,19 @@ static int smc_drv_probe(struct platform_device *pdev)
                goto out_free_netdev;
        }
 
-       ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-       if (!ires) {
+       ndev->irq = platform_get_irq(pdev, 0);
+       if (ndev->irq <= 0) {
                ret = -ENODEV;
                goto out_release_io;
        }
-
-       ndev->irq = ires->start;
-
-       if (irq_flags == -1 || ires->flags & IRQF_TRIGGER_MASK)
-               irq_flags = ires->flags & IRQF_TRIGGER_MASK;
+       /*
+        * If this platform does not specify any special irqflags, or if
+        * the resource supplies a trigger, override the irqflags with
+        * the trigger flags from the resource.
+        */
+       irq_resflags = irqd_get_trigger_type(irq_get_irq_data(ndev->irq));
+       if (irq_flags == -1 || irq_resflags & IRQF_TRIGGER_MASK)
+               irq_flags = irq_resflags & IRQF_TRIGGER_MASK;
 
        ret = smc_request_attrib(pdev, ndev);
        if (ret)
index 41047c9..959aeea 100644 (file)
@@ -2418,9 +2418,9 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
        struct net_device *dev;
        struct smsc911x_data *pdata;
        struct smsc911x_platform_config *config = dev_get_platdata(&pdev->dev);
-       struct resource *res, *irq_res;
+       struct resource *res;
        unsigned int intcfg = 0;
-       int res_size, irq_flags;
+       int res_size, irq, irq_flags;
        int retval;
 
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
@@ -2434,8 +2434,8 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
        }
        res_size = resource_size(res);
 
-       irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-       if (!irq_res) {
+       irq = platform_get_irq(pdev, 0);
+       if (irq <= 0) {
                pr_warn("Could not allocate irq resource\n");
                retval = -ENODEV;
                goto out_0;
@@ -2455,8 +2455,8 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
 
        pdata = netdev_priv(dev);
-       dev->irq = irq_res->start;
-       irq_flags = irq_res->flags & IRQF_TRIGGER_MASK;
+       dev->irq = irq;
+       irq_flags = irq_get_trigger_type(irq);
        pdata->ioaddr = ioremap_nocache(res->start, res_size);
 
        pdata->dev = dev;
index 2ac9552..73bab98 100644 (file)
@@ -117,6 +117,12 @@ struct stmmac_priv {
        int use_riwt;
        int irq_wake;
        spinlock_t ptp_lock;
+
+#ifdef CONFIG_DEBUG_FS
+       struct dentry *dbgfs_dir;
+       struct dentry *dbgfs_rings_status;
+       struct dentry *dbgfs_dma_cap;
+#endif
 };
 
 int stmmac_mdio_unregister(struct net_device *ndev);
index 05c146f..2c5ce2b 100644 (file)
@@ -118,7 +118,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
 
 #ifdef CONFIG_DEBUG_FS
 static int stmmac_init_fs(struct net_device *dev);
-static void stmmac_exit_fs(void);
+static void stmmac_exit_fs(struct net_device *dev);
 #endif
 
 #define STMMAC_COAL_TIMER(x) (jiffies + usecs_to_jiffies(x))
@@ -1916,7 +1916,7 @@ static int stmmac_release(struct net_device *dev)
        netif_carrier_off(dev);
 
 #ifdef CONFIG_DEBUG_FS
-       stmmac_exit_fs();
+       stmmac_exit_fs(dev);
 #endif
 
        stmmac_release_ptp(priv);
@@ -2508,8 +2508,6 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
 #ifdef CONFIG_DEBUG_FS
 static struct dentry *stmmac_fs_dir;
-static struct dentry *stmmac_rings_status;
-static struct dentry *stmmac_dma_cap;
 
 static void sysfs_display_ring(void *head, int size, int extend_desc,
                               struct seq_file *seq)
@@ -2648,36 +2646,39 @@ static const struct file_operations stmmac_dma_cap_fops = {
 
 static int stmmac_init_fs(struct net_device *dev)
 {
-       /* Create debugfs entries */
-       stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL);
+       struct stmmac_priv *priv = netdev_priv(dev);
+
+       /* Create per netdev entries */
+       priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir);
 
-       if (!stmmac_fs_dir || IS_ERR(stmmac_fs_dir)) {
-               pr_err("ERROR %s, debugfs create directory failed\n",
-                      STMMAC_RESOURCE_NAME);
+       if (!priv->dbgfs_dir || IS_ERR(priv->dbgfs_dir)) {
+               pr_err("ERROR %s/%s, debugfs create directory failed\n",
+                      STMMAC_RESOURCE_NAME, dev->name);
 
                return -ENOMEM;
        }
 
        /* Entry to report DMA RX/TX rings */
-       stmmac_rings_status = debugfs_create_file("descriptors_status",
-                                                 S_IRUGO, stmmac_fs_dir, dev,
-                                                 &stmmac_rings_status_fops);
+       priv->dbgfs_rings_status =
+               debugfs_create_file("descriptors_status", S_IRUGO,
+                                   priv->dbgfs_dir, dev,
+                                   &stmmac_rings_status_fops);
 
-       if (!stmmac_rings_status || IS_ERR(stmmac_rings_status)) {
+       if (!priv->dbgfs_rings_status || IS_ERR(priv->dbgfs_rings_status)) {
                pr_info("ERROR creating stmmac ring debugfs file\n");
-               debugfs_remove(stmmac_fs_dir);
+               debugfs_remove_recursive(priv->dbgfs_dir);
 
                return -ENOMEM;
        }
 
        /* Entry to report the DMA HW features */
-       stmmac_dma_cap = debugfs_create_file("dma_cap", S_IRUGO, stmmac_fs_dir,
-                                            dev, &stmmac_dma_cap_fops);
+       priv->dbgfs_dma_cap = debugfs_create_file("dma_cap", S_IRUGO,
+                                           priv->dbgfs_dir,
+                                           dev, &stmmac_dma_cap_fops);
 
-       if (!stmmac_dma_cap || IS_ERR(stmmac_dma_cap)) {
+       if (!priv->dbgfs_dma_cap || IS_ERR(priv->dbgfs_dma_cap)) {
                pr_info("ERROR creating stmmac MMC debugfs file\n");
-               debugfs_remove(stmmac_rings_status);
-               debugfs_remove(stmmac_fs_dir);
+               debugfs_remove_recursive(priv->dbgfs_dir);
 
                return -ENOMEM;
        }
@@ -2685,11 +2686,11 @@ static int stmmac_init_fs(struct net_device *dev)
        return 0;
 }
 
-static void stmmac_exit_fs(void)
+static void stmmac_exit_fs(struct net_device *dev)
 {
-       debugfs_remove(stmmac_rings_status);
-       debugfs_remove(stmmac_dma_cap);
-       debugfs_remove(stmmac_fs_dir);
+       struct stmmac_priv *priv = netdev_priv(dev);
+
+       debugfs_remove_recursive(priv->dbgfs_dir);
 }
 #endif /* CONFIG_DEBUG_FS */
 
@@ -3149,6 +3150,35 @@ err:
 __setup("stmmaceth=", stmmac_cmdline_opt);
 #endif /* MODULE */
 
+static int __init stmmac_init(void)
+{
+#ifdef CONFIG_DEBUG_FS
+       /* Create debugfs main directory if it doesn't exist yet */
+       if (!stmmac_fs_dir) {
+               stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL);
+
+               if (!stmmac_fs_dir || IS_ERR(stmmac_fs_dir)) {
+                       pr_err("ERROR %s, debugfs create directory failed\n",
+                              STMMAC_RESOURCE_NAME);
+
+                       return -ENOMEM;
+               }
+       }
+#endif
+
+       return 0;
+}
+
+static void __exit stmmac_exit(void)
+{
+#ifdef CONFIG_DEBUG_FS
+       debugfs_remove_recursive(stmmac_fs_dir);
+#endif
+}
+
+module_init(stmmac_init)
+module_exit(stmmac_exit)
+
 MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet device driver");
 MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
 MODULE_LICENSE("GPL");
index 705bbdf..68aec5c 100644 (file)
@@ -23,6 +23,7 @@
 *******************************************************************************/
 
 #include <linux/platform_device.h>
+#include <linux/module.h>
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_net.h>
index 690a4c3..af2694d 100644 (file)
@@ -707,8 +707,8 @@ static int temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
        cur_p->app0 |= STS_CTRL_APP0_SOP;
        cur_p->len = skb_headlen(skb);
-       cur_p->phys = dma_map_single(ndev->dev.parent, skb->data, skb->len,
-                                    DMA_TO_DEVICE);
+       cur_p->phys = dma_map_single(ndev->dev.parent, skb->data,
+                                    skb_headlen(skb), DMA_TO_DEVICE);
        cur_p->app4 = (unsigned long)skb;
 
        for (ii = 0; ii < num_frag; ii++) {
index 2d9ef53..ea091bc 100644 (file)
@@ -826,7 +826,6 @@ int netvsc_send(struct hv_device *device,
        u16 q_idx = packet->q_idx;
        u32 pktlen = packet->total_data_buflen, msd_len = 0;
        unsigned int section_index = NETVSC_INVALID_INDEX;
-       struct sk_buff *skb = NULL;
        unsigned long flag;
        struct multi_send_data *msdp;
        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
@@ -924,12 +923,8 @@ int netvsc_send(struct hv_device *device,
        if (cur_send)
                ret = netvsc_send_pkt(cur_send, net_device);
 
-       if (ret != 0) {
-               if (section_index != NETVSC_INVALID_INDEX)
-                       netvsc_free_send_slot(net_device, section_index);
-       } else if (skb) {
-               dev_kfree_skb_any(skb);
-       }
+       if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
+               netvsc_free_send_slot(net_device, section_index);
 
        return ret;
 }
index 3802665..67d00fb 100644 (file)
@@ -85,6 +85,7 @@ struct at86rf230_local {
        struct ieee802154_hw *hw;
        struct at86rf2xx_chip_data *data;
        struct regmap *regmap;
+       int slp_tr;
 
        struct completion state_complete;
        struct at86rf230_state_change state;
@@ -95,163 +96,164 @@ struct at86rf230_local {
        unsigned long cal_timeout;
        s8 max_frame_retries;
        bool is_tx;
+       bool is_tx_from_off;
        u8 tx_retry;
        struct sk_buff *tx_skb;
        struct at86rf230_state_change tx;
 };
 
-#define        RG_TRX_STATUS   (0x01)
-#define        SR_TRX_STATUS           0x01, 0x1f, 0
-#define        SR_RESERVED_01_3        0x01, 0x20, 5
-#define        SR_CCA_STATUS           0x01, 0x40, 6
-#define        SR_CCA_DONE             0x01, 0x80, 7
-#define        RG_TRX_STATE    (0x02)
-#define        SR_TRX_CMD              0x02, 0x1f, 0
-#define        SR_TRAC_STATUS          0x02, 0xe0, 5
-#define        RG_TRX_CTRL_0   (0x03)
-#define        SR_CLKM_CTRL            0x03, 0x07, 0
-#define        SR_CLKM_SHA_SEL         0x03, 0x08, 3
-#define        SR_PAD_IO_CLKM          0x03, 0x30, 4
-#define        SR_PAD_IO               0x03, 0xc0, 6
-#define        RG_TRX_CTRL_1   (0x04)
-#define        SR_IRQ_POLARITY         0x04, 0x01, 0
-#define        SR_IRQ_MASK_MODE        0x04, 0x02, 1
-#define        SR_SPI_CMD_MODE         0x04, 0x0c, 2
-#define        SR_RX_BL_CTRL           0x04, 0x10, 4
-#define        SR_TX_AUTO_CRC_ON       0x04, 0x20, 5
-#define        SR_IRQ_2_EXT_EN         0x04, 0x40, 6
-#define        SR_PA_EXT_EN            0x04, 0x80, 7
-#define        RG_PHY_TX_PWR   (0x05)
-#define        SR_TX_PWR               0x05, 0x0f, 0
-#define        SR_PA_LT                0x05, 0x30, 4
-#define        SR_PA_BUF_LT            0x05, 0xc0, 6
-#define        RG_PHY_RSSI     (0x06)
-#define        SR_RSSI                 0x06, 0x1f, 0
-#define        SR_RND_VALUE            0x06, 0x60, 5
-#define        SR_RX_CRC_VALID         0x06, 0x80, 7
-#define        RG_PHY_ED_LEVEL (0x07)
-#define        SR_ED_LEVEL             0x07, 0xff, 0
-#define        RG_PHY_CC_CCA   (0x08)
-#define        SR_CHANNEL              0x08, 0x1f, 0
-#define        SR_CCA_MODE             0x08, 0x60, 5
-#define        SR_CCA_REQUEST          0x08, 0x80, 7
-#define        RG_CCA_THRES    (0x09)
-#define        SR_CCA_ED_THRES         0x09, 0x0f, 0
-#define        SR_RESERVED_09_1        0x09, 0xf0, 4
-#define        RG_RX_CTRL      (0x0a)
-#define        SR_PDT_THRES            0x0a, 0x0f, 0
-#define        SR_RESERVED_0a_1        0x0a, 0xf0, 4
-#define        RG_SFD_VALUE    (0x0b)
-#define        SR_SFD_VALUE            0x0b, 0xff, 0
-#define        RG_TRX_CTRL_2   (0x0c)
-#define        SR_OQPSK_DATA_RATE      0x0c, 0x03, 0
-#define        SR_SUB_MODE             0x0c, 0x04, 2
-#define        SR_BPSK_QPSK            0x0c, 0x08, 3
-#define        SR_OQPSK_SUB1_RC_EN     0x0c, 0x10, 4
-#define        SR_RESERVED_0c_5        0x0c, 0x60, 5
-#define        SR_RX_SAFE_MODE         0x0c, 0x80, 7
-#define        RG_ANT_DIV      (0x0d)
-#define        SR_ANT_CTRL             0x0d, 0x03, 0
-#define        SR_ANT_EXT_SW_EN        0x0d, 0x04, 2
-#define        SR_ANT_DIV_EN           0x0d, 0x08, 3
-#define        SR_RESERVED_0d_2        0x0d, 0x70, 4
-#define        SR_ANT_SEL              0x0d, 0x80, 7
-#define        RG_IRQ_MASK     (0x0e)
-#define        SR_IRQ_MASK             0x0e, 0xff, 0
-#define        RG_IRQ_STATUS   (0x0f)
-#define        SR_IRQ_0_PLL_LOCK       0x0f, 0x01, 0
-#define        SR_IRQ_1_PLL_UNLOCK     0x0f, 0x02, 1
-#define        SR_IRQ_2_RX_START       0x0f, 0x04, 2
-#define        SR_IRQ_3_TRX_END        0x0f, 0x08, 3
-#define        SR_IRQ_4_CCA_ED_DONE    0x0f, 0x10, 4
-#define        SR_IRQ_5_AMI            0x0f, 0x20, 5
-#define        SR_IRQ_6_TRX_UR         0x0f, 0x40, 6
-#define        SR_IRQ_7_BAT_LOW        0x0f, 0x80, 7
-#define        RG_VREG_CTRL    (0x10)
-#define        SR_RESERVED_10_6        0x10, 0x03, 0
-#define        SR_DVDD_OK              0x10, 0x04, 2
-#define        SR_DVREG_EXT            0x10, 0x08, 3
-#define        SR_RESERVED_10_3        0x10, 0x30, 4
-#define        SR_AVDD_OK              0x10, 0x40, 6
-#define        SR_AVREG_EXT            0x10, 0x80, 7
-#define        RG_BATMON       (0x11)
-#define        SR_BATMON_VTH           0x11, 0x0f, 0
-#define        SR_BATMON_HR            0x11, 0x10, 4
-#define        SR_BATMON_OK            0x11, 0x20, 5
-#define        SR_RESERVED_11_1        0x11, 0xc0, 6
-#define        RG_XOSC_CTRL    (0x12)
-#define        SR_XTAL_TRIM            0x12, 0x0f, 0
-#define        SR_XTAL_MODE            0x12, 0xf0, 4
-#define        RG_RX_SYN       (0x15)
-#define        SR_RX_PDT_LEVEL         0x15, 0x0f, 0
-#define        SR_RESERVED_15_2        0x15, 0x70, 4
-#define        SR_RX_PDT_DIS           0x15, 0x80, 7
-#define        RG_XAH_CTRL_1   (0x17)
-#define        SR_RESERVED_17_8        0x17, 0x01, 0
-#define        SR_AACK_PROM_MODE       0x17, 0x02, 1
-#define        SR_AACK_ACK_TIME        0x17, 0x04, 2
-#define        SR_RESERVED_17_5        0x17, 0x08, 3
-#define        SR_AACK_UPLD_RES_FT     0x17, 0x10, 4
-#define        SR_AACK_FLTR_RES_FT     0x17, 0x20, 5
-#define        SR_CSMA_LBT_MODE        0x17, 0x40, 6
-#define        SR_RESERVED_17_1        0x17, 0x80, 7
-#define        RG_FTN_CTRL     (0x18)
-#define        SR_RESERVED_18_2        0x18, 0x7f, 0
-#define        SR_FTN_START            0x18, 0x80, 7
-#define        RG_PLL_CF       (0x1a)
-#define        SR_RESERVED_1a_2        0x1a, 0x7f, 0
-#define        SR_PLL_CF_START         0x1a, 0x80, 7
-#define        RG_PLL_DCU      (0x1b)
-#define        SR_RESERVED_1b_3        0x1b, 0x3f, 0
-#define        SR_RESERVED_1b_2        0x1b, 0x40, 6
-#define        SR_PLL_DCU_START        0x1b, 0x80, 7
-#define        RG_PART_NUM     (0x1c)
-#define        SR_PART_NUM             0x1c, 0xff, 0
-#define        RG_VERSION_NUM  (0x1d)
-#define        SR_VERSION_NUM          0x1d, 0xff, 0
-#define        RG_MAN_ID_0     (0x1e)
-#define        SR_MAN_ID_0             0x1e, 0xff, 0
-#define        RG_MAN_ID_1     (0x1f)
-#define        SR_MAN_ID_1             0x1f, 0xff, 0
-#define        RG_SHORT_ADDR_0 (0x20)
-#define        SR_SHORT_ADDR_0         0x20, 0xff, 0
-#define        RG_SHORT_ADDR_1 (0x21)
-#define        SR_SHORT_ADDR_1         0x21, 0xff, 0
-#define        RG_PAN_ID_0     (0x22)
-#define        SR_PAN_ID_0             0x22, 0xff, 0
-#define        RG_PAN_ID_1     (0x23)
-#define        SR_PAN_ID_1             0x23, 0xff, 0
-#define        RG_IEEE_ADDR_0  (0x24)
-#define        SR_IEEE_ADDR_0          0x24, 0xff, 0
-#define        RG_IEEE_ADDR_1  (0x25)
-#define        SR_IEEE_ADDR_1          0x25, 0xff, 0
-#define        RG_IEEE_ADDR_2  (0x26)
-#define        SR_IEEE_ADDR_2          0x26, 0xff, 0
-#define        RG_IEEE_ADDR_3  (0x27)
-#define        SR_IEEE_ADDR_3          0x27, 0xff, 0
-#define        RG_IEEE_ADDR_4  (0x28)
-#define        SR_IEEE_ADDR_4          0x28, 0xff, 0
-#define        RG_IEEE_ADDR_5  (0x29)
-#define        SR_IEEE_ADDR_5          0x29, 0xff, 0
-#define        RG_IEEE_ADDR_6  (0x2a)
-#define        SR_IEEE_ADDR_6          0x2a, 0xff, 0
-#define        RG_IEEE_ADDR_7  (0x2b)
-#define        SR_IEEE_ADDR_7          0x2b, 0xff, 0
-#define        RG_XAH_CTRL_0   (0x2c)
-#define        SR_SLOTTED_OPERATION    0x2c, 0x01, 0
-#define        SR_MAX_CSMA_RETRIES     0x2c, 0x0e, 1
-#define        SR_MAX_FRAME_RETRIES    0x2c, 0xf0, 4
-#define        RG_CSMA_SEED_0  (0x2d)
-#define        SR_CSMA_SEED_0          0x2d, 0xff, 0
-#define        RG_CSMA_SEED_1  (0x2e)
-#define        SR_CSMA_SEED_1          0x2e, 0x07, 0
-#define        SR_AACK_I_AM_COORD      0x2e, 0x08, 3
-#define        SR_AACK_DIS_ACK         0x2e, 0x10, 4
-#define        SR_AACK_SET_PD          0x2e, 0x20, 5
-#define        SR_AACK_FVN_MODE        0x2e, 0xc0, 6
-#define        RG_CSMA_BE      (0x2f)
-#define        SR_MIN_BE               0x2f, 0x0f, 0
-#define        SR_MAX_BE               0x2f, 0xf0, 4
+#define RG_TRX_STATUS  (0x01)
+#define SR_TRX_STATUS          0x01, 0x1f, 0
+#define SR_RESERVED_01_3       0x01, 0x20, 5
+#define SR_CCA_STATUS          0x01, 0x40, 6
+#define SR_CCA_DONE            0x01, 0x80, 7
+#define RG_TRX_STATE   (0x02)
+#define SR_TRX_CMD             0x02, 0x1f, 0
+#define SR_TRAC_STATUS         0x02, 0xe0, 5
+#define RG_TRX_CTRL_0  (0x03)
+#define SR_CLKM_CTRL           0x03, 0x07, 0
+#define SR_CLKM_SHA_SEL                0x03, 0x08, 3
+#define SR_PAD_IO_CLKM         0x03, 0x30, 4
+#define SR_PAD_IO              0x03, 0xc0, 6
+#define RG_TRX_CTRL_1  (0x04)
+#define SR_IRQ_POLARITY                0x04, 0x01, 0
+#define SR_IRQ_MASK_MODE       0x04, 0x02, 1
+#define SR_SPI_CMD_MODE                0x04, 0x0c, 2
+#define SR_RX_BL_CTRL          0x04, 0x10, 4
+#define SR_TX_AUTO_CRC_ON      0x04, 0x20, 5
+#define SR_IRQ_2_EXT_EN                0x04, 0x40, 6
+#define SR_PA_EXT_EN           0x04, 0x80, 7
+#define RG_PHY_TX_PWR  (0x05)
+#define SR_TX_PWR              0x05, 0x0f, 0
+#define SR_PA_LT               0x05, 0x30, 4
+#define SR_PA_BUF_LT           0x05, 0xc0, 6
+#define RG_PHY_RSSI    (0x06)
+#define SR_RSSI                        0x06, 0x1f, 0
+#define SR_RND_VALUE           0x06, 0x60, 5
+#define SR_RX_CRC_VALID                0x06, 0x80, 7
+#define RG_PHY_ED_LEVEL        (0x07)
+#define SR_ED_LEVEL            0x07, 0xff, 0
+#define RG_PHY_CC_CCA  (0x08)
+#define SR_CHANNEL             0x08, 0x1f, 0
+#define SR_CCA_MODE            0x08, 0x60, 5
+#define SR_CCA_REQUEST         0x08, 0x80, 7
+#define RG_CCA_THRES   (0x09)
+#define SR_CCA_ED_THRES                0x09, 0x0f, 0
+#define SR_RESERVED_09_1       0x09, 0xf0, 4
+#define RG_RX_CTRL     (0x0a)
+#define SR_PDT_THRES           0x0a, 0x0f, 0
+#define SR_RESERVED_0a_1       0x0a, 0xf0, 4
+#define RG_SFD_VALUE   (0x0b)
+#define SR_SFD_VALUE           0x0b, 0xff, 0
+#define RG_TRX_CTRL_2  (0x0c)
+#define SR_OQPSK_DATA_RATE     0x0c, 0x03, 0
+#define SR_SUB_MODE            0x0c, 0x04, 2
+#define SR_BPSK_QPSK           0x0c, 0x08, 3
+#define SR_OQPSK_SUB1_RC_EN    0x0c, 0x10, 4
+#define SR_RESERVED_0c_5       0x0c, 0x60, 5
+#define SR_RX_SAFE_MODE                0x0c, 0x80, 7
+#define RG_ANT_DIV     (0x0d)
+#define SR_ANT_CTRL            0x0d, 0x03, 0
+#define SR_ANT_EXT_SW_EN       0x0d, 0x04, 2
+#define SR_ANT_DIV_EN          0x0d, 0x08, 3
+#define SR_RESERVED_0d_2       0x0d, 0x70, 4
+#define SR_ANT_SEL             0x0d, 0x80, 7
+#define RG_IRQ_MASK    (0x0e)
+#define SR_IRQ_MASK            0x0e, 0xff, 0
+#define RG_IRQ_STATUS  (0x0f)
+#define SR_IRQ_0_PLL_LOCK      0x0f, 0x01, 0
+#define SR_IRQ_1_PLL_UNLOCK    0x0f, 0x02, 1
+#define SR_IRQ_2_RX_START      0x0f, 0x04, 2
+#define SR_IRQ_3_TRX_END       0x0f, 0x08, 3
+#define SR_IRQ_4_CCA_ED_DONE   0x0f, 0x10, 4
+#define SR_IRQ_5_AMI           0x0f, 0x20, 5
+#define SR_IRQ_6_TRX_UR                0x0f, 0x40, 6
+#define SR_IRQ_7_BAT_LOW       0x0f, 0x80, 7
+#define RG_VREG_CTRL   (0x10)
+#define SR_RESERVED_10_6       0x10, 0x03, 0
+#define SR_DVDD_OK             0x10, 0x04, 2
+#define SR_DVREG_EXT           0x10, 0x08, 3
+#define SR_RESERVED_10_3       0x10, 0x30, 4
+#define SR_AVDD_OK             0x10, 0x40, 6
+#define SR_AVREG_EXT           0x10, 0x80, 7
+#define RG_BATMON      (0x11)
+#define SR_BATMON_VTH          0x11, 0x0f, 0
+#define SR_BATMON_HR           0x11, 0x10, 4
+#define SR_BATMON_OK           0x11, 0x20, 5
+#define SR_RESERVED_11_1       0x11, 0xc0, 6
+#define RG_XOSC_CTRL   (0x12)
+#define SR_XTAL_TRIM           0x12, 0x0f, 0
+#define SR_XTAL_MODE           0x12, 0xf0, 4
+#define RG_RX_SYN      (0x15)
+#define SR_RX_PDT_LEVEL                0x15, 0x0f, 0
+#define SR_RESERVED_15_2       0x15, 0x70, 4
+#define SR_RX_PDT_DIS          0x15, 0x80, 7
+#define RG_XAH_CTRL_1  (0x17)
+#define SR_RESERVED_17_8       0x17, 0x01, 0
+#define SR_AACK_PROM_MODE      0x17, 0x02, 1
+#define SR_AACK_ACK_TIME       0x17, 0x04, 2
+#define SR_RESERVED_17_5       0x17, 0x08, 3
+#define SR_AACK_UPLD_RES_FT    0x17, 0x10, 4
+#define SR_AACK_FLTR_RES_FT    0x17, 0x20, 5
+#define SR_CSMA_LBT_MODE       0x17, 0x40, 6
+#define SR_RESERVED_17_1       0x17, 0x80, 7
+#define RG_FTN_CTRL    (0x18)
+#define SR_RESERVED_18_2       0x18, 0x7f, 0
+#define SR_FTN_START           0x18, 0x80, 7
+#define RG_PLL_CF      (0x1a)
+#define SR_RESERVED_1a_2       0x1a, 0x7f, 0
+#define SR_PLL_CF_START                0x1a, 0x80, 7
+#define RG_PLL_DCU     (0x1b)
+#define SR_RESERVED_1b_3       0x1b, 0x3f, 0
+#define SR_RESERVED_1b_2       0x1b, 0x40, 6
+#define SR_PLL_DCU_START       0x1b, 0x80, 7
+#define RG_PART_NUM    (0x1c)
+#define SR_PART_NUM            0x1c, 0xff, 0
+#define RG_VERSION_NUM (0x1d)
+#define SR_VERSION_NUM         0x1d, 0xff, 0
+#define RG_MAN_ID_0    (0x1e)
+#define SR_MAN_ID_0            0x1e, 0xff, 0
+#define RG_MAN_ID_1    (0x1f)
+#define SR_MAN_ID_1            0x1f, 0xff, 0
+#define RG_SHORT_ADDR_0        (0x20)
+#define SR_SHORT_ADDR_0                0x20, 0xff, 0
+#define RG_SHORT_ADDR_1        (0x21)
+#define SR_SHORT_ADDR_1                0x21, 0xff, 0
+#define RG_PAN_ID_0    (0x22)
+#define SR_PAN_ID_0            0x22, 0xff, 0
+#define RG_PAN_ID_1    (0x23)
+#define SR_PAN_ID_1            0x23, 0xff, 0
+#define RG_IEEE_ADDR_0 (0x24)
+#define SR_IEEE_ADDR_0         0x24, 0xff, 0
+#define RG_IEEE_ADDR_1 (0x25)
+#define SR_IEEE_ADDR_1         0x25, 0xff, 0
+#define RG_IEEE_ADDR_2 (0x26)
+#define SR_IEEE_ADDR_2         0x26, 0xff, 0
+#define RG_IEEE_ADDR_3 (0x27)
+#define SR_IEEE_ADDR_3         0x27, 0xff, 0
+#define RG_IEEE_ADDR_4 (0x28)
+#define SR_IEEE_ADDR_4         0x28, 0xff, 0
+#define RG_IEEE_ADDR_5 (0x29)
+#define SR_IEEE_ADDR_5         0x29, 0xff, 0
+#define RG_IEEE_ADDR_6 (0x2a)
+#define SR_IEEE_ADDR_6         0x2a, 0xff, 0
+#define RG_IEEE_ADDR_7 (0x2b)
+#define SR_IEEE_ADDR_7         0x2b, 0xff, 0
+#define RG_XAH_CTRL_0  (0x2c)
+#define SR_SLOTTED_OPERATION   0x2c, 0x01, 0
+#define SR_MAX_CSMA_RETRIES    0x2c, 0x0e, 1
+#define SR_MAX_FRAME_RETRIES   0x2c, 0xf0, 4
+#define RG_CSMA_SEED_0 (0x2d)
+#define SR_CSMA_SEED_0         0x2d, 0xff, 0
+#define RG_CSMA_SEED_1 (0x2e)
+#define SR_CSMA_SEED_1         0x2e, 0x07, 0
+#define SR_AACK_I_AM_COORD     0x2e, 0x08, 3
+#define SR_AACK_DIS_ACK                0x2e, 0x10, 4
+#define SR_AACK_SET_PD         0x2e, 0x20, 5
+#define SR_AACK_FVN_MODE       0x2e, 0xc0, 6
+#define RG_CSMA_BE     (0x2f)
+#define SR_MIN_BE              0x2f, 0x0f, 0
+#define SR_MAX_BE              0x2f, 0xf0, 4
 
 #define CMD_REG                0x80
 #define CMD_REG_MASK   0x3f
@@ -292,6 +294,8 @@ struct at86rf230_local {
 #define STATE_BUSY_RX_AACK_NOCLK 0x1E
 #define STATE_TRANSITION_IN_PROGRESS 0x1F
 
+#define TRX_STATE_MASK         (0x1F)
+
 #define AT86RF2XX_NUMREGS 0x3F
 
 static void
@@ -336,6 +340,14 @@ at86rf230_write_subreg(struct at86rf230_local *lp,
        return regmap_update_bits(lp->regmap, addr, mask, data << shift);
 }
 
+static inline void
+at86rf230_slp_tr_rising_edge(struct at86rf230_local *lp)
+{
+       gpio_set_value(lp->slp_tr, 1);
+       udelay(1);
+       gpio_set_value(lp->slp_tr, 0);
+}
+
 static bool
 at86rf230_reg_writeable(struct device *dev, unsigned int reg)
 {
@@ -509,7 +521,7 @@ at86rf230_async_state_assert(void *context)
        struct at86rf230_state_change *ctx = context;
        struct at86rf230_local *lp = ctx->lp;
        const u8 *buf = ctx->buf;
-       const u8 trx_state = buf[1] & 0x1f;
+       const u8 trx_state = buf[1] & TRX_STATE_MASK;
 
        /* Assert state change */
        if (trx_state != ctx->to_state) {
@@ -609,11 +621,17 @@ at86rf230_async_state_delay(void *context)
                switch (ctx->to_state) {
                case STATE_RX_AACK_ON:
                        tim = ktime_set(0, c->t_off_to_aack * NSEC_PER_USEC);
+                       /* state change from TRX_OFF to RX_AACK_ON to do a
+                        * calibration, we need to reset the timeout for the
+                        * next one.
+                        */
+                       lp->cal_timeout = jiffies + AT86RF2XX_CAL_LOOP_TIMEOUT;
                        goto change;
+               case STATE_TX_ARET_ON:
                case STATE_TX_ON:
                        tim = ktime_set(0, c->t_off_to_tx_on * NSEC_PER_USEC);
-                       /* state change from TRX_OFF to TX_ON to do a
-                        * calibration, we need to reset the timeout for the
+                       /* state change from TRX_OFF to TX_ON or ARET_ON to do
+                        * calibration, we need to reset the timeout for the
                         * next one.
                         */
                        lp->cal_timeout = jiffies + AT86RF2XX_CAL_LOOP_TIMEOUT;
@@ -667,7 +685,7 @@ at86rf230_async_state_change_start(void *context)
        struct at86rf230_state_change *ctx = context;
        struct at86rf230_local *lp = ctx->lp;
        u8 *buf = ctx->buf;
-       const u8 trx_state = buf[1] & 0x1f;
+       const u8 trx_state = buf[1] & TRX_STATE_MASK;
        int rc;
 
        /* Check for "possible" STATE_TRANSITION_IN_PROGRESS */
@@ -773,16 +791,6 @@ at86rf230_tx_on(void *context)
 }
 
 static void
-at86rf230_tx_trac_error(void *context)
-{
-       struct at86rf230_state_change *ctx = context;
-       struct at86rf230_local *lp = ctx->lp;
-
-       at86rf230_async_state_change(lp, ctx, STATE_TX_ON,
-                                    at86rf230_tx_on, true);
-}
-
-static void
 at86rf230_tx_trac_check(void *context)
 {
        struct at86rf230_state_change *ctx = context;
@@ -791,12 +799,12 @@ at86rf230_tx_trac_check(void *context)
        const u8 trac = (buf[1] & 0xe0) >> 5;
 
        /* If trac status is different than zero we need to do a state change
-        * to STATE_FORCE_TRX_OFF then STATE_TX_ON to recover the transceiver
-        * state to TX_ON.
+        * to STATE_FORCE_TRX_OFF then STATE_RX_AACK_ON to recover the
+        * transceiver.
         */
        if (trac)
                at86rf230_async_state_change(lp, ctx, STATE_FORCE_TRX_OFF,
-                                            at86rf230_tx_trac_error, true);
+                                            at86rf230_tx_on, true);
        else
                at86rf230_tx_on(context);
 }
@@ -941,13 +949,18 @@ at86rf230_write_frame_complete(void *context)
        u8 *buf = ctx->buf;
        int rc;
 
-       buf[0] = (RG_TRX_STATE & CMD_REG_MASK) | CMD_REG | CMD_WRITE;
-       buf[1] = STATE_BUSY_TX;
        ctx->trx.len = 2;
-       ctx->msg.complete = NULL;
-       rc = spi_async(lp->spi, &ctx->msg);
-       if (rc)
-               at86rf230_async_error(lp, ctx, rc);
+
+       if (gpio_is_valid(lp->slp_tr)) {
+               at86rf230_slp_tr_rising_edge(lp);
+       } else {
+               buf[0] = (RG_TRX_STATE & CMD_REG_MASK) | CMD_REG | CMD_WRITE;
+               buf[1] = STATE_BUSY_TX;
+               ctx->msg.complete = NULL;
+               rc = spi_async(lp->spi, &ctx->msg);
+               if (rc)
+                       at86rf230_async_error(lp, ctx, rc);
+       }
 }
 
 static void
@@ -993,12 +1006,21 @@ at86rf230_xmit_start(void *context)
         * are in STATE_TX_ON. The pfad differs here, so we change
         * the complete handler.
         */
-       if (lp->tx_aret)
-               at86rf230_async_state_change(lp, ctx, STATE_TX_ON,
-                                            at86rf230_xmit_tx_on, false);
-       else
+       if (lp->tx_aret) {
+               if (lp->is_tx_from_off) {
+                       lp->is_tx_from_off = false;
+                       at86rf230_async_state_change(lp, ctx, STATE_TX_ARET_ON,
+                                                    at86rf230_xmit_tx_on,
+                                                    false);
+               } else {
+                       at86rf230_async_state_change(lp, ctx, STATE_TX_ON,
+                                                    at86rf230_xmit_tx_on,
+                                                    false);
+               }
+       } else {
                at86rf230_async_state_change(lp, ctx, STATE_TX_ON,
                                             at86rf230_write_frame, false);
+       }
 }
 
 static int
@@ -1017,11 +1039,13 @@ at86rf230_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
         * to TX_ON, the lp->cal_timeout should be reinit by state_delay
         * function then to start in the next 5 minutes.
         */
-       if (time_is_before_jiffies(lp->cal_timeout))
+       if (time_is_before_jiffies(lp->cal_timeout)) {
+               lp->is_tx_from_off = true;
                at86rf230_async_state_change(lp, ctx, STATE_TRX_OFF,
                                             at86rf230_xmit_start, false);
-       else
+       } else {
                at86rf230_xmit_start(ctx);
+       }
 
        return 0;
 }
@@ -1037,9 +1061,6 @@ at86rf230_ed(struct ieee802154_hw *hw, u8 *level)
 static int
 at86rf230_start(struct ieee802154_hw *hw)
 {
-       struct at86rf230_local *lp = hw->priv;
-
-       lp->cal_timeout = jiffies + AT86RF2XX_CAL_LOOP_TIMEOUT;
        return at86rf230_sync_state_change(hw->priv, STATE_RX_AACK_ON);
 }
 
@@ -1673,6 +1694,7 @@ static int at86rf230_probe(struct spi_device *spi)
        lp = hw->priv;
        lp->hw = hw;
        lp->spi = spi;
+       lp->slp_tr = slp_tr;
        hw->parent = &spi->dev;
        hw->vif_data_size = sizeof(*lp);
        ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
index b227a13..9f59f17 100644 (file)
@@ -599,10 +599,18 @@ static int macvlan_open(struct net_device *dev)
                        goto del_unicast;
        }
 
+       if (dev->flags & IFF_PROMISC) {
+               err = dev_set_promiscuity(lowerdev, 1);
+               if (err < 0)
+                       goto clear_multi;
+       }
+
 hash_add:
        macvlan_hash_add(vlan);
        return 0;
 
+clear_multi:
+       dev_set_allmulti(lowerdev, -1);
 del_unicast:
        dev_uc_del(lowerdev, dev->dev_addr);
 out:
@@ -638,6 +646,9 @@ static int macvlan_stop(struct net_device *dev)
        if (dev->flags & IFF_ALLMULTI)
                dev_set_allmulti(lowerdev, -1);
 
+       if (dev->flags & IFF_PROMISC)
+               dev_set_promiscuity(lowerdev, -1);
+
        dev_uc_del(lowerdev, dev->dev_addr);
 
 hash_del:
@@ -696,6 +707,10 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change)
        if (dev->flags & IFF_UP) {
                if (change & IFF_ALLMULTI)
                        dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1);
+               if (change & IFF_PROMISC)
+                       dev_set_promiscuity(lowerdev,
+                                           dev->flags & IFF_PROMISC ? 1 : -1);
+
        }
 }
 
index 8fadaa1..70641d2 100644 (file)
@@ -27,6 +27,7 @@ config AMD_PHY
 config AMD_XGBE_PHY
        tristate "Driver for the AMD 10GbE (amd-xgbe) PHYs"
        depends on (OF || ACPI) && HAS_IOMEM
+       depends on ARM64 || COMPILE_TEST
        ---help---
          Currently supports the AMD 10GbE PHY
 
index fb276f6..34a75cb 100644 (file)
@@ -755,6 +755,45 @@ static int amd_xgbe_phy_set_mode(struct phy_device *phydev,
        return ret;
 }
 
+static bool amd_xgbe_phy_use_xgmii_mode(struct phy_device *phydev)
+{
+       if (phydev->autoneg == AUTONEG_ENABLE) {
+               if (phydev->advertising & ADVERTISED_10000baseKR_Full)
+                       return true;
+       } else {
+               if (phydev->speed == SPEED_10000)
+                       return true;
+       }
+
+       return false;
+}
+
+static bool amd_xgbe_phy_use_gmii_2500_mode(struct phy_device *phydev)
+{
+       if (phydev->autoneg == AUTONEG_ENABLE) {
+               if (phydev->advertising & ADVERTISED_2500baseX_Full)
+                       return true;
+       } else {
+               if (phydev->speed == SPEED_2500)
+                       return true;
+       }
+
+       return false;
+}
+
+static bool amd_xgbe_phy_use_gmii_mode(struct phy_device *phydev)
+{
+       if (phydev->autoneg == AUTONEG_ENABLE) {
+               if (phydev->advertising & ADVERTISED_1000baseKX_Full)
+                       return true;
+       } else {
+               if (phydev->speed == SPEED_1000)
+                       return true;
+       }
+
+       return false;
+}
+
 static int amd_xgbe_phy_set_an(struct phy_device *phydev, bool enable,
                               bool restart)
 {
@@ -1235,11 +1274,11 @@ static int amd_xgbe_phy_config_init(struct phy_device *phydev)
        /* Set initial mode - call the mode setting routines
         * directly to insure we are properly configured
         */
-       if (phydev->advertising & SUPPORTED_10000baseKR_Full)
+       if (amd_xgbe_phy_use_xgmii_mode(phydev))
                ret = amd_xgbe_phy_xgmii_mode(phydev);
-       else if (phydev->advertising & SUPPORTED_1000baseKX_Full)
+       else if (amd_xgbe_phy_use_gmii_mode(phydev))
                ret = amd_xgbe_phy_gmii_mode(phydev);
-       else if (phydev->advertising & SUPPORTED_2500baseX_Full)
+       else if (amd_xgbe_phy_use_gmii_2500_mode(phydev))
                ret = amd_xgbe_phy_gmii_2500_mode(phydev);
        else
                ret = -EINVAL;
index 64c74c6..b5dc59d 100644 (file)
@@ -404,7 +404,7 @@ static struct phy_driver bcm7xxx_driver[] = {
        .name           = "Broadcom BCM7425",
        .features       = PHY_GBIT_FEATURES |
                          SUPPORTED_Pause | SUPPORTED_Asym_Pause,
-       .flags          = 0,
+       .flags          = PHY_IS_INTERNAL,
        .config_init    = bcm7xxx_config_init,
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
index 496e02f..00cb41e 100644 (file)
@@ -47,7 +47,7 @@
 #define PSF_TX         0x1000
 #define EXT_EVENT      1
 #define CAL_EVENT      7
-#define CAL_TRIGGER    7
+#define CAL_TRIGGER    1
 #define DP83640_N_PINS 12
 
 #define MII_DP83640_MICR 0x11
@@ -496,7 +496,9 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
                        else
                                evnt |= EVNT_RISE;
                }
+               mutex_lock(&clock->extreg_lock);
                ext_write(0, phydev, PAGE5, PTP_EVNT, evnt);
+               mutex_unlock(&clock->extreg_lock);
                return 0;
 
        case PTP_CLK_REQ_PEROUT:
@@ -532,6 +534,8 @@ static u8 status_frame_src[6] = { 0x08, 0x00, 0x17, 0x0B, 0x6B, 0x0F };
 
 static void enable_status_frames(struct phy_device *phydev, bool on)
 {
+       struct dp83640_private *dp83640 = phydev->priv;
+       struct dp83640_clock *clock = dp83640->clock;
        u16 cfg0 = 0, ver;
 
        if (on)
@@ -539,9 +543,13 @@ static void enable_status_frames(struct phy_device *phydev, bool on)
 
        ver = (PSF_PTPVER & VERSIONPTP_MASK) << VERSIONPTP_SHIFT;
 
+       mutex_lock(&clock->extreg_lock);
+
        ext_write(0, phydev, PAGE5, PSF_CFG0, cfg0);
        ext_write(0, phydev, PAGE6, PSF_CFG1, ver);
 
+       mutex_unlock(&clock->extreg_lock);
+
        if (!phydev->attached_dev) {
                pr_warn("expected to find an attached netdevice\n");
                return;
@@ -838,7 +846,7 @@ static void decode_rxts(struct dp83640_private *dp83640,
        list_del_init(&rxts->list);
        phy2rxts(phy_rxts, rxts);
 
-       spin_lock_irqsave(&dp83640->rx_queue.lock, flags);
+       spin_lock(&dp83640->rx_queue.lock);
        skb_queue_walk(&dp83640->rx_queue, skb) {
                struct dp83640_skb_info *skb_info;
 
@@ -853,7 +861,7 @@ static void decode_rxts(struct dp83640_private *dp83640,
                        break;
                }
        }
-       spin_unlock_irqrestore(&dp83640->rx_queue.lock, flags);
+       spin_unlock(&dp83640->rx_queue.lock);
 
        if (!shhwtstamps)
                list_add_tail(&rxts->list, &dp83640->rxts);
@@ -1173,11 +1181,18 @@ static int dp83640_config_init(struct phy_device *phydev)
 
        if (clock->chosen && !list_empty(&clock->phylist))
                recalibrate(clock);
-       else
+       else {
+               mutex_lock(&clock->extreg_lock);
                enable_broadcast(phydev, clock->page, 1);
+               mutex_unlock(&clock->extreg_lock);
+       }
 
        enable_status_frames(phydev, true);
+
+       mutex_lock(&clock->extreg_lock);
        ext_write(0, phydev, PAGE4, PTP_CTL, PTP_ENABLE);
+       mutex_unlock(&clock->extreg_lock);
+
        return 0;
 }
 
index c9cb486..53d1815 100644 (file)
@@ -168,7 +168,10 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev,
                if (!new_bus->irq[i])
                        new_bus->irq[i] = PHY_POLL;
 
-       snprintf(new_bus->id, MII_BUS_ID_SIZE, "gpio-%x", bus_id);
+       if (bus_id != -1)
+               snprintf(new_bus->id, MII_BUS_ID_SIZE, "gpio-%x", bus_id);
+       else
+               strncpy(new_bus->id, "gpio", MII_BUS_ID_SIZE);
 
        if (devm_gpio_request(dev, bitbang->mdc, "mdc"))
                goto out_free_bus;
index 1190fd8..ebdc357 100644 (file)
@@ -548,7 +548,8 @@ static int kszphy_probe(struct phy_device *phydev)
        }
 
        clk = devm_clk_get(&phydev->dev, "rmii-ref");
-       if (!IS_ERR(clk)) {
+       /* NOTE: clk may be NULL if building without CONFIG_HAVE_CLK */
+       if (!IS_ERR_OR_NULL(clk)) {
                unsigned long rate = clk_get_rate(clk);
                bool rmii_ref_clk_sel_25_mhz;
 
index 52cd8db..47cd578 100644 (file)
@@ -742,6 +742,9 @@ EXPORT_SYMBOL(phy_stop);
  */
 void phy_start(struct phy_device *phydev)
 {
+       bool do_resume = false;
+       int err = 0;
+
        mutex_lock(&phydev->lock);
 
        switch (phydev->state) {
@@ -752,11 +755,22 @@ void phy_start(struct phy_device *phydev)
                phydev->state = PHY_UP;
                break;
        case PHY_HALTED:
+               /* make sure interrupts are re-enabled for the PHY */
+               err = phy_enable_interrupts(phydev);
+               if (err < 0)
+                       break;
+
                phydev->state = PHY_RESUMING;
+               do_resume = true;
+               break;
        default:
                break;
        }
        mutex_unlock(&phydev->lock);
+
+       /* if phy was suspended, bring the physical link up again */
+       if (do_resume)
+               phy_resume(phydev);
 }
 EXPORT_SYMBOL(phy_start);
 
@@ -769,7 +783,7 @@ void phy_state_machine(struct work_struct *work)
        struct delayed_work *dwork = to_delayed_work(work);
        struct phy_device *phydev =
                        container_of(dwork, struct phy_device, state_queue);
-       bool needs_aneg = false, do_suspend = false, do_resume = false;
+       bool needs_aneg = false, do_suspend = false;
        int err = 0;
 
        mutex_lock(&phydev->lock);
@@ -888,14 +902,6 @@ void phy_state_machine(struct work_struct *work)
                }
                break;
        case PHY_RESUMING:
-               err = phy_clear_interrupt(phydev);
-               if (err)
-                       break;
-
-               err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED);
-               if (err)
-                       break;
-
                if (AUTONEG_ENABLE == phydev->autoneg) {
                        err = phy_aneg_done(phydev);
                        if (err < 0)
@@ -933,7 +939,6 @@ void phy_state_machine(struct work_struct *work)
                        }
                        phydev->adjust_link(phydev->attached_dev);
                }
-               do_resume = true;
                break;
        }
 
@@ -943,8 +948,6 @@ void phy_state_machine(struct work_struct *work)
                err = phy_start_aneg(phydev);
        else if (do_suspend)
                phy_suspend(phydev);
-       else if (do_resume)
-               phy_resume(phydev);
 
        if (err < 0)
                phy_error(phydev);
@@ -1053,13 +1056,14 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
 {
        /* According to 802.3az,the EEE is supported only in full duplex-mode.
         * Also EEE feature is active when core is operating with MII, GMII
-        * or RGMII. Internal PHYs are also allowed to proceed and should
-        * return an error if they do not support EEE.
+        * or RGMII (all kinds). Internal PHYs are also allowed to proceed and
+        * should return an error if they do not support EEE.
         */
        if ((phydev->duplex == DUPLEX_FULL) &&
            ((phydev->interface == PHY_INTERFACE_MODE_MII) ||
            (phydev->interface == PHY_INTERFACE_MODE_GMII) ||
-           (phydev->interface == PHY_INTERFACE_MODE_RGMII) ||
+           (phydev->interface >= PHY_INTERFACE_MODE_RGMII &&
+            phydev->interface <= PHY_INTERFACE_MODE_RGMII_TXID) ||
             phy_is_internal(phydev))) {
                int eee_lp, eee_cap, eee_adv;
                u32 lp, cap, adv;
index aa1dd92..b62a5e3 100644 (file)
@@ -465,6 +465,10 @@ static void pppoe_unbind_sock_work(struct work_struct *work)
        struct sock *sk = sk_pppox(po);
 
        lock_sock(sk);
+       if (po->pppoe_dev) {
+               dev_put(po->pppoe_dev);
+               po->pppoe_dev = NULL;
+       }
        pppox_unbind_sock(sk);
        release_sock(sk);
        sock_put(sk);
index c3e4da9..8067b8f 100644 (file)
@@ -1182,7 +1182,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
         * payload data instead.
         */
        usbnet_set_skb_tx_stats(skb_out, n,
-                               ctx->tx_curr_frame_payload - skb_out->len);
+                               (long)ctx->tx_curr_frame_payload - skb_out->len);
 
        return skb_out;
 
index ac4d03b..aafa1a1 100644 (file)
@@ -4116,6 +4116,7 @@ static struct usb_device_id rtl8152_table[] = {
        {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)},
        {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)},
        {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7205)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x304f)},
        {}
 };
 
index 733f4fe..3c86b10 100644 (file)
@@ -1285,7 +1285,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
                                     struct net_device *net)
 {
        struct usbnet           *dev = netdev_priv(net);
-       int                     length;
+       unsigned int                    length;
        struct urb              *urb = NULL;
        struct skb_data         *entry;
        struct driver_info      *info = dev->driver_info;
@@ -1413,7 +1413,7 @@ not_drop:
                }
        } else
                netif_dbg(dev, tx_queued, dev->net,
-                         "> tx, len %d, type 0x%x\n", length, skb->protocol);
+                         "> tx, len %u, type 0x%x\n", length, skb->protocol);
 #ifdef CONFIG_PM
 deferred:
 #endif
index 27a5f95..21a0fbf 100644 (file)
@@ -2961,7 +2961,7 @@ static void __net_exit vxlan_exit_net(struct net *net)
                 * to the list by the previous loop.
                 */
                if (!net_eq(dev_net(vxlan->dev), net))
-                       unregister_netdevice_queue(dev, &list);
+                       unregister_netdevice_queue(vxlan->dev, &list);
        }
 
        unregister_netdevice_many(&list);
index 0acd079..3ad79bb 100644 (file)
@@ -1103,28 +1103,14 @@ static u8 ath_get_rate_txpower(struct ath_softc *sc, struct ath_buf *bf,
        struct sk_buff *skb;
        struct ath_frame_info *fi;
        struct ieee80211_tx_info *info;
-       struct ieee80211_vif *vif;
        struct ath_hw *ah = sc->sc_ah;
 
        if (sc->tx99_state || !ah->tpc_enabled)
                return MAX_RATE_POWER;
 
        skb = bf->bf_mpdu;
-       info = IEEE80211_SKB_CB(skb);
-       vif = info->control.vif;
-
-       if (!vif) {
-               max_power = sc->cur_chan->cur_txpower;
-               goto out;
-       }
-
-       if (vif->bss_conf.txpower_type != NL80211_TX_POWER_LIMITED) {
-               max_power = min_t(u8, sc->cur_chan->cur_txpower,
-                                 2 * vif->bss_conf.txpower);
-               goto out;
-       }
-
        fi = get_frame_info(skb);
+       info = IEEE80211_SKB_CB(skb);
 
        if (!AR_SREV_9300_20_OR_LATER(ah)) {
                int txpower = fi->tx_power;
@@ -1161,25 +1147,26 @@ static u8 ath_get_rate_txpower(struct ath_softc *sc, struct ath_buf *bf,
                        txpower -= 2;
 
                txpower = max(txpower, 0);
-               max_power = min_t(u8, ah->tx_power[rateidx],
-                                 2 * vif->bss_conf.txpower);
-               max_power = min_t(u8, max_power, txpower);
+               max_power = min_t(u8, ah->tx_power[rateidx], txpower);
+
+               /* XXX: clamp minimum TX power at 1 for AR9160 since if
+                * max_power is set to 0, frames are transmitted at max
+                * TX power
+                */
+               if (!max_power && !AR_SREV_9280_20_OR_LATER(ah))
+                       max_power = 1;
        } else if (!bf->bf_state.bfs_paprd) {
                if (rateidx < 8 && (info->flags & IEEE80211_TX_CTL_STBC))
                        max_power = min_t(u8, ah->tx_power_stbc[rateidx],
-                                         2 * vif->bss_conf.txpower);
+                                         fi->tx_power);
                else
                        max_power = min_t(u8, ah->tx_power[rateidx],
-                                         2 * vif->bss_conf.txpower);
-               max_power = min(max_power, fi->tx_power);
+                                         fi->tx_power);
        } else {
                max_power = ah->paprd_training_power;
        }
-out:
-       /* XXX: clamp minimum TX power at 1 for AR9160 since if max_power
-        * is set to 0, frames are transmitted at max TX power
-        */
-       return (!max_power && !AR_SREV_9280_20_OR_LATER(ah)) ? 1 : max_power;
+
+       return max_power;
 }
 
 static void ath_buf_set_rate(struct ath_softc *sc, struct ath_buf *bf,
@@ -2129,6 +2116,7 @@ static void setup_frame_info(struct ieee80211_hw *hw,
        struct ath_node *an = NULL;
        enum ath9k_key_type keytype;
        bool short_preamble = false;
+       u8 txpower;
 
        /*
         * We check if Short Preamble is needed for the CTS rate by
@@ -2145,6 +2133,16 @@ static void setup_frame_info(struct ieee80211_hw *hw,
        if (sta)
                an = (struct ath_node *) sta->drv_priv;
 
+       if (tx_info->control.vif) {
+               struct ieee80211_vif *vif = tx_info->control.vif;
+
+               txpower = 2 * vif->bss_conf.txpower;
+       } else {
+               struct ath_softc *sc = hw->priv;
+
+               txpower = sc->cur_chan->cur_txpower;
+       }
+
        memset(fi, 0, sizeof(*fi));
        fi->txq = -1;
        if (hw_key)
@@ -2155,7 +2153,7 @@ static void setup_frame_info(struct ieee80211_hw *hw,
                fi->keyix = ATH9K_TXKEYIX_INVALID;
        fi->keytype = keytype;
        fi->framelen = framelen;
-       fi->tx_power = MAX_RATE_POWER;
+       fi->tx_power = txpower;
 
        if (!rate)
                return;
index 4ec9811..65efb14 100644 (file)
@@ -511,11 +511,9 @@ static int brcmf_msgbuf_query_dcmd(struct brcmf_pub *drvr, int ifidx,
                                     msgbuf->rx_pktids,
                                     msgbuf->ioctl_resp_pktid);
        if (msgbuf->ioctl_resp_ret_len != 0) {
-               if (!skb) {
-                       brcmf_err("Invalid packet id idx recv'd %d\n",
-                                 msgbuf->ioctl_resp_pktid);
+               if (!skb)
                        return -EBADF;
-               }
+
                memcpy(buf, skb->data, (len < msgbuf->ioctl_resp_ret_len) ?
                                       len : msgbuf->ioctl_resp_ret_len);
        }
@@ -874,10 +872,8 @@ brcmf_msgbuf_process_txstatus(struct brcmf_msgbuf *msgbuf, void *buf)
        flowid -= BRCMF_NROF_H2D_COMMON_MSGRINGS;
        skb = brcmf_msgbuf_get_pktid(msgbuf->drvr->bus_if->dev,
                                     msgbuf->tx_pktids, idx);
-       if (!skb) {
-               brcmf_err("Invalid packet id idx recv'd %d\n", idx);
+       if (!skb)
                return;
-       }
 
        set_bit(flowid, msgbuf->txstatus_done_map);
        commonring = msgbuf->flowrings[flowid];
@@ -1156,6 +1152,8 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf)
 
        skb = brcmf_msgbuf_get_pktid(msgbuf->drvr->bus_if->dev,
                                     msgbuf->rx_pktids, idx);
+       if (!skb)
+               return;
 
        if (data_offset)
                skb_pull(skb, data_offset);
index ab019b4..f89f446 100644 (file)
@@ -21,6 +21,7 @@ config IWLWIFI
                Intel 7260 Wi-Fi Adapter
                Intel 3160 Wi-Fi Adapter
                Intel 7265 Wi-Fi Adapter
+               Intel 3165 Wi-Fi Adapter
 
 
          This driver uses the kernel's mac80211 subsystem.
index 36e786f..74ad278 100644 (file)
 
 /* Highest firmware API version supported */
 #define IWL7260_UCODE_API_MAX  13
-#define IWL3160_UCODE_API_MAX  13
 
 /* Oldest version we won't warn about */
 #define IWL7260_UCODE_API_OK   12
-#define IWL3160_UCODE_API_OK   12
+#define IWL3165_UCODE_API_OK   13
 
 /* Lowest firmware API version supported */
 #define IWL7260_UCODE_API_MIN  10
-#define IWL3160_UCODE_API_MIN  10
+#define IWL3165_UCODE_API_MIN  13
 
 /* NVM versions */
 #define IWL7260_NVM_VERSION            0x0a1d
 #define IWL3160_FW_PRE "iwlwifi-3160-"
 #define IWL3160_MODULE_FIRMWARE(api) IWL3160_FW_PRE __stringify(api) ".ucode"
 
-#define IWL3165_FW_PRE "iwlwifi-3165-"
-#define IWL3165_MODULE_FIRMWARE(api) IWL3165_FW_PRE __stringify(api) ".ucode"
-
 #define IWL7265_FW_PRE "iwlwifi-7265-"
 #define IWL7265_MODULE_FIRMWARE(api) IWL7265_FW_PRE __stringify(api) ".ucode"
 
@@ -248,8 +244,13 @@ static const struct iwl_ht_params iwl7265_ht_params = {
 
 const struct iwl_cfg iwl3165_2ac_cfg = {
        .name = "Intel(R) Dual Band Wireless AC 3165",
-       .fw_name_pre = IWL3165_FW_PRE,
+       .fw_name_pre = IWL7265D_FW_PRE,
        IWL_DEVICE_7000,
+       /* sparse doens't like the re-assignment but it is safe */
+#ifndef __CHECKER__
+       .ucode_api_ok = IWL3165_UCODE_API_OK,
+       .ucode_api_min = IWL3165_UCODE_API_MIN,
+#endif
        .ht_params = &iwl7000_ht_params,
        .nvm_ver = IWL3165_NVM_VERSION,
        .nvm_calib_ver = IWL3165_TX_POWER_VERSION,
@@ -325,6 +326,5 @@ const struct iwl_cfg iwl7265d_n_cfg = {
 
 MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
 MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL3160_UCODE_API_OK));
-MODULE_FIRMWARE(IWL3165_MODULE_FIRMWARE(IWL3160_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
index 41ff85d..21302b6 100644 (file)
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,6 +32,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -748,6 +750,9 @@ void iwl_init_ht_hw_capab(const struct iwl_cfg *cfg,
                return;
        }
 
+       if (data->sku_cap_mimo_disabled)
+               rx_chains = 1;
+
        ht_info->ht_supported = true;
        ht_info->cap = IEEE80211_HT_CAP_DSSSCCK40;
 
index 5234a0b..750c8c9 100644 (file)
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,6 +32,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -84,6 +86,7 @@ struct iwl_nvm_data {
        bool sku_cap_11ac_enable;
        bool sku_cap_amt_enable;
        bool sku_cap_ipan_enable;
+       bool sku_cap_mimo_disabled;
 
        u16 radio_cfg_type;
        u8 radio_cfg_step;
index bfdf3fa..62db2e5 100644 (file)
@@ -244,6 +244,7 @@ enum iwl_ucode_tlv_flag {
  *     longer than the passive one, which is essential for fragmented scan.
  * @IWL_UCODE_TLV_API_WIFI_MCC_UPDATE: ucode supports MCC updates with source.
  * IWL_UCODE_TLV_API_HDC_PHASE_0: ucode supports finer configuration of LTR
+ * @IWL_UCODE_TLV_API_TX_POWER_DEV: new API for tx power.
  * @IWL_UCODE_TLV_API_BASIC_DWELL: use only basic dwell time in scan command,
  *     regardless of the band or the number of the probes. FW will calculate
  *     the actual dwell time.
@@ -260,6 +261,7 @@ enum iwl_ucode_tlv_api {
        IWL_UCODE_TLV_API_FRAGMENTED_SCAN       = BIT(8),
        IWL_UCODE_TLV_API_WIFI_MCC_UPDATE       = BIT(9),
        IWL_UCODE_TLV_API_HDC_PHASE_0           = BIT(10),
+       IWL_UCODE_TLV_API_TX_POWER_DEV          = BIT(11),
        IWL_UCODE_TLV_API_BASIC_DWELL           = BIT(13),
        IWL_UCODE_TLV_API_SCD_CFG               = BIT(15),
        IWL_UCODE_TLV_API_SINGLE_SCAN_EBS       = BIT(16),
index 83903a5..8e604a3 100644 (file)
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -32,7 +32,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -116,10 +116,11 @@ enum family_8000_nvm_offsets {
 
 /* SKU Capabilities (actual values from NVM definition) */
 enum nvm_sku_bits {
-       NVM_SKU_CAP_BAND_24GHZ  = BIT(0),
-       NVM_SKU_CAP_BAND_52GHZ  = BIT(1),
-       NVM_SKU_CAP_11N_ENABLE  = BIT(2),
-       NVM_SKU_CAP_11AC_ENABLE = BIT(3),
+       NVM_SKU_CAP_BAND_24GHZ          = BIT(0),
+       NVM_SKU_CAP_BAND_52GHZ          = BIT(1),
+       NVM_SKU_CAP_11N_ENABLE          = BIT(2),
+       NVM_SKU_CAP_11AC_ENABLE         = BIT(3),
+       NVM_SKU_CAP_MIMO_DISABLE        = BIT(5),
 };
 
 /*
@@ -368,6 +369,11 @@ static void iwl_init_vht_hw_capab(const struct iwl_cfg *cfg,
        if (cfg->ht_params->ldpc)
                vht_cap->cap |= IEEE80211_VHT_CAP_RXLDPC;
 
+       if (data->sku_cap_mimo_disabled) {
+               num_rx_ants = 1;
+               num_tx_ants = 1;
+       }
+
        if (num_tx_ants > 1)
                vht_cap->cap |= IEEE80211_VHT_CAP_TXSTBC;
        else
@@ -465,7 +471,7 @@ static int iwl_get_radio_cfg(const struct iwl_cfg *cfg, const __le16 *nvm_sw,
        if (cfg->device_family != IWL_DEVICE_FAMILY_8000)
                return le16_to_cpup(nvm_sw + RADIO_CFG);
 
-       return le32_to_cpup((__le32 *)(nvm_sw + RADIO_CFG_FAMILY_8000));
+       return le32_to_cpup((__le32 *)(phy_sku + RADIO_CFG_FAMILY_8000));
 
 }
 
@@ -527,6 +533,10 @@ static void iwl_set_hw_address_family_8000(struct device *dev,
        const u8 *hw_addr;
 
        if (mac_override) {
+               static const u8 reserved_mac[] = {
+                       0x02, 0xcc, 0xaa, 0xff, 0xee, 0x00
+               };
+
                hw_addr = (const u8 *)(mac_override +
                                 MAC_ADDRESS_OVERRIDE_FAMILY_8000);
 
@@ -538,7 +548,12 @@ static void iwl_set_hw_address_family_8000(struct device *dev,
                data->hw_addr[4] = hw_addr[5];
                data->hw_addr[5] = hw_addr[4];
 
-               if (is_valid_ether_addr(data->hw_addr))
+               /*
+                * Force the use of the OTP MAC address in case of reserved MAC
+                * address in the NVM, or if address is given but invalid.
+                */
+               if (is_valid_ether_addr(data->hw_addr) &&
+                   memcmp(reserved_mac, hw_addr, ETH_ALEN) != 0)
                        return;
 
                IWL_ERR_DEV(dev,
@@ -610,6 +625,7 @@ iwl_parse_nvm_data(struct device *dev, const struct iwl_cfg *cfg,
                data->sku_cap_11n_enable = false;
        data->sku_cap_11ac_enable = data->sku_cap_11n_enable &&
                                    (sku & NVM_SKU_CAP_11AC_ENABLE);
+       data->sku_cap_mimo_disabled = sku & NVM_SKU_CAP_MIMO_DISABLE;
 
        data->n_hw_addrs = iwl_get_n_hw_addrs(cfg, nvm_sw);
 
index 6dfed12..56254a8 100644 (file)
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -32,7 +32,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -421,8 +421,9 @@ struct iwl_trans_txq_scd_cfg {
  *
  * All the handlers MUST be implemented
  *
- * @start_hw: starts the HW- from that point on, the HW can send interrupts
- *     May sleep
+ * @start_hw: starts the HW. If low_power is true, the NIC needs to be taken
+ *     out of a low power state. From that point on, the HW can send
+ *     interrupts. May sleep.
  * @op_mode_leave: Turn off the HW RF kill indication if on
  *     May sleep
  * @start_fw: allocates and inits all the resources for the transport
@@ -432,10 +433,11 @@ struct iwl_trans_txq_scd_cfg {
  *     the SCD base address in SRAM, then provide it here, or 0 otherwise.
  *     May sleep
  * @stop_device: stops the whole device (embedded CPU put to reset) and stops
- *     the HW. From that point on, the HW will be in low power but will still
- *     issue interrupt if the HW RF kill is triggered. This callback must do
- *     the right thing and not crash even if start_hw() was called but not
- *     start_fw(). May sleep
+ *     the HW. If low_power is true, the NIC will be put in low power state.
+ *     From that point on, the HW will be stopped but will still issue an
+ *     interrupt if the HW RF kill switch is triggered.
+ *     This callback must do the right thing and not crash even if %start_hw()
+ *     was called but not &start_fw(). May sleep.
  * @d3_suspend: put the device into the correct mode for WoWLAN during
  *     suspend. This is optional, if not implemented WoWLAN will not be
  *     supported. This callback may sleep.
@@ -491,14 +493,14 @@ struct iwl_trans_txq_scd_cfg {
  */
 struct iwl_trans_ops {
 
-       int (*start_hw)(struct iwl_trans *iwl_trans);
+       int (*start_hw)(struct iwl_trans *iwl_trans, bool low_power);
        void (*op_mode_leave)(struct iwl_trans *iwl_trans);
        int (*start_fw)(struct iwl_trans *trans, const struct fw_img *fw,
                        bool run_in_rfkill);
        int (*update_sf)(struct iwl_trans *trans,
                         struct iwl_sf_region *st_fwrd_space);
        void (*fw_alive)(struct iwl_trans *trans, u32 scd_addr);
-       void (*stop_device)(struct iwl_trans *trans);
+       void (*stop_device)(struct iwl_trans *trans, bool low_power);
 
        void (*d3_suspend)(struct iwl_trans *trans, bool test);
        int (*d3_resume)(struct iwl_trans *trans, enum iwl_d3_status *status,
@@ -652,11 +654,16 @@ static inline void iwl_trans_configure(struct iwl_trans *trans,
        trans->ops->configure(trans, trans_cfg);
 }
 
-static inline int iwl_trans_start_hw(struct iwl_trans *trans)
+static inline int _iwl_trans_start_hw(struct iwl_trans *trans, bool low_power)
 {
        might_sleep();
 
-       return trans->ops->start_hw(trans);
+       return trans->ops->start_hw(trans, low_power);
+}
+
+static inline int iwl_trans_start_hw(struct iwl_trans *trans)
+{
+       return trans->ops->start_hw(trans, true);
 }
 
 static inline void iwl_trans_op_mode_leave(struct iwl_trans *trans)
@@ -703,15 +710,21 @@ static inline int iwl_trans_update_sf(struct iwl_trans *trans,
        return 0;
 }
 
-static inline void iwl_trans_stop_device(struct iwl_trans *trans)
+static inline void _iwl_trans_stop_device(struct iwl_trans *trans,
+                                         bool low_power)
 {
        might_sleep();
 
-       trans->ops->stop_device(trans);
+       trans->ops->stop_device(trans, low_power);
 
        trans->state = IWL_TRANS_NO_FW;
 }
 
+static inline void iwl_trans_stop_device(struct iwl_trans *trans)
+{
+       _iwl_trans_stop_device(trans, true);
+}
+
 static inline void iwl_trans_d3_suspend(struct iwl_trans *trans, bool test)
 {
        might_sleep();
index d954591..6ac6de2 100644 (file)
@@ -776,7 +776,7 @@ static int iwl_mvm_bt_coex_reduced_txp(struct iwl_mvm *mvm, u8 sta_id,
        struct iwl_host_cmd cmd = {
                .id = BT_CONFIG,
                .len = { sizeof(*bt_cmd), },
-               .dataflags = { IWL_HCMD_DFL_NOCOPY, },
+               .dataflags = { IWL_HCMD_DFL_DUP, },
                .flags = CMD_ASYNC,
        };
        struct iwl_mvm_sta *mvmsta;
index a6c48c7..4310cf1 100644 (file)
@@ -1726,7 +1726,7 @@ iwl_mvm_netdetect_query_results(struct iwl_mvm *mvm,
        results->matched_profiles = le32_to_cpu(query->matched_profiles);
        memcpy(results->matches, query->matches, sizeof(results->matches));
 
-#ifdef CPTCFG_IWLWIFI_DEBUGFS
+#ifdef CONFIG_IWLWIFI_DEBUGFS
        mvm->last_netdetect_scans = le32_to_cpu(query->n_scans_done);
 #endif
 
@@ -1750,8 +1750,10 @@ static void iwl_mvm_query_netdetect_reasons(struct iwl_mvm *mvm,
        int i, j, n_matches, ret;
 
        fw_status = iwl_mvm_get_wakeup_status(mvm, vif);
-       if (!IS_ERR_OR_NULL(fw_status))
+       if (!IS_ERR_OR_NULL(fw_status)) {
                reasons = le32_to_cpu(fw_status->wakeup_reasons);
+               kfree(fw_status);
+       }
 
        if (reasons & IWL_WOWLAN_WAKEUP_BY_RFKILL_DEASSERTED)
                wakeup.rfkill_release = true;
@@ -1868,15 +1870,15 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
        /* get the BSS vif pointer again */
        vif = iwl_mvm_get_bss_vif(mvm);
        if (IS_ERR_OR_NULL(vif))
-               goto out_unlock;
+               goto err;
 
        ret = iwl_trans_d3_resume(mvm->trans, &d3_status, test);
        if (ret)
-               goto out_unlock;
+               goto err;
 
        if (d3_status != IWL_D3_STATUS_ALIVE) {
                IWL_INFO(mvm, "Device was reset during suspend\n");
-               goto out_unlock;
+               goto err;
        }
 
        /* query SRAM first in case we want event logging */
@@ -1902,7 +1904,8 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
                goto out_iterate;
        }
 
- out_unlock:
+err:
+       iwl_mvm_free_nd(mvm);
        mutex_unlock(&mvm->mutex);
 
 out_iterate:
@@ -1915,6 +1918,14 @@ out:
        /* return 1 to reconfigure the device */
        set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status);
        set_bit(IWL_MVM_STATUS_D3_RECONFIG, &mvm->status);
+
+       /* We always return 1, which causes mac80211 to do a reconfig
+        * with IEEE80211_RECONFIG_TYPE_RESTART.  This type of
+        * reconfig calls iwl_mvm_restart_complete(), where we unref
+        * the IWL_MVM_REF_UCODE_DOWN, so we need to take the
+        * reference here.
+        */
+       iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN);
        return 1;
 }
 
@@ -2021,7 +2032,6 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
        __iwl_mvm_resume(mvm, true);
        rtnl_unlock();
        iwl_abort_notification_waits(&mvm->notif_wait);
-       iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN);
        ieee80211_restart_hw(mvm->hw);
 
        /* wait for restart and disconnect all interfaces */
index 4fc0938..b1baa33 100644 (file)
@@ -298,6 +298,40 @@ struct iwl_uapsd_misbehaving_ap_notif {
 } __packed;
 
 /**
+ * struct iwl_reduce_tx_power_cmd - TX power reduction command
+ * REDUCE_TX_POWER_CMD = 0x9f
+ * @flags: (reserved for future implementation)
+ * @mac_context_id: id of the mac ctx for which we are reducing TX power.
+ * @pwr_restriction: TX power restriction in dBms.
+ */
+struct iwl_reduce_tx_power_cmd {
+       u8 flags;
+       u8 mac_context_id;
+       __le16 pwr_restriction;
+} __packed; /* TX_REDUCED_POWER_API_S_VER_1 */
+
+/**
+ * struct iwl_dev_tx_power_cmd - TX power reduction command
+ * REDUCE_TX_POWER_CMD = 0x9f
+ * @set_mode: 0 - MAC tx power, 1 - device tx power
+ * @mac_context_id: id of the mac ctx for which we are reducing TX power.
+ * @pwr_restriction: TX power restriction in 1/8 dBms.
+ * @dev_24: device TX power restriction in 1/8 dBms
+ * @dev_52_low: device TX power restriction upper band - low
+ * @dev_52_high: device TX power restriction upper band - high
+ */
+struct iwl_dev_tx_power_cmd {
+       __le32 set_mode;
+       __le32 mac_context_id;
+       __le16 pwr_restriction;
+       __le16 dev_24;
+       __le16 dev_52_low;
+       __le16 dev_52_high;
+} __packed; /* TX_REDUCED_POWER_API_S_VER_2 */
+
+#define IWL_DEV_MAX_TX_POWER 0x7FFF
+
+/**
  * struct iwl_beacon_filter_cmd
  * REPLY_BEACON_FILTERING_CMD = 0xd2 (command)
  * @id_and_color: MAC contex identifier
index 4f81dcf..d6cced4 100644 (file)
@@ -122,46 +122,6 @@ enum iwl_scan_complete_status {
        SCAN_COMP_STATUS_ERR_ALLOC_TE = 0x0C,
 };
 
-/**
- * struct iwl_scan_results_notif - scan results for one channel
- * ( SCAN_RESULTS_NOTIFICATION = 0x83 )
- * @channel: which channel the results are from
- * @band: 0 for 5.2 GHz, 1 for 2.4 GHz
- * @probe_status: SCAN_PROBE_STATUS_*, indicates success of probe request
- * @num_probe_not_sent: # of request that weren't sent due to not enough time
- * @duration: duration spent in channel, in usecs
- * @statistics: statistics gathered for this channel
- */
-struct iwl_scan_results_notif {
-       u8 channel;
-       u8 band;
-       u8 probe_status;
-       u8 num_probe_not_sent;
-       __le32 duration;
-       __le32 statistics[SCAN_RESULTS_STATISTICS];
-} __packed; /* SCAN_RESULT_NTF_API_S_VER_2 */
-
-/**
- * struct iwl_scan_complete_notif - notifies end of scanning (all channels)
- * ( SCAN_COMPLETE_NOTIFICATION = 0x84 )
- * @scanned_channels: number of channels scanned (and number of valid results)
- * @status: one of SCAN_COMP_STATUS_*
- * @bt_status: BT on/off status
- * @last_channel: last channel that was scanned
- * @tsf_low: TSF timer (lower half) in usecs
- * @tsf_high: TSF timer (higher half) in usecs
- * @results: array of scan results, only "scanned_channels" of them are valid
- */
-struct iwl_scan_complete_notif {
-       u8 scanned_channels;
-       u8 status;
-       u8 bt_status;
-       u8 last_channel;
-       __le32 tsf_low;
-       __le32 tsf_high;
-       struct iwl_scan_results_notif results[];
-} __packed; /* SCAN_COMPLETE_NTF_API_S_VER_2 */
-
 /* scan offload */
 #define IWL_SCAN_MAX_BLACKLIST_LEN     64
 #define IWL_SCAN_SHORT_BLACKLIST_LEN   16
@@ -554,7 +514,7 @@ struct iwl_scan_req_unified_lmac {
 } __packed;
 
 /**
- * struct iwl_lmac_scan_results_notif - scan results for one channel -
+ * struct iwl_scan_results_notif - scan results for one channel -
  *     SCAN_RESULT_NTF_API_S_VER_3
  * @channel: which channel the results are from
  * @band: 0 for 5.2 GHz, 1 for 2.4 GHz
@@ -562,7 +522,7 @@ struct iwl_scan_req_unified_lmac {
  * @num_probe_not_sent: # of request that weren't sent due to not enough time
  * @duration: duration spent in channel, in usecs
  */
-struct iwl_lmac_scan_results_notif {
+struct iwl_scan_results_notif {
        u8 channel;
        u8 band;
        u8 probe_status;
index aab68cb..01b1da6 100644 (file)
@@ -281,19 +281,6 @@ struct iwl_tx_ant_cfg_cmd {
        __le32 valid;
 } __packed;
 
-/**
- * struct iwl_reduce_tx_power_cmd - TX power reduction command
- * REDUCE_TX_POWER_CMD = 0x9f
- * @flags: (reserved for future implementation)
- * @mac_context_id: id of the mac ctx for which we are reducing TX power.
- * @pwr_restriction: TX power restriction in dBms.
- */
-struct iwl_reduce_tx_power_cmd {
-       u8 flags;
-       u8 mac_context_id;
-       __le16 pwr_restriction;
-} __packed; /* TX_REDUCED_POWER_API_S_VER_1 */
-
 /*
  * Calibration control struct.
  * Sent as part of the phy configuration command.
index bc5eac4..df86963 100644 (file)
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -32,7 +32,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -322,7 +322,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
 
        lockdep_assert_held(&mvm->mutex);
 
-       if (WARN_ON_ONCE(mvm->init_ucode_complete || mvm->calibrating))
+       if (WARN_ON_ONCE(mvm->calibrating))
                return 0;
 
        iwl_init_notification_wait(&mvm->notif_wait,
@@ -396,8 +396,6 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
         */
        ret = iwl_wait_notification(&mvm->notif_wait, &calib_wait,
                        MVM_UCODE_CALIB_TIMEOUT);
-       if (!ret)
-               mvm->init_ucode_complete = true;
 
        if (ret && iwl_mvm_is_radio_killed(mvm)) {
                IWL_DEBUG_RF_KILL(mvm, "RFKILL while calibrating.\n");
@@ -494,15 +492,6 @@ int iwl_mvm_fw_dbg_collect_desc(struct iwl_mvm *mvm,
 
        mvm->fw_dump_desc = desc;
 
-       /* stop recording */
-       if (mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
-               iwl_set_bits_prph(mvm->trans, MON_BUFF_SAMPLE_CTL, 0x100);
-       } else {
-               iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, 0);
-               /* wait before we collect the data till the DBGC stop */
-               udelay(100);
-       }
-
        queue_delayed_work(system_wq, &mvm->fw_dump_wk, delay);
 
        return 0;
@@ -658,25 +647,24 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
         * module loading, load init ucode now
         * (for example, if we were in RFKILL)
         */
-       if (!mvm->init_ucode_complete) {
-               ret = iwl_run_init_mvm_ucode(mvm, false);
-               if (ret && !iwlmvm_mod_params.init_dbg) {
-                       IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", ret);
-                       /* this can't happen */
-                       if (WARN_ON(ret > 0))
-                               ret = -ERFKILL;
-                       goto error;
-               }
-               if (!iwlmvm_mod_params.init_dbg) {
-                       /*
-                        * should stop and start HW since that INIT
-                        * image just loaded
-                        */
-                       iwl_trans_stop_device(mvm->trans);
-                       ret = iwl_trans_start_hw(mvm->trans);
-                       if (ret)
-                               return ret;
-               }
+       ret = iwl_run_init_mvm_ucode(mvm, false);
+       if (ret && !iwlmvm_mod_params.init_dbg) {
+               IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", ret);
+               /* this can't happen */
+               if (WARN_ON(ret > 0))
+                       ret = -ERFKILL;
+               goto error;
+       }
+       if (!iwlmvm_mod_params.init_dbg) {
+               /*
+                * Stop and start the transport without entering low power
+                * mode. This will save the state of other components on the
+                * device that are triggered by the INIT firwmare (MFUART).
+                */
+               _iwl_trans_stop_device(mvm->trans, false);
+               _iwl_trans_start_hw(mvm->trans, false);
+               if (ret)
+                       return ret;
        }
 
        if (iwlmvm_mod_params.init_dbg)
index 8455517..dda9f7b 100644 (file)
@@ -1322,7 +1322,7 @@ static void iwl_mvm_restart_complete(struct iwl_mvm *mvm)
 
        clear_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status);
        iwl_mvm_d0i3_enable_tx(mvm, NULL);
-       ret = iwl_mvm_update_quotas(mvm, false, NULL);
+       ret = iwl_mvm_update_quotas(mvm, true, NULL);
        if (ret)
                IWL_ERR(mvm, "Failed to update quotas after restart (%d)\n",
                        ret);
@@ -1471,8 +1471,8 @@ static struct iwl_mvm_phy_ctxt *iwl_mvm_get_free_phy_ctxt(struct iwl_mvm *mvm)
        return NULL;
 }
 
-static int iwl_mvm_set_tx_power(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-                               s8 tx_power)
+static int iwl_mvm_set_tx_power_old(struct iwl_mvm *mvm,
+                                   struct ieee80211_vif *vif, s8 tx_power)
 {
        /* FW is in charge of regulatory enforcement */
        struct iwl_reduce_tx_power_cmd reduce_txpwr_cmd = {
@@ -1485,6 +1485,26 @@ static int iwl_mvm_set_tx_power(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
                                    &reduce_txpwr_cmd);
 }
 
+static int iwl_mvm_set_tx_power(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+                               s16 tx_power)
+{
+       struct iwl_dev_tx_power_cmd cmd = {
+               .set_mode = 0,
+               .mac_context_id =
+                       cpu_to_le32(iwl_mvm_vif_from_mac80211(vif)->id),
+               .pwr_restriction = cpu_to_le16(8 * tx_power),
+       };
+
+       if (!(mvm->fw->ucode_capa.api[0] & IWL_UCODE_TLV_API_TX_POWER_DEV))
+               return iwl_mvm_set_tx_power_old(mvm, vif, tx_power);
+
+       if (tx_power == IWL_DEFAULT_MAX_TX_POWER)
+               cmd.pwr_restriction = cpu_to_le16(IWL_DEV_MAX_TX_POWER);
+
+       return iwl_mvm_send_cmd_pdu(mvm, REDUCE_TX_POWER_CMD, 0,
+                                   sizeof(cmd), &cmd);
+}
+
 static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
                                     struct ieee80211_vif *vif)
 {
@@ -3975,9 +3995,6 @@ static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw,
        if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_MLME))
                return;
 
-       if (event->u.mlme.status == MLME_SUCCESS)
-               return;
-
        trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_MLME);
        trig_mlme = (void *)trig->data;
        if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig))
index d5522a1..cf70f68 100644 (file)
@@ -603,7 +603,6 @@ struct iwl_mvm {
 
        enum iwl_ucode_type cur_ucode;
        bool ucode_loaded;
-       bool init_ucode_complete;
        bool calibrating;
        u32 error_event_table;
        u32 log_event_table;
index a08b03d..2ea0123 100644 (file)
@@ -865,6 +865,16 @@ static void iwl_mvm_fw_error_dump_wk(struct work_struct *work)
                return;
 
        mutex_lock(&mvm->mutex);
+
+       /* stop recording */
+       if (mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
+               iwl_set_bits_prph(mvm->trans, MON_BUFF_SAMPLE_CTL, 0x100);
+       } else {
+               iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, 0);
+               /* wait before we collect the data till the DBGC stop */
+               udelay(100);
+       }
+
        iwl_mvm_fw_error_dump(mvm);
 
        /* start recording again if the firmware is not crashed */
@@ -1253,11 +1263,13 @@ static void iwl_mvm_d0i3_exit_work(struct work_struct *wk)
                ieee80211_iterate_active_interfaces(
                        mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
                        iwl_mvm_d0i3_disconnect_iter, mvm);
-
-       iwl_free_resp(&get_status_cmd);
 out:
        iwl_mvm_d0i3_enable_tx(mvm, qos_seq);
 
+       /* qos_seq might point inside resp_pkt, so free it only now */
+       if (get_status_cmd.resp_pkt)
+               iwl_free_resp(&get_status_cmd);
+
        /* the FW might have updated the regdomain */
        iwl_mvm_update_changed_regdom(mvm);
 
index f9928f2..33cd68a 100644 (file)
@@ -180,6 +180,9 @@ static bool rs_mimo_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
        if (iwl_mvm_vif_low_latency(mvmvif) && mvmsta->vif->p2p)
                return false;
 
+       if (mvm->nvm_data->sku_cap_mimo_disabled)
+               return false;
+
        return true;
 }
 
index 78ec7db..d6314dd 100644 (file)
@@ -478,6 +478,11 @@ static void iwl_mvm_stat_iterator(void *_data, u8 *mac,
        if (vif->type != NL80211_IFTYPE_STATION)
                return;
 
+       if (sig == 0) {
+               IWL_DEBUG_RX(mvm, "RSSI is 0 - skip signal based decision\n");
+               return;
+       }
+
        mvmvif->bf_data.ave_beacon_signal = sig;
 
        /* BT Coex */
index 74e1c86..1075a21 100644 (file)
@@ -319,7 +319,7 @@ int iwl_mvm_rx_scan_offload_iter_complete_notif(struct iwl_mvm *mvm,
                                                struct iwl_device_cmd *cmd)
 {
        struct iwl_rx_packet *pkt = rxb_addr(rxb);
-       struct iwl_scan_complete_notif *notif = (void *)pkt->data;
+       struct iwl_lmac_scan_complete_notif *notif = (void *)pkt->data;
 
        IWL_DEBUG_SCAN(mvm,
                       "Scan offload iteration complete: status=0x%x scanned channels=%d\n",
index 01996c9..376b84e 100644 (file)
@@ -1,7 +1,7 @@
 /******************************************************************************
  *
- * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2003 - 2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  *
  * Portions of this file are derived from the ipw3945 project, as well
  * as portions of the ieee80211 subsystem header files.
@@ -320,7 +320,7 @@ struct iwl_trans_pcie {
 
        /*protect hw register */
        spinlock_t reg_lock;
-       bool cmd_in_flight;
+       bool cmd_hold_nic_awake;
        bool ref_cmd_in_flight;
 
        /* protect ref counter */
index 2de8fbf..dc17909 100644 (file)
@@ -5,8 +5,8 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2007 - 2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,8 +31,8 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2005 - 2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -104,7 +104,7 @@ static void iwl_pcie_free_fw_monitor(struct iwl_trans *trans)
 static void iwl_pcie_alloc_fw_monitor(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct page *page;
+       struct page *page = NULL;
        dma_addr_t phys;
        u32 size;
        u8 power;
@@ -131,6 +131,7 @@ static void iwl_pcie_alloc_fw_monitor(struct iwl_trans *trans)
                                    DMA_FROM_DEVICE);
                if (dma_mapping_error(trans->dev, phys)) {
                        __free_pages(page, order);
+                       page = NULL;
                        continue;
                }
                IWL_INFO(trans,
@@ -1020,7 +1021,7 @@ static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
        iwl_pcie_tx_start(trans, scd_addr);
 }
 
-static void iwl_trans_pcie_stop_device(struct iwl_trans *trans)
+static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        bool hw_rfkill, was_hw_rfkill;
@@ -1048,9 +1049,11 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans)
                iwl_pcie_rx_stop(trans);
 
                /* Power-down device's busmaster DMA clocks */
-               iwl_write_prph(trans, APMG_CLK_DIS_REG,
-                              APMG_CLK_VAL_DMA_CLK_RQT);
-               udelay(5);
+               if (trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) {
+                       iwl_write_prph(trans, APMG_CLK_DIS_REG,
+                                      APMG_CLK_VAL_DMA_CLK_RQT);
+                       udelay(5);
+               }
        }
 
        /* Make sure (redundant) we've released our request to stay awake */
@@ -1115,7 +1118,7 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans)
 void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state)
 {
        if (iwl_op_mode_hw_rf_kill(trans->op_mode, state))
-               iwl_trans_pcie_stop_device(trans);
+               iwl_trans_pcie_stop_device(trans, true);
 }
 
 static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test)
@@ -1200,7 +1203,7 @@ static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans,
        return 0;
 }
 
-static int iwl_trans_pcie_start_hw(struct iwl_trans *trans)
+static int iwl_trans_pcie_start_hw(struct iwl_trans *trans, bool low_power)
 {
        bool hw_rfkill;
        int err;
@@ -1369,7 +1372,7 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent,
 
        spin_lock_irqsave(&trans_pcie->reg_lock, *flags);
 
-       if (trans_pcie->cmd_in_flight)
+       if (trans_pcie->cmd_hold_nic_awake)
                goto out;
 
        /* this bit wakes up the NIC */
@@ -1435,7 +1438,7 @@ static void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans,
         */
        __acquire(&trans_pcie->reg_lock);
 
-       if (trans_pcie->cmd_in_flight)
+       if (trans_pcie->cmd_hold_nic_awake)
                goto out;
 
        __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
index 06952aa..5ef8044 100644 (file)
@@ -1039,18 +1039,14 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
                iwl_trans_pcie_ref(trans);
        }
 
-       if (trans_pcie->cmd_in_flight)
-               return 0;
-
-       trans_pcie->cmd_in_flight = true;
-
        /*
         * wake up the NIC to make sure that the firmware will see the host
         * command - we will let the NIC sleep once all the host commands
         * returned. This needs to be done only on NICs that have
         * apmg_wake_up_wa set.
         */
-       if (trans->cfg->base_params->apmg_wake_up_wa) {
+       if (trans->cfg->base_params->apmg_wake_up_wa &&
+           !trans_pcie->cmd_hold_nic_awake) {
                __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
                                         CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
                if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
@@ -1064,10 +1060,10 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
                if (ret < 0) {
                        __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
                                        CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
-                       trans_pcie->cmd_in_flight = false;
                        IWL_ERR(trans, "Failed to wake NIC for hcmd\n");
                        return -EIO;
                }
+               trans_pcie->cmd_hold_nic_awake = true;
        }
 
        return 0;
@@ -1085,15 +1081,14 @@ static int iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
                iwl_trans_pcie_unref(trans);
        }
 
-       if (WARN_ON(!trans_pcie->cmd_in_flight))
-               return 0;
-
-       trans_pcie->cmd_in_flight = false;
+       if (trans->cfg->base_params->apmg_wake_up_wa) {
+               if (WARN_ON(!trans_pcie->cmd_hold_nic_awake))
+                       return 0;
 
-       if (trans->cfg->base_params->apmg_wake_up_wa)
+               trans_pcie->cmd_hold_nic_awake = false;
                __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
-                                       CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
-
+                                          CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+       }
        return 0;
 }
 
index f0188c8..2721cf8 100644 (file)
@@ -126,7 +126,7 @@ static int _usbctrl_vendorreq_sync_read(struct usb_device *udev, u8 request,
 
        do {
                status = usb_control_msg(udev, pipe, request, reqtype, value,
-                                        index, pdata, len, 0); /*max. timeout*/
+                                        index, pdata, len, 1000);
                if (status < 0) {
                        /* firmware download is checksumed, don't retry */
                        if ((value >= FW_8192C_START_ADDRESS &&
index 4de46aa..0d25943 100644 (file)
@@ -1250,7 +1250,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
                        netdev_err(queue->vif->dev,
                                   "txreq.offset: %x, size: %u, end: %lu\n",
                                   txreq.offset, txreq.size,
-                                  (txreq.offset&~PAGE_MASK) + txreq.size);
+                                  (unsigned long)(txreq.offset&~PAGE_MASK) + txreq.size);
                        xenvif_fatal_tx_err(queue->vif);
                        break;
                }
index 3d8dbf5..968787a 100644 (file)
@@ -34,6 +34,8 @@ struct backend_info {
        enum xenbus_state frontend_state;
        struct xenbus_watch hotplug_status_watch;
        u8 have_hotplug_status_watch:1;
+
+       const char *hotplug_script;
 };
 
 static int connect_rings(struct backend_info *be, struct xenvif_queue *queue);
@@ -238,6 +240,7 @@ static int netback_remove(struct xenbus_device *dev)
                xenvif_free(be->vif);
                be->vif = NULL;
        }
+       kfree(be->hotplug_script);
        kfree(be);
        dev_set_drvdata(&dev->dev, NULL);
        return 0;
@@ -255,6 +258,7 @@ static int netback_probe(struct xenbus_device *dev,
        struct xenbus_transaction xbt;
        int err;
        int sg;
+       const char *script;
        struct backend_info *be = kzalloc(sizeof(struct backend_info),
                                          GFP_KERNEL);
        if (!be) {
@@ -347,6 +351,15 @@ static int netback_probe(struct xenbus_device *dev,
        if (err)
                pr_debug("Error writing multi-queue-max-queues\n");
 
+       script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL);
+       if (IS_ERR(script)) {
+               err = PTR_ERR(script);
+               xenbus_dev_fatal(dev, err, "reading script");
+               goto fail;
+       }
+
+       be->hotplug_script = script;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -379,22 +392,14 @@ static int netback_uevent(struct xenbus_device *xdev,
                          struct kobj_uevent_env *env)
 {
        struct backend_info *be = dev_get_drvdata(&xdev->dev);
-       char *val;
 
-       val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
-       if (IS_ERR(val)) {
-               int err = PTR_ERR(val);
-               xenbus_dev_fatal(xdev, err, "reading script");
-               return err;
-       } else {
-               if (add_uevent_var(env, "script=%s", val)) {
-                       kfree(val);
-                       return -ENOMEM;
-               }
-               kfree(val);
-       }
+       if (!be)
+               return 0;
 
-       if (!be || !be->vif)
+       if (add_uevent_var(env, "script=%s", be->hotplug_script))
+               return -ENOMEM;
+
+       if (!be->vif)
                return 0;
 
        return add_uevent_var(env, "vif=%s", be->vif->dev->name);
@@ -793,6 +798,7 @@ static void connect(struct backend_info *be)
                        goto err;
                }
 
+               queue->credit_bytes = credit_bytes;
                queue->remaining_credit = credit_bytes;
                queue->credit_usec = credit_usec;
 
index 3f45afd..e031c94 100644 (file)
@@ -1698,6 +1698,7 @@ static void xennet_destroy_queues(struct netfront_info *info)
 
                if (netif_running(info->netdev))
                        napi_disable(&queue->napi);
+               del_timer_sync(&queue->rx_refill_timer);
                netif_napi_del(&queue->napi);
        }
 
@@ -2102,9 +2103,6 @@ static const struct attribute_group xennet_dev_group = {
 static int xennet_remove(struct xenbus_device *dev)
 {
        struct netfront_info *info = dev_get_drvdata(&dev->dev);
-       unsigned int num_queues = info->netdev->real_num_tx_queues;
-       struct netfront_queue *queue = NULL;
-       unsigned int i = 0;
 
        dev_dbg(&dev->dev, "%s\n", dev->nodename);
 
@@ -2112,16 +2110,7 @@ static int xennet_remove(struct xenbus_device *dev)
 
        unregister_netdev(info->netdev);
 
-       for (i = 0; i < num_queues; ++i) {
-               queue = &info->queues[i];
-               del_timer_sync(&queue->rx_refill_timer);
-       }
-
-       if (num_queues) {
-               kfree(info->queues);
-               info->queues = NULL;
-       }
-
+       xennet_destroy_queues(info);
        xennet_free_netdev(info->netdev);
 
        return 0;
index cd29b10..15f9b7c 100644 (file)
@@ -1660,6 +1660,7 @@ static int ntb_atom_detect(struct ntb_device *ndev)
        u32 ppd;
 
        ndev->hw_type = BWD_HW;
+       ndev->limits.max_mw = BWD_MAX_MW;
 
        rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &ppd);
        if (rc)
@@ -1778,7 +1779,7 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                        dev_warn(&pdev->dev, "Cannot remap BAR %d\n",
                                 MW_TO_BAR(i));
                        rc = -EIO;
-                       goto err3;
+                       goto err4;
                }
        }
 
index 99764db..f065026 100644 (file)
@@ -189,7 +189,7 @@ int __of_attach_node_sysfs(struct device_node *np)
        return 0;
 }
 
-static int __init of_init(void)
+void __init of_core_init(void)
 {
        struct device_node *np;
 
@@ -198,7 +198,8 @@ static int __init of_init(void)
        of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj);
        if (!of_kset) {
                mutex_unlock(&of_mutex);
-               return -ENOMEM;
+               pr_err("devicetree: failed to register existing nodes\n");
+               return;
        }
        for_each_of_allnodes(np)
                __of_attach_node_sysfs(np);
@@ -207,10 +208,7 @@ static int __init of_init(void)
        /* Symlink in /proc as required by userspace ABI */
        if (of_root)
                proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base");
-
-       return 0;
 }
-core_initcall(of_init);
 
 static struct property *__of_find_property(const struct device_node *np,
                                           const char *name, int *lenp)
index 3351ef4..53826b8 100644 (file)
@@ -225,7 +225,7 @@ void __of_attach_node(struct device_node *np)
        phandle = __of_get_property(np, "phandle", &sz);
        if (!phandle)
                phandle = __of_get_property(np, "linux,phandle", &sz);
-       if (IS_ENABLED(PPC_PSERIES) && !phandle)
+       if (IS_ENABLED(CONFIG_PPC_PSERIES) && !phandle)
                phandle = __of_get_property(np, "ibm,phandle", &sz);
        np->phandle = (phandle && (sz >= 4)) ? be32_to_cpup(phandle) : 0;
 
index 8be2096..deeaed5 100644 (file)
@@ -348,7 +348,7 @@ int superio_fixup_irq(struct pci_dev *pcidev)
                BUG();
                return -1;
        }
-       printk("superio_fixup_irq(%s) ven 0x%x dev 0x%x from %pf\n",
+       printk(KERN_DEBUG "superio_fixup_irq(%s) ven 0x%x dev 0x%x from %ps\n",
                pci_name(pcidev),
                pcidev->vendor, pcidev->device,
                __builtin_return_address(0));
index 4fd0cac..508cc56 100644 (file)
@@ -428,16 +428,19 @@ static void __assign_resources_sorted(struct list_head *head,
                 * consistent.
                 */
                if (add_align > dev_res->res->start) {
+                       resource_size_t r_size = resource_size(dev_res->res);
+
                        dev_res->res->start = add_align;
-                       dev_res->res->end = add_align +
-                                           resource_size(dev_res->res);
+                       dev_res->res->end = add_align + r_size - 1;
 
                        list_for_each_entry(dev_res2, head, list) {
                                align = pci_resource_alignment(dev_res2->dev,
                                                               dev_res2->res);
-                               if (add_align > align)
+                               if (add_align > align) {
                                        list_move_tail(&dev_res->list,
                                                       &dev_res2->list);
+                                       break;
+                               }
                        }
                }
 
index a53bd5b..fc9b9f0 100644 (file)
@@ -38,7 +38,9 @@ config ARMADA375_USBCLUSTER_PHY
 config PHY_DM816X_USB
        tristate "TI dm816x USB PHY driver"
        depends on ARCH_OMAP2PLUS
+       depends on USB_SUPPORT
        select GENERIC_PHY
+       select USB_PHY
        help
          Enable this for dm816x USB to work.
 
@@ -97,8 +99,9 @@ config OMAP_CONTROL_PHY
 config OMAP_USB2
        tristate "OMAP USB2 PHY Driver"
        depends on ARCH_OMAP2PLUS
-       depends on USB_PHY
+       depends on USB_SUPPORT
        select GENERIC_PHY
+       select USB_PHY
        select OMAP_CONTROL_PHY
        depends on OMAP_OCP2SCP
        help
@@ -122,8 +125,9 @@ config TI_PIPE3
 config TWL4030_USB
        tristate "TWL4030 USB Transceiver Driver"
        depends on TWL4030_CORE && REGULATOR_TWL4030 && USB_MUSB_OMAP2PLUS
-       depends on USB_PHY
+       depends on USB_SUPPORT
        select GENERIC_PHY
+       select USB_PHY
        help
          Enable this to support the USB OTG transceiver on TWL4030
          family chips (including the TWL5030 and TPS659x0 devices).
@@ -304,7 +308,7 @@ config PHY_STIH41X_USB
 
 config PHY_QCOM_UFS
        tristate "Qualcomm UFS PHY driver"
-       depends on OF && ARCH_MSM
+       depends on OF && ARCH_QCOM
        select GENERIC_PHY
        help
          Support for UFS PHY on QCOM chipsets.
index 3791838..63bc12d 100644 (file)
@@ -530,7 +530,7 @@ struct phy *phy_optional_get(struct device *dev, const char *string)
 {
        struct phy *phy = phy_get(dev, string);
 
-       if (PTR_ERR(phy) == -ENODEV)
+       if (IS_ERR(phy) && (PTR_ERR(phy) == -ENODEV))
                phy = NULL;
 
        return phy;
@@ -584,7 +584,7 @@ struct phy *devm_phy_optional_get(struct device *dev, const char *string)
 {
        struct phy *phy = devm_phy_get(dev, string);
 
-       if (PTR_ERR(phy) == -ENODEV)
+       if (IS_ERR(phy) && (PTR_ERR(phy) == -ENODEV))
                phy = NULL;
 
        return phy;
index 183ef43..c1a4686 100644 (file)
@@ -275,6 +275,7 @@ static int omap_usb2_probe(struct platform_device *pdev)
                phy->wkupclk = devm_clk_get(phy->dev, "usb_phy_cm_clk32k");
                if (IS_ERR(phy->wkupclk)) {
                        dev_err(&pdev->dev, "unable to get usb_phy_cm_clk32k\n");
+                       pm_runtime_disable(phy->dev);
                        return PTR_ERR(phy->wkupclk);
                } else {
                        dev_warn(&pdev->dev,
index 778276a..97d45f4 100644 (file)
@@ -23,7 +23,7 @@
 #define USBHS_LPSTS                    0x02
 #define USBHS_UGCTRL                   0x80
 #define USBHS_UGCTRL2                  0x84
-#define USBHS_UGSTS                    0x88    /* The manuals have 0x90 */
+#define USBHS_UGSTS                    0x88    /* From technical update */
 
 /* Low Power Status register (LPSTS) */
 #define USBHS_LPSTS_SUSPM              0x4000
@@ -41,7 +41,7 @@
 #define USBHS_UGCTRL2_USB0SEL_HS_USB   0x00000030
 
 /* USB General status register (UGSTS) */
-#define USBHS_UGSTS_LOCK               0x00000300 /* The manuals have 0x3 */
+#define USBHS_UGSTS_LOCK               0x00000100 /* From technical update */
 
 #define PHYS_PER_CHANNEL       2
 
index 4ad5c1a..e406e3d 100644 (file)
@@ -643,7 +643,9 @@ static const struct cygnus_gpio_pin_range cygnus_gpio_pintable[] = {
        CYGNUS_PINRANGE(87, 104, 12),
        CYGNUS_PINRANGE(99, 102, 2),
        CYGNUS_PINRANGE(101, 90, 4),
-       CYGNUS_PINRANGE(105, 116, 10),
+       CYGNUS_PINRANGE(105, 116, 6),
+       CYGNUS_PINRANGE(111, 100, 2),
+       CYGNUS_PINRANGE(113, 122, 4),
        CYGNUS_PINRANGE(123, 11, 1),
        CYGNUS_PINRANGE(124, 38, 4),
        CYGNUS_PINRANGE(128, 43, 1),
index 89dca77..18ee208 100644 (file)
@@ -1110,7 +1110,7 @@ void devm_pinctrl_put(struct pinctrl *p)
 EXPORT_SYMBOL_GPL(devm_pinctrl_put);
 
 int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps,
-                        bool dup, bool locked)
+                        bool dup)
 {
        int i, ret;
        struct pinctrl_maps *maps_node;
@@ -1178,11 +1178,9 @@ int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps,
                maps_node->maps = maps;
        }
 
-       if (!locked)
-               mutex_lock(&pinctrl_maps_mutex);
+       mutex_lock(&pinctrl_maps_mutex);
        list_add_tail(&maps_node->node, &pinctrl_maps);
-       if (!locked)
-               mutex_unlock(&pinctrl_maps_mutex);
+       mutex_unlock(&pinctrl_maps_mutex);
 
        return 0;
 }
@@ -1197,7 +1195,7 @@ int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps,
 int pinctrl_register_mappings(struct pinctrl_map const *maps,
                              unsigned num_maps)
 {
-       return pinctrl_register_map(maps, num_maps, true, false);
+       return pinctrl_register_map(maps, num_maps, true);
 }
 
 void pinctrl_unregister_map(struct pinctrl_map const *map)
index 75476b3..b24ea84 100644 (file)
@@ -183,7 +183,7 @@ static inline struct pin_desc *pin_desc_get(struct pinctrl_dev *pctldev,
 }
 
 int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps,
-                        bool dup, bool locked);
+                        bool dup);
 void pinctrl_unregister_map(struct pinctrl_map const *map);
 
 extern int pinctrl_force_sleep(struct pinctrl_dev *pctldev);
index eda13de..0bbf7d7 100644 (file)
@@ -92,7 +92,7 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename,
        dt_map->num_maps = num_maps;
        list_add_tail(&dt_map->node, &p->dt_maps);
 
-       return pinctrl_register_map(map, num_maps, false, true);
+       return pinctrl_register_map(map, num_maps, false);
 }
 
 struct pinctrl_dev *of_pinctrl_get(struct device_node *np)
index 82f691e..732ff75 100644 (file)
@@ -1292,6 +1292,49 @@ static void chv_gpio_irq_unmask(struct irq_data *d)
        chv_gpio_irq_mask_unmask(d, false);
 }
 
+static unsigned chv_gpio_irq_startup(struct irq_data *d)
+{
+       /*
+        * Check if the interrupt has been requested with 0 as triggering
+        * type. In that case it is assumed that the current values
+        * programmed to the hardware are used (e.g BIOS configured
+        * defaults).
+        *
+        * In that case ->irq_set_type() will never be called so we need to
+        * read back the values from hardware now, set correct flow handler
+        * and update mappings before the interrupt is being used.
+        */
+       if (irqd_get_trigger_type(d) == IRQ_TYPE_NONE) {
+               struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+               struct chv_pinctrl *pctrl = gpiochip_to_pinctrl(gc);
+               unsigned offset = irqd_to_hwirq(d);
+               int pin = chv_gpio_offset_to_pin(pctrl, offset);
+               irq_flow_handler_t handler;
+               unsigned long flags;
+               u32 intsel, value;
+
+               intsel = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0));
+               intsel &= CHV_PADCTRL0_INTSEL_MASK;
+               intsel >>= CHV_PADCTRL0_INTSEL_SHIFT;
+
+               value = readl(chv_padreg(pctrl, pin, CHV_PADCTRL1));
+               if (value & CHV_PADCTRL1_INTWAKECFG_LEVEL)
+                       handler = handle_level_irq;
+               else
+                       handler = handle_edge_irq;
+
+               spin_lock_irqsave(&pctrl->lock, flags);
+               if (!pctrl->intr_lines[intsel]) {
+                       __irq_set_handler_locked(d->irq, handler);
+                       pctrl->intr_lines[intsel] = offset;
+               }
+               spin_unlock_irqrestore(&pctrl->lock, flags);
+       }
+
+       chv_gpio_irq_unmask(d);
+       return 0;
+}
+
 static int chv_gpio_irq_type(struct irq_data *d, unsigned type)
 {
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -1357,6 +1400,7 @@ static int chv_gpio_irq_type(struct irq_data *d, unsigned type)
 
 static struct irq_chip chv_gpio_irqchip = {
        .name = "chv-gpio",
+       .irq_startup = chv_gpio_irq_startup,
        .irq_ack = chv_gpio_irq_ack,
        .irq_mask = chv_gpio_irq_mask,
        .irq_unmask = chv_gpio_irq_unmask,
index 493294c..474812e 100644 (file)
@@ -881,6 +881,8 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset,
        if (!mtk_eint_get_mask(pctl, eint_num)) {
                mtk_eint_mask(d);
                unmask = 1;
+       } else {
+               unmask = 0;
        }
 
        clr_bit = 0xff << eint_offset;
index edcd140..a70a5fe 100644 (file)
@@ -569,7 +569,7 @@ static int meson_gpiolib_register(struct meson_pinctrl *pc)
                domain->chip.direction_output = meson_gpio_direction_output;
                domain->chip.get = meson_gpio_get;
                domain->chip.set = meson_gpio_set;
-               domain->chip.base = -1;
+               domain->chip.base = domain->data->pin_base;
                domain->chip.ngpio = domain->data->num_pins;
                domain->chip.can_sleep = false;
                domain->chip.of_node = domain->of_node;
index 2f7ea62..9677807 100644 (file)
@@ -876,13 +876,13 @@ static struct meson_domain_data meson8b_domain_data[] = {
                .banks          = meson8b_banks,
                .num_banks      = ARRAY_SIZE(meson8b_banks),
                .pin_base       = 0,
-               .num_pins       = 83,
+               .num_pins       = 130,
        },
        {
                .name           = "ao-bank",
                .banks          = meson8b_ao_banks,
                .num_banks      = ARRAY_SIZE(meson8b_ao_banks),
-               .pin_base       = 83,
+               .pin_base       = 130,
                .num_pins       = 16,
        },
 };
index 42f930f..03aa58c 100644 (file)
@@ -364,7 +364,7 @@ static struct mvebu_mpp_mode mv88f6710_mpp_modes[] = {
           MPP_FUNCTION(0x5, "audio", "mclk"),
           MPP_FUNCTION(0x6, "uart0", "cts")),
        MPP_MODE(63,
-          MPP_FUNCTION(0x0, "gpo", NULL),
+          MPP_FUNCTION(0x0, "gpio", NULL),
           MPP_FUNCTION(0x1, "spi0", "sck"),
           MPP_FUNCTION(0x2, "tclk", NULL)),
        MPP_MODE(64,
index b2d2221..ae4115e 100644 (file)
@@ -260,6 +260,7 @@ static int pmic_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned function,
                        val = 1;
        }
 
+       val = val << PMIC_GPIO_REG_MODE_DIR_SHIFT;
        val |= pad->function << PMIC_GPIO_REG_MODE_FUNCTION_SHIFT;
        val |= pad->out_value & PMIC_GPIO_REG_MODE_VALUE_SHIFT;
 
@@ -417,7 +418,7 @@ static int pmic_gpio_config_set(struct pinctrl_dev *pctldev, unsigned int pin,
                return ret;
 
        val = pad->buffer_type << PMIC_GPIO_REG_OUT_TYPE_SHIFT;
-       val = pad->strength << PMIC_GPIO_REG_OUT_STRENGTH_SHIFT;
+       val |= pad->strength << PMIC_GPIO_REG_OUT_STRENGTH_SHIFT;
 
        ret = pmic_gpio_write(state, pad, PMIC_GPIO_REG_DIG_OUT_CTL, val);
        if (ret < 0)
@@ -466,12 +467,13 @@ static void pmic_gpio_config_dbg_show(struct pinctrl_dev *pctldev,
                seq_puts(s, " ---");
        } else {
 
-               if (!pad->input_enabled) {
+               if (pad->input_enabled) {
                        ret = pmic_gpio_read(state, pad, PMIC_MPP_REG_RT_STS);
-                       if (!ret) {
-                               ret &= PMIC_MPP_REG_RT_STS_VAL_MASK;
-                               pad->out_value = ret;
-                       }
+                       if (ret < 0)
+                               return;
+
+                       ret &= PMIC_MPP_REG_RT_STS_VAL_MASK;
+                       pad->out_value = ret;
                }
 
                seq_printf(s, " %-4s", pad->output_enabled ? "out" : "in");
index 8f36c5f..211b942 100644 (file)
@@ -370,6 +370,7 @@ static int pmic_mpp_set_mux(struct pinctrl_dev *pctldev, unsigned function,
                }
        }
 
+       val = val << PMIC_MPP_REG_MODE_DIR_SHIFT;
        val |= pad->function << PMIC_MPP_REG_MODE_FUNCTION_SHIFT;
        val |= pad->out_value & PMIC_MPP_REG_MODE_VALUE_MASK;
 
@@ -576,10 +577,11 @@ static void pmic_mpp_config_dbg_show(struct pinctrl_dev *pctldev,
 
                if (pad->input_enabled) {
                        ret = pmic_mpp_read(state, pad, PMIC_MPP_REG_RT_STS);
-                       if (!ret) {
-                               ret &= PMIC_MPP_REG_RT_STS_VAL_MASK;
-                               pad->out_value = ret;
-                       }
+                       if (ret < 0)
+                               return;
+
+                       ret &= PMIC_MPP_REG_RT_STS_VAL_MASK;
+                       pad->out_value = ret;
                }
 
                seq_printf(s, " %-4s", pad->output_enabled ? "out" : "in");
index 9bb9ad6..28f3281 100644 (file)
@@ -2897,7 +2897,7 @@ static ssize_t hotkey_wakeup_reason_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_wakeup_reason);
 }
 
-static DEVICE_ATTR_RO(hotkey_wakeup_reason);
+static DEVICE_ATTR(wakeup_reason, S_IRUGO, hotkey_wakeup_reason_show, NULL);
 
 static void hotkey_wakeup_reason_notify_change(void)
 {
@@ -2913,7 +2913,8 @@ static ssize_t hotkey_wakeup_hotunplug_complete_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_autosleep_ack);
 }
 
-static DEVICE_ATTR_RO(hotkey_wakeup_hotunplug_complete);
+static DEVICE_ATTR(wakeup_hotunplug_complete, S_IRUGO,
+                  hotkey_wakeup_hotunplug_complete_show, NULL);
 
 static void hotkey_wakeup_hotunplug_complete_notify_change(void)
 {
@@ -2978,8 +2979,8 @@ static struct attribute *hotkey_attributes[] __initdata = {
        &dev_attr_hotkey_enable.attr,
        &dev_attr_hotkey_bios_enabled.attr,
        &dev_attr_hotkey_bios_mask.attr,
-       &dev_attr_hotkey_wakeup_reason.attr,
-       &dev_attr_hotkey_wakeup_hotunplug_complete.attr,
+       &dev_attr_wakeup_reason.attr,
+       &dev_attr_wakeup_hotunplug_complete.attr,
        &dev_attr_hotkey_mask.attr,
        &dev_attr_hotkey_all_mask.attr,
        &dev_attr_hotkey_recommended_mask.attr,
@@ -4393,12 +4394,13 @@ static ssize_t wan_enable_store(struct device *dev,
                        attr, buf, count);
 }
 
-static DEVICE_ATTR_RW(wan_enable);
+static DEVICE_ATTR(wwan_enable, S_IWUSR | S_IRUGO,
+                  wan_enable_show, wan_enable_store);
 
 /* --------------------------------------------------------------------- */
 
 static struct attribute *wan_attributes[] = {
-       &dev_attr_wan_enable.attr,
+       &dev_attr_wwan_enable.attr,
        NULL
 };
 
@@ -8138,7 +8140,8 @@ static ssize_t fan_pwm1_enable_store(struct device *dev,
        return count;
 }
 
-static DEVICE_ATTR_RW(fan_pwm1_enable);
+static DEVICE_ATTR(pwm1_enable, S_IWUSR | S_IRUGO,
+                  fan_pwm1_enable_show, fan_pwm1_enable_store);
 
 /* sysfs fan pwm1 ------------------------------------------------------ */
 static ssize_t fan_pwm1_show(struct device *dev,
@@ -8198,7 +8201,7 @@ static ssize_t fan_pwm1_store(struct device *dev,
        return (rc) ? rc : count;
 }
 
-static DEVICE_ATTR_RW(fan_pwm1);
+static DEVICE_ATTR(pwm1, S_IWUSR | S_IRUGO, fan_pwm1_show, fan_pwm1_store);
 
 /* sysfs fan fan1_input ------------------------------------------------ */
 static ssize_t fan_fan1_input_show(struct device *dev,
@@ -8215,7 +8218,7 @@ static ssize_t fan_fan1_input_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%u\n", speed);
 }
 
-static DEVICE_ATTR_RO(fan_fan1_input);
+static DEVICE_ATTR(fan1_input, S_IRUGO, fan_fan1_input_show, NULL);
 
 /* sysfs fan fan2_input ------------------------------------------------ */
 static ssize_t fan_fan2_input_show(struct device *dev,
@@ -8232,7 +8235,7 @@ static ssize_t fan_fan2_input_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%u\n", speed);
 }
 
-static DEVICE_ATTR_RO(fan_fan2_input);
+static DEVICE_ATTR(fan2_input, S_IRUGO, fan_fan2_input_show, NULL);
 
 /* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */
 static ssize_t fan_fan_watchdog_show(struct device_driver *drv,
@@ -8265,8 +8268,8 @@ static DRIVER_ATTR(fan_watchdog, S_IWUSR | S_IRUGO,
 
 /* --------------------------------------------------------------------- */
 static struct attribute *fan_attributes[] = {
-       &dev_attr_fan_pwm1_enable.attr, &dev_attr_fan_pwm1.attr,
-       &dev_attr_fan_fan1_input.attr,
+       &dev_attr_pwm1_enable.attr, &dev_attr_pwm1.attr,
+       &dev_attr_fan1_input.attr,
        NULL, /* for fan2_input */
        NULL
 };
@@ -8400,7 +8403,7 @@ static int __init fan_init(struct ibm_init_struct *iibm)
                if (tp_features.second_fan) {
                        /* attach second fan tachometer */
                        fan_attributes[ARRAY_SIZE(fan_attributes)-2] =
-                                       &dev_attr_fan_fan2_input.attr;
+                                       &dev_attr_fan2_input.attr;
                }
                rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
                                         &fan_attr_group);
@@ -8848,7 +8851,7 @@ static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%s\n", TPACPI_NAME);
 }
 
-static DEVICE_ATTR_RO(thinkpad_acpi_pdev_name);
+static DEVICE_ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL);
 
 /* --------------------------------------------------------------------- */
 
@@ -9390,8 +9393,7 @@ static void thinkpad_acpi_module_exit(void)
                hwmon_device_unregister(tpacpi_hwmon);
 
        if (tp_features.sensors_pdev_attrs_registered)
-               device_remove_file(&tpacpi_sensors_pdev->dev,
-                                  &dev_attr_thinkpad_acpi_pdev_name);
+               device_remove_file(&tpacpi_sensors_pdev->dev, &dev_attr_name);
        if (tpacpi_sensors_pdev)
                platform_device_unregister(tpacpi_sensors_pdev);
        if (tpacpi_pdev)
@@ -9512,8 +9514,7 @@ static int __init thinkpad_acpi_module_init(void)
                thinkpad_acpi_module_exit();
                return ret;
        }
-       ret = device_create_file(&tpacpi_sensors_pdev->dev,
-                                &dev_attr_thinkpad_acpi_pdev_name);
+       ret = device_create_file(&tpacpi_sensors_pdev->dev, &dev_attr_name);
        if (ret) {
                pr_err("unable to create sysfs hwmon device attributes\n");
                thinkpad_acpi_module_exit();
index ca1cc5a..bd1dbfe 100644 (file)
@@ -1149,6 +1149,7 @@ static struct platform_driver axp288_fuel_gauge_driver = {
 
 module_platform_driver(axp288_fuel_gauge_driver);
 
+MODULE_AUTHOR("Ramakrishna Pallala <ramakrishna.pallala@intel.com>");
 MODULE_AUTHOR("Todd Brandt <todd.e.brandt@linux.intel.com>");
 MODULE_DESCRIPTION("Xpower AXP288 Fuel Gauge Driver");
 MODULE_LICENSE("GPL");
index a57433d..b6b9837 100644 (file)
@@ -1109,6 +1109,14 @@ static void __exit bq27x00_battery_exit(void)
 }
 module_exit(bq27x00_battery_exit);
 
+#ifdef CONFIG_BATTERY_BQ27X00_PLATFORM
+MODULE_ALIAS("platform:bq27000-battery");
+#endif
+
+#ifdef CONFIG_BATTERY_BQ27X00_I2C
+MODULE_ALIAS("i2c:bq27000-battery");
+#endif
+
 MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
 MODULE_DESCRIPTION("BQ27x00 battery monitor driver");
 MODULE_LICENSE("GPL");
index 2da9ed8..8a971b3 100644 (file)
@@ -347,7 +347,7 @@ static int collie_bat_probe(struct ucb1x00_dev *dev)
                goto err_psy_reg_main;
        }
 
-       psy_main_cfg.drv_data = &collie_bat_bu;
+       psy_bu_cfg.drv_data = &collie_bat_bu;
        collie_bat_bu.psy = power_supply_register(&dev->ucb->dev,
                                                  &collie_bat_bu_desc,
                                                  &psy_bu_cfg);
index aad9c33..17d93a7 100644 (file)
@@ -41,6 +41,7 @@ config POWER_RESET_AXXIA
 config POWER_RESET_BRCMSTB
        bool "Broadcom STB reset driver"
        depends on ARM || MIPS || COMPILE_TEST
+       depends on MFD_SYSCON
        default ARCH_BRCMSTB
        help
          This driver provides restart support for Broadcom STB boards.
index 01c7055..ca461eb 100644 (file)
@@ -212,9 +212,9 @@ static int at91_reset_platform_probe(struct platform_device *pdev)
                res = platform_get_resource(pdev, IORESOURCE_MEM, idx + 1 );
                at91_ramc_base[idx] = devm_ioremap(&pdev->dev, res->start,
                                                   resource_size(res));
-               if (IS_ERR(at91_ramc_base[idx])) {
+               if (!at91_ramc_base[idx]) {
                        dev_err(&pdev->dev, "Could not map ram controller address\n");
-                       return PTR_ERR(at91_ramc_base[idx]);
+                       return -ENOMEM;
                }
        }
 
index 7ef193b..1e08195 100644 (file)
@@ -120,18 +120,7 @@ static enum hrtimer_restart ltc2952_poweroff_timer_wde(struct hrtimer *timer)
 
 static void ltc2952_poweroff_start_wde(struct ltc2952_poweroff *data)
 {
-       if (hrtimer_start(&data->timer_wde, data->wde_interval,
-                         HRTIMER_MODE_REL)) {
-               /*
-                * The device will not toggle the watchdog reset,
-                * thus shut down is only safe if the PowerPath controller
-                * has a long enough time-off before triggering a hardware
-                * power-off.
-                *
-                * Only sending a warning as the system will power-off anyway
-                */
-               dev_err(data->dev, "unable to start the timer\n");
-       }
+       hrtimer_start(&data->timer_wde, data->wde_interval, HRTIMER_MODE_REL);
 }
 
 static enum hrtimer_restart
@@ -165,9 +154,8 @@ static irqreturn_t ltc2952_poweroff_handler(int irq, void *dev_id)
        }
 
        if (gpiod_get_value(data->gpio_trigger)) {
-               if (hrtimer_start(&data->timer_trigger, data->trigger_delay,
-                                 HRTIMER_MODE_REL))
-                       dev_err(data->dev, "unable to start the wait timer\n");
+               hrtimer_start(&data->timer_trigger, data->trigger_delay,
+                             HRTIMER_MODE_REL);
        } else {
                hrtimer_cancel(&data->timer_trigger);
                /* omitting return value check, timer should have been valid */
index 476171a..8a029f9 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/regmap.h>
 #define PERIP_PWM_PDM_CONTROL_CH_MASK          0x1
 #define PERIP_PWM_PDM_CONTROL_CH_SHIFT(ch)     ((ch) * 4)
 
-#define MAX_TMBASE_STEPS                       65536
+/*
+ * PWM period is specified with a timebase register,
+ * in number of step periods. The PWM duty cycle is also
+ * specified in step periods, in the [0, $timebase] range.
+ * In other words, the timebase imposes the duty cycle
+ * resolution. Therefore, let's constraint the timebase to
+ * a minimum value to allow a sane range of duty cycle values.
+ * Imposing a minimum timebase, will impose a maximum PWM frequency.
+ *
+ * The value chosen is completely arbitrary.
+ */
+#define MIN_TMBASE_STEPS                       16
+
+struct img_pwm_soc_data {
+       u32 max_timebase;
+};
 
 struct img_pwm_chip {
        struct device   *dev;
@@ -47,6 +63,9 @@ struct img_pwm_chip {
        struct clk      *sys_clk;
        void __iomem    *base;
        struct regmap   *periph_regs;
+       int             max_period_ns;
+       int             min_period_ns;
+       const struct img_pwm_soc_data   *data;
 };
 
 static inline struct img_pwm_chip *to_img_pwm_chip(struct pwm_chip *chip)
@@ -72,24 +91,31 @@ static int img_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        u32 val, div, duty, timebase;
        unsigned long mul, output_clk_hz, input_clk_hz;
        struct img_pwm_chip *pwm_chip = to_img_pwm_chip(chip);
+       unsigned int max_timebase = pwm_chip->data->max_timebase;
+
+       if (period_ns < pwm_chip->min_period_ns ||
+           period_ns > pwm_chip->max_period_ns) {
+               dev_err(chip->dev, "configured period not in range\n");
+               return -ERANGE;
+       }
 
        input_clk_hz = clk_get_rate(pwm_chip->pwm_clk);
        output_clk_hz = DIV_ROUND_UP(NSEC_PER_SEC, period_ns);
 
        mul = DIV_ROUND_UP(input_clk_hz, output_clk_hz);
-       if (mul <= MAX_TMBASE_STEPS) {
+       if (mul <= max_timebase) {
                div = PWM_CTRL_CFG_NO_SUB_DIV;
                timebase = DIV_ROUND_UP(mul, 1);
-       } else if (mul <= MAX_TMBASE_STEPS * 8) {
+       } else if (mul <= max_timebase * 8) {
                div = PWM_CTRL_CFG_SUB_DIV0;
                timebase = DIV_ROUND_UP(mul, 8);
-       } else if (mul <= MAX_TMBASE_STEPS * 64) {
+       } else if (mul <= max_timebase * 64) {
                div = PWM_CTRL_CFG_SUB_DIV1;
                timebase = DIV_ROUND_UP(mul, 64);
-       } else if (mul <= MAX_TMBASE_STEPS * 512) {
+       } else if (mul <= max_timebase * 512) {
                div = PWM_CTRL_CFG_SUB_DIV0_DIV1;
                timebase = DIV_ROUND_UP(mul, 512);
-       } else if (mul > MAX_TMBASE_STEPS * 512) {
+       } else if (mul > max_timebase * 512) {
                dev_err(chip->dev,
                        "failed to configure timebase steps/divider value\n");
                return -EINVAL;
@@ -143,11 +169,27 @@ static const struct pwm_ops img_pwm_ops = {
        .owner = THIS_MODULE,
 };
 
+static const struct img_pwm_soc_data pistachio_pwm = {
+       .max_timebase = 255,
+};
+
+static const struct of_device_id img_pwm_of_match[] = {
+       {
+               .compatible = "img,pistachio-pwm",
+               .data = &pistachio_pwm,
+       },
+       { }
+};
+MODULE_DEVICE_TABLE(of, img_pwm_of_match);
+
 static int img_pwm_probe(struct platform_device *pdev)
 {
        int ret;
+       u64 val;
+       unsigned long clk_rate;
        struct resource *res;
        struct img_pwm_chip *pwm;
+       const struct of_device_id *of_dev_id;
 
        pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL);
        if (!pwm)
@@ -160,6 +202,11 @@ static int img_pwm_probe(struct platform_device *pdev)
        if (IS_ERR(pwm->base))
                return PTR_ERR(pwm->base);
 
+       of_dev_id = of_match_device(img_pwm_of_match, &pdev->dev);
+       if (!of_dev_id)
+               return -ENODEV;
+       pwm->data = of_dev_id->data;
+
        pwm->periph_regs = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
                                                           "img,cr-periph");
        if (IS_ERR(pwm->periph_regs))
@@ -189,6 +236,17 @@ static int img_pwm_probe(struct platform_device *pdev)
                goto disable_sysclk;
        }
 
+       clk_rate = clk_get_rate(pwm->pwm_clk);
+
+       /* The maximum input clock divider is 512 */
+       val = (u64)NSEC_PER_SEC * 512 * pwm->data->max_timebase;
+       do_div(val, clk_rate);
+       pwm->max_period_ns = val;
+
+       val = (u64)NSEC_PER_SEC * MIN_TMBASE_STEPS;
+       do_div(val, clk_rate);
+       pwm->min_period_ns = val;
+
        pwm->chip.dev = &pdev->dev;
        pwm->chip.ops = &img_pwm_ops;
        pwm->chip.base = -1;
@@ -228,12 +286,6 @@ static int img_pwm_remove(struct platform_device *pdev)
        return pwmchip_remove(&pwm_chip->chip);
 }
 
-static const struct of_device_id img_pwm_of_match[] = {
-       { .compatible = "img,pistachio-pwm", },
-       { }
-};
-MODULE_DEVICE_TABLE(of, img_pwm_of_match);
-
 static struct platform_driver img_pwm_driver = {
        .driver = {
                .name = "img-pwm",
index 8a4df7a..e628d4c 100644 (file)
@@ -394,6 +394,7 @@ static inline struct da9052_regulator_info *find_regulator_info(u8 chip_id,
 
 static int da9052_regulator_probe(struct platform_device *pdev)
 {
+       const struct mfd_cell *cell = mfd_get_cell(pdev);
        struct regulator_config config = { };
        struct da9052_regulator *regulator;
        struct da9052 *da9052;
@@ -409,7 +410,7 @@ static int da9052_regulator_probe(struct platform_device *pdev)
        regulator->da9052 = da9052;
 
        regulator->info = find_regulator_info(regulator->da9052->chip_id,
-                                             pdev->id);
+                                             cell->id);
        if (regulator->info == NULL) {
                dev_err(&pdev->dev, "invalid regulator ID specified\n");
                return -EINVAL;
@@ -419,7 +420,7 @@ static int da9052_regulator_probe(struct platform_device *pdev)
        config.driver_data = regulator;
        config.regmap = da9052->regmap;
        if (pdata && pdata->regulators) {
-               config.init_data = pdata->regulators[pdev->id];
+               config.init_data = pdata->regulators[cell->id];
        } else {
 #ifdef CONFIG_OF
                struct device_node *nproot = da9052->dev->of_node;
index cb70ced..4b62d1a 100644 (file)
@@ -64,7 +64,7 @@ static void rtc_delayed_write(u32 val, struct armada38x_rtc *rtc, int offset)
 static int armada38x_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
        struct armada38x_rtc *rtc = dev_get_drvdata(dev);
-       unsigned long time, time_check, flags;
+       unsigned long time, time_check;
 
        mutex_lock(&rtc->mutex_time);
        time = readl(rtc->regs + RTC_TIME);
index f0b9871..3ba6114 100644 (file)
@@ -1158,11 +1158,12 @@ static ssize_t poll_timeout_store(struct bus_type *bus, const char *buf,
        poll_timeout = time;
        hr_time = ktime_set(0, poll_timeout);
 
-       if (!hrtimer_is_queued(&ap_poll_timer) ||
-           !hrtimer_forward(&ap_poll_timer, hrtimer_get_expires(&ap_poll_timer), hr_time)) {
-               hrtimer_set_expires(&ap_poll_timer, hr_time);
-               hrtimer_start_expires(&ap_poll_timer, HRTIMER_MODE_ABS);
-       }
+       spin_lock_bh(&ap_poll_timer_lock);
+       hrtimer_cancel(&ap_poll_timer);
+       hrtimer_set_expires(&ap_poll_timer, hr_time);
+       hrtimer_start_expires(&ap_poll_timer, HRTIMER_MODE_ABS);
+       spin_unlock_bh(&ap_poll_timer_lock);
+
        return count;
 }
 
@@ -1528,14 +1529,11 @@ static inline void __ap_schedule_poll_timer(void)
        ktime_t hr_time;
 
        spin_lock_bh(&ap_poll_timer_lock);
-       if (hrtimer_is_queued(&ap_poll_timer) || ap_suspend_flag)
-               goto out;
-       if (ktime_to_ns(hrtimer_expires_remaining(&ap_poll_timer)) <= 0) {
+       if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) {
                hr_time = ktime_set(0, poll_timeout);
                hrtimer_forward_now(&ap_poll_timer, hr_time);
                hrtimer_restart(&ap_poll_timer);
        }
-out:
        spin_unlock_bh(&ap_poll_timer_lock);
 }
 
@@ -1952,7 +1950,7 @@ static void ap_reset_domain(void)
 {
        int i;
 
-       if (ap_domain_index != -1)
+       if ((ap_domain_index != -1) && (ap_test_config_domain(ap_domain_index)))
                for (i = 0; i < AP_DEVICES; i++)
                        ap_reset_queue(AP_MKQID(i, ap_domain_index));
 }
@@ -2097,7 +2095,6 @@ void ap_module_exit(void)
        hrtimer_cancel(&ap_poll_timer);
        destroy_workqueue(ap_work_queue);
        tasklet_kill(&ap_tasklet);
-       root_device_unregister(ap_root_device);
        while ((dev = bus_find_device(&ap_bus_type, NULL, NULL,
                    __ap_match_all)))
        {
@@ -2106,6 +2103,7 @@ void ap_module_exit(void)
        }
        for (i = 0; ap_bus_attrs[i]; i++)
                bus_remove_file(&ap_bus_type, ap_bus_attrs[i]);
+       root_device_unregister(ap_root_device);
        bus_unregister(&ap_bus_type);
        unregister_reset_call(&ap_reset_call);
        if (ap_using_interrupts())
index 81e83a6..3207009 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2014 Emulex
+ * Copyright (C) 2005 - 2015 Avago Technologies
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -8,9 +8,9 @@
  * Public License is included in this distribution in the file called COPYING.
  *
  * Contact Information:
- * linux-drivers@emulex.com
+ * linux-drivers@avagotech.com
  *
- * Emulex
+ * Avago Technologies
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
index 1028760..447cf7c 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2014 Emulex
+ * Copyright (C) 2005 - 2015 Avago Technologies
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -8,9 +8,9 @@
  * Public License is included in this distribution in the file called COPYING.
  *
  * Contact Information:
- * linux-drivers@emulex.com
+ * linux-drivers@avagotech.com
  *
- * Emulex
+ * Avago Technologies
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
index 9889743..f11d325 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2014 Emulex
+ * Copyright (C) 2005 - 2015 Avago Technologies
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -8,9 +8,9 @@
  * Public License is included in this distribution in the file called COPYING.
  *
  * Contact Information:
- * linux-drivers@emulex.com
+ * linux-drivers@avagotech.com
  *
- * Emulex
+ * Avago Technologies
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
index b7391a3..2f07007 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2014 Emulex
+ * Copyright (C) 2005 - 2015 Avago Technologies
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com)
  *
  * Contact Information:
- * linux-drivers@emulex.com
+ * linux-drivers@avagotech.com
  *
- * Emulex
+ * Avago Technologies
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
index e0b3b2d..0c84e1c 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2014 Emulex
+ * Copyright (C) 2005 - 2015 Avago Technologies
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com)
  *
  * Contact Information:
- * linux-drivers@emulex.com
+ * linux-drivers@avagotech.com
  *
- * Emulex
+ * Avago Technologies
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
index 923a2b5..1f74760 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2014 Emulex
+ * Copyright (C) 2005 - 2015 Avago Technologies
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com)
  *
  * Contact Information:
- * linux-drivers@emulex.com
+ * linux-drivers@avagotech.com
  *
- * Emulex
+ * Avago Technologies
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
@@ -50,7 +50,7 @@ static unsigned int enable_msix = 1;
 
 MODULE_DESCRIPTION(DRV_DESC " " BUILD_STR);
 MODULE_VERSION(BUILD_STR);
-MODULE_AUTHOR("Emulex Corporation");
+MODULE_AUTHOR("Avago Technologies");
 MODULE_LICENSE("GPL");
 module_param(be_iopoll_budget, int, 0);
 module_param(enable_msix, int, 0);
@@ -552,7 +552,7 @@ MODULE_DEVICE_TABLE(pci, beiscsi_pci_id_table);
 
 static struct scsi_host_template beiscsi_sht = {
        .module = THIS_MODULE,
-       .name = "Emulex 10Gbe open-iscsi Initiator Driver",
+       .name = "Avago Technologies 10Gbe open-iscsi Initiator Driver",
        .proc_name = DRV_NAME,
        .queuecommand = iscsi_queuecommand,
        .change_queue_depth = scsi_change_queue_depth,
index 7ee0ffc..e70ea26 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2014 Emulex
+ * Copyright (C) 2005 - 2015 Avago Technologies
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com)
  *
  * Contact Information:
- * linux-drivers@emulex.com
+ * linux-drivers@avagotech.com
  *
- * Emulex
+ * Avago Technologies
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
@@ -37,7 +37,7 @@
 
 #define DRV_NAME               "be2iscsi"
 #define BUILD_STR              "10.4.114.0"
-#define BE_NAME                        "Emulex OneConnect" \
+#define BE_NAME                        "Avago Technologies OneConnect" \
                                "Open-iSCSI Driver version" BUILD_STR
 #define DRV_DESC               BE_NAME " " "Driver"
 
index 681d4e8..c2c4d69 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2014 Emulex
+ * Copyright (C) 2005 - 2015 Avago Technologies
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com)
  *
  * Contact Information:
- * linux-drivers@emulex.com
+ * linux-drivers@avagotech.com
  *
- * Emulex
+ * Avago Technologies
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
index bd81446..9356b9a 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2014 Emulex
+ * Copyright (C) 2005 - 2015 Avago Technologies
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@avagotech.com)
  *
  * Contact Information:
- * linux-drivers@emulex.com
+ * linux-drivers@avagotech.com
  *
- * Emulex
+ * Avago Technologies
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
index cb73cf9..c140f99 100644 (file)
@@ -1130,25 +1130,6 @@ lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
 }
 
 /**
- * lpfc_fcpcmd_to_iocb - copy the fcp_cmd data into the IOCB
- * @data: A pointer to the immediate command data portion of the IOCB.
- * @fcp_cmnd: The FCP Command that is provided by the SCSI layer.
- *
- * The routine copies the entire FCP command from @fcp_cmnd to @data while
- * byte swapping the data to big endian format for transmission on the wire.
- **/
-static void
-lpfc_fcpcmd_to_iocb(uint8_t *data, struct fcp_cmnd *fcp_cmnd)
-{
-       int i, j;
-
-       for (i = 0, j = 0; i < sizeof(struct fcp_cmnd);
-            i += sizeof(uint32_t), j++) {
-               ((uint32_t *)data)[j] = cpu_to_be32(((uint32_t *)fcp_cmnd)[j]);
-       }
-}
-
-/**
  * lpfc_scsi_prep_dma_buf_s3 - DMA mapping for scsi buffer to SLI3 IF spec
  * @phba: The Hba for which this call is being executed.
  * @lpfc_cmd: The scsi buffer which is going to be mapped.
@@ -1283,7 +1264,6 @@ lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
         * we need to set word 4 of IOCB here
         */
        iocb_cmd->un.fcpi.fcpi_parm = scsi_bufflen(scsi_cmnd);
-       lpfc_fcpcmd_to_iocb(iocb_cmd->unsli3.fcp_ext.icd, fcp_cmnd);
        return 0;
 }
 
@@ -4147,6 +4127,24 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
 }
 
 /**
+ * lpfc_fcpcmd_to_iocb - copy the fcp_cmd data into the IOCB
+ * @data: A pointer to the immediate command data portion of the IOCB.
+ * @fcp_cmnd: The FCP Command that is provided by the SCSI layer.
+ *
+ * The routine copies the entire FCP command from @fcp_cmnd to @data while
+ * byte swapping the data to big endian format for transmission on the wire.
+ **/
+static void
+lpfc_fcpcmd_to_iocb(uint8_t *data, struct fcp_cmnd *fcp_cmnd)
+{
+       int i, j;
+       for (i = 0, j = 0; i < sizeof(struct fcp_cmnd);
+            i += sizeof(uint32_t), j++) {
+               ((uint32_t *)data)[j] = cpu_to_be32(((uint32_t *)fcp_cmnd)[j]);
+       }
+}
+
+/**
  * lpfc_scsi_prep_cmnd - Wrapper func for convert scsi cmnd to FCP info unit
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: The scsi command which needs to send.
@@ -4225,6 +4223,9 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
                fcp_cmnd->fcpCntl3 = 0;
                phba->fc4ControlRequests++;
        }
+       if (phba->sli_rev == 3 &&
+           !(phba->sli3_options & LPFC_SLI3_BG_ENABLED))
+               lpfc_fcpcmd_to_iocb(iocb_cmd->unsli3.fcp_ext.icd, fcp_cmnd);
        /*
         * Finish initializing those IOCB fields that are independent
         * of the scsi_cmnd request_buffer
index 68c2002..5c9e680 100644 (file)
@@ -1020,8 +1020,7 @@ static void tcm_qla2xxx_depend_tpg(struct work_struct *work)
        struct se_portal_group *se_tpg = &base_tpg->se_tpg;
        struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha;
 
-       if (!configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys,
-                                 &se_tpg->tpg_group.cg_item)) {
+       if (!target_depend_item(&se_tpg->tpg_group.cg_item)) {
                atomic_set(&base_tpg->lport_tpg_enabled, 1);
                qlt_enable_vha(base_vha);
        }
@@ -1037,8 +1036,7 @@ static void tcm_qla2xxx_undepend_tpg(struct work_struct *work)
 
        if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) {
                atomic_set(&base_tpg->lport_tpg_enabled, 0);
-               configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys,
-                                      &se_tpg->tpg_group.cg_item);
+               target_undepend_item(&se_tpg->tpg_group.cg_item);
        }
        complete(&base_tpg->tpg_base_comp);
 }
index 79beebf..7f9d65f 100644 (file)
@@ -1600,6 +1600,7 @@ static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd)
 {
        u64 start_lba = blk_rq_pos(scmd->request);
        u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512);
+       u64 factor = scmd->device->sector_size / 512;
        u64 bad_lba;
        int info_valid;
        /*
@@ -1621,16 +1622,9 @@ static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd)
        if (scsi_bufflen(scmd) <= scmd->device->sector_size)
                return 0;
 
-       if (scmd->device->sector_size < 512) {
-               /* only legitimate sector_size here is 256 */
-               start_lba <<= 1;
-               end_lba <<= 1;
-       } else {
-               /* be careful ... don't want any overflows */
-               unsigned int factor = scmd->device->sector_size / 512;
-               do_div(start_lba, factor);
-               do_div(end_lba, factor);
-       }
+       /* be careful ... don't want any overflows */
+       do_div(start_lba, factor);
+       do_div(end_lba, factor);
 
        /* The bad lba was reported incorrectly, we have no idea where
         * the error is.
@@ -2188,8 +2182,7 @@ got_data:
        if (sector_size != 512 &&
            sector_size != 1024 &&
            sector_size != 2048 &&
-           sector_size != 4096 &&
-           sector_size != 256) {
+           sector_size != 4096) {
                sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n",
                          sector_size);
                /*
@@ -2244,8 +2237,6 @@ got_data:
                sdkp->capacity <<= 2;
        else if (sector_size == 1024)
                sdkp->capacity <<= 1;
-       else if (sector_size == 256)
-               sdkp->capacity >>= 1;
 
        blk_queue_physical_block_size(sdp->request_queue,
                                      sdkp->physical_block_size);
index d9dad90..3c6584f 100644 (file)
@@ -1600,8 +1600,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
                break;
        default:
                vm_srb->data_in = UNKNOWN_TYPE;
-               vm_srb->win8_extension.srb_flags |= (SRB_FLAGS_DATA_IN |
-                                                    SRB_FLAGS_DATA_OUT);
+               vm_srb->win8_extension.srb_flags |= SRB_FLAGS_NO_DATA_TRANSFER;
                break;
        }
 
index bcdb22d..3c18503 100644 (file)
@@ -4,6 +4,7 @@
 config MTK_PMIC_WRAP
        tristate "MediaTek PMIC Wrapper Support"
        depends on ARCH_MEDIATEK
+       depends on RESET_CONTROLLER
        select REGMAP
        help
          Say yes here to add support for MediaTek PMIC Wrapper found
index db5be1e..f432291 100644 (file)
@@ -443,11 +443,6 @@ static int pwrap_wait_for_state(struct pmic_wrapper *wrp,
 static int pwrap_write(struct pmic_wrapper *wrp, u32 adr, u32 wdata)
 {
        int ret;
-       u32 val;
-
-       val = pwrap_readl(wrp, PWRAP_WACS2_RDATA);
-       if (PWRAP_GET_WACS_FSM(val) == PWRAP_WACS_FSM_WFVLDCLR)
-               pwrap_writel(wrp, 1, PWRAP_WACS2_VLDCLR);
 
        ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_idle);
        if (ret)
@@ -462,11 +457,6 @@ static int pwrap_write(struct pmic_wrapper *wrp, u32 adr, u32 wdata)
 static int pwrap_read(struct pmic_wrapper *wrp, u32 adr, u32 *rdata)
 {
        int ret;
-       u32 val;
-
-       val = pwrap_readl(wrp, PWRAP_WACS2_RDATA);
-       if (PWRAP_GET_WACS_FSM(val) == PWRAP_WACS_FSM_WFVLDCLR)
-               pwrap_writel(wrp, 1, PWRAP_WACS2_VLDCLR);
 
        ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_idle);
        if (ret)
@@ -480,6 +470,8 @@ static int pwrap_read(struct pmic_wrapper *wrp, u32 adr, u32 *rdata)
 
        *rdata = PWRAP_GET_WACS_RDATA(pwrap_readl(wrp, PWRAP_WACS2_RDATA));
 
+       pwrap_writel(wrp, 1, PWRAP_WACS2_VLDCLR);
+
        return 0;
 }
 
@@ -563,45 +555,17 @@ static int pwrap_init_sidly(struct pmic_wrapper *wrp)
 
 static int pwrap_init_reg_clock(struct pmic_wrapper *wrp)
 {
-       unsigned long rate_spi;
-       int ck_mhz;
-
-       rate_spi = clk_get_rate(wrp->clk_spi);
-
-       if (rate_spi > 26000000)
-               ck_mhz = 26;
-       else if (rate_spi > 18000000)
-               ck_mhz = 18;
-       else
-               ck_mhz = 0;
-
-       switch (ck_mhz) {
-       case 18:
-               if (pwrap_is_mt8135(wrp))
-                       pwrap_writel(wrp, 0xc, PWRAP_CSHEXT);
-               pwrap_writel(wrp, 0x4, PWRAP_CSHEXT_WRITE);
-               pwrap_writel(wrp, 0xc, PWRAP_CSHEXT_READ);
-               pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_START);
-               pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_END);
-               break;
-       case 26:
-               if (pwrap_is_mt8135(wrp))
-                       pwrap_writel(wrp, 0x4, PWRAP_CSHEXT);
+       if (pwrap_is_mt8135(wrp)) {
+               pwrap_writel(wrp, 0x4, PWRAP_CSHEXT);
                pwrap_writel(wrp, 0x0, PWRAP_CSHEXT_WRITE);
                pwrap_writel(wrp, 0x4, PWRAP_CSHEXT_READ);
                pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_START);
                pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_END);
-               break;
-       case 0:
-               if (pwrap_is_mt8135(wrp))
-                       pwrap_writel(wrp, 0xf, PWRAP_CSHEXT);
-               pwrap_writel(wrp, 0xf, PWRAP_CSHEXT_WRITE);
-               pwrap_writel(wrp, 0xf, PWRAP_CSHEXT_READ);
-               pwrap_writel(wrp, 0xf, PWRAP_CSLEXT_START);
-               pwrap_writel(wrp, 0xf, PWRAP_CSLEXT_END);
-               break;
-       default:
-               return -EINVAL;
+       } else {
+               pwrap_writel(wrp, 0x0, PWRAP_CSHEXT_WRITE);
+               pwrap_writel(wrp, 0x4, PWRAP_CSHEXT_READ);
+               pwrap_writel(wrp, 0x2, PWRAP_CSLEXT_START);
+               pwrap_writel(wrp, 0x2, PWRAP_CSLEXT_END);
        }
 
        return 0;
index 198f96b..72b0590 100644 (file)
@@ -78,6 +78,7 @@ config SPI_ATMEL
 config SPI_BCM2835
        tristate "BCM2835 SPI controller"
        depends on ARCH_BCM2835 || COMPILE_TEST
+       depends on GPIOLIB
        help
          This selects a driver for the Broadcom BCM2835 SPI master.
 
@@ -302,7 +303,7 @@ config SPI_FSL_SPI
 config SPI_FSL_DSPI
        tristate "Freescale DSPI controller"
        select REGMAP_MMIO
-       depends on SOC_VF610 || COMPILE_TEST
+       depends on SOC_VF610 || SOC_LS1021A || COMPILE_TEST
        help
          This enables support for the Freescale DSPI controller in master
          mode. VF610 platform uses the controller.
index f63864a..37875cf 100644 (file)
@@ -164,13 +164,12 @@ static int bcm2835_spi_transfer_one_poll(struct spi_master *master,
                                         unsigned long xfer_time_us)
 {
        struct bcm2835_spi *bs = spi_master_get_devdata(master);
-       unsigned long timeout = jiffies +
-               max(4 * xfer_time_us * HZ / 1000000, 2uL);
+       /* set timeout to 1 second of maximum polling */
+       unsigned long timeout = jiffies + HZ;
 
        /* enable HW block without interrupts */
        bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA);
 
-       /* set timeout to 4x the expected time, or 2 jiffies */
        /* loop until finished the transfer */
        while (bs->rx_len) {
                /* read from fifo as much as possible */
index 5ef6638..840a498 100644 (file)
@@ -180,7 +180,6 @@ int spi_bitbang_setup(struct spi_device *spi)
 {
        struct spi_bitbang_cs   *cs = spi->controller_state;
        struct spi_bitbang      *bitbang;
-       int                     retval;
        unsigned long           flags;
 
        bitbang = spi_master_get_devdata(spi->master);
@@ -197,9 +196,11 @@ int spi_bitbang_setup(struct spi_device *spi)
        if (!cs->txrx_word)
                return -EINVAL;
 
-       retval = bitbang->setup_transfer(spi, NULL);
-       if (retval < 0)
-               return retval;
+       if (bitbang->setup_transfer) {
+               int retval = bitbang->setup_transfer(spi, NULL);
+               if (retval < 0)
+                       return retval;
+       }
 
        dev_dbg(&spi->dev, "%s, %u nsec/bit\n", __func__, 2 * cs->nsecs);
 
@@ -295,9 +296,11 @@ static int spi_bitbang_transfer_one(struct spi_master *master,
 
                /* init (-1) or override (1) transfer params */
                if (do_setup != 0) {
-                       status = bitbang->setup_transfer(spi, t);
-                       if (status < 0)
-                               break;
+                       if (bitbang->setup_transfer) {
+                               status = bitbang->setup_transfer(spi, t);
+                               if (status < 0)
+                                       break;
+                       }
                        if (do_setup == -1)
                                do_setup = 0;
                }
index 9c46a30..896add8 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/of_address.h>
 #include <linux/spi/spi.h>
 #include <linux/types.h>
+#include <linux/platform_device.h>
 
 #include "spi-fsl-cpm.h"
 #include "spi-fsl-lib.h"
@@ -269,17 +270,6 @@ static unsigned long fsl_spi_cpm_get_pram(struct mpc8xxx_spi *mspi)
        if (mspi->flags & SPI_CPM2) {
                pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
                out_be16(spi_base, pram_ofs);
-       } else {
-               struct spi_pram __iomem *pram = spi_base;
-               u16 rpbase = in_be16(&pram->rpbase);
-
-               /* Microcode relocation patch applied? */
-               if (rpbase) {
-                       pram_ofs = rpbase;
-               } else {
-                       pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
-                       out_be16(spi_base, pram_ofs);
-               }
        }
 
        iounmap(spi_base);
@@ -292,7 +282,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
        struct device_node *np = dev->of_node;
        const u32 *iprop;
        int size;
-       unsigned long pram_ofs;
        unsigned long bds_ofs;
 
        if (!(mspi->flags & SPI_CPM_MODE))
@@ -319,8 +308,26 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
                }
        }
 
-       pram_ofs = fsl_spi_cpm_get_pram(mspi);
-       if (IS_ERR_VALUE(pram_ofs)) {
+       if (mspi->flags & SPI_CPM1) {
+               struct resource *res;
+               void *pram;
+
+               res = platform_get_resource(to_platform_device(dev),
+                                           IORESOURCE_MEM, 1);
+               pram = devm_ioremap_resource(dev, res);
+               if (IS_ERR(pram))
+                       mspi->pram = NULL;
+               else
+                       mspi->pram = pram;
+       } else {
+               unsigned long pram_ofs = fsl_spi_cpm_get_pram(mspi);
+
+               if (IS_ERR_VALUE(pram_ofs))
+                       mspi->pram = NULL;
+               else
+                       mspi->pram = cpm_muram_addr(pram_ofs);
+       }
+       if (mspi->pram == NULL) {
                dev_err(dev, "can't allocate spi parameter ram\n");
                goto err_pram;
        }
@@ -346,8 +353,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
                goto err_dummy_rx;
        }
 
-       mspi->pram = cpm_muram_addr(pram_ofs);
-
        mspi->tx_bd = cpm_muram_addr(bds_ofs);
        mspi->rx_bd = cpm_muram_addr(bds_ofs + sizeof(*mspi->tx_bd));
 
@@ -375,7 +380,8 @@ err_dummy_rx:
 err_dummy_tx:
        cpm_muram_free(bds_ofs);
 err_bds:
-       cpm_muram_free(pram_ofs);
+       if (!(mspi->flags & SPI_CPM1))
+               cpm_muram_free(cpm_muram_offset(mspi->pram));
 err_pram:
        fsl_spi_free_dummy_rx();
        return -ENOMEM;
index d0a73a0..80d245a 100644 (file)
@@ -359,14 +359,16 @@ static void fsl_espi_rw_trans(struct spi_message *m,
                                struct fsl_espi_transfer *trans, u8 *rx_buff)
 {
        struct fsl_espi_transfer *espi_trans = trans;
-       unsigned int n_tx = espi_trans->n_tx;
-       unsigned int n_rx = espi_trans->n_rx;
+       unsigned int total_len = espi_trans->len;
        struct spi_transfer *t;
        u8 *local_buf;
        u8 *rx_buf = rx_buff;
        unsigned int trans_len;
        unsigned int addr;
-       int i, pos, loop;
+       unsigned int tx_only;
+       unsigned int rx_pos = 0;
+       unsigned int pos;
+       int i, loop;
 
        local_buf = kzalloc(SPCOM_TRANLEN_MAX, GFP_KERNEL);
        if (!local_buf) {
@@ -374,36 +376,48 @@ static void fsl_espi_rw_trans(struct spi_message *m,
                return;
        }
 
-       for (pos = 0, loop = 0; pos < n_rx; pos += trans_len, loop++) {
-               trans_len = n_rx - pos;
-               if (trans_len > SPCOM_TRANLEN_MAX - n_tx)
-                       trans_len = SPCOM_TRANLEN_MAX - n_tx;
+       for (pos = 0, loop = 0; pos < total_len; pos += trans_len, loop++) {
+               trans_len = total_len - pos;
 
                i = 0;
+               tx_only = 0;
                list_for_each_entry(t, &m->transfers, transfer_list) {
                        if (t->tx_buf) {
                                memcpy(local_buf + i, t->tx_buf, t->len);
                                i += t->len;
+                               if (!t->rx_buf)
+                                       tx_only += t->len;
                        }
                }
 
+               /* Add additional TX bytes to compensate SPCOM_TRANLEN_MAX */
+               if (loop > 0)
+                       trans_len += tx_only;
+
+               if (trans_len > SPCOM_TRANLEN_MAX)
+                       trans_len = SPCOM_TRANLEN_MAX;
+
+               /* Update device offset */
                if (pos > 0) {
                        addr = fsl_espi_cmd2addr(local_buf);
-                       addr += pos;
+                       addr += rx_pos;
                        fsl_espi_addr2cmd(addr, local_buf);
                }
 
-               espi_trans->n_tx = n_tx;
-               espi_trans->n_rx = trans_len;
-               espi_trans->len = trans_len + n_tx;
+               espi_trans->len = trans_len;
                espi_trans->tx_buf = local_buf;
                espi_trans->rx_buf = local_buf;
                fsl_espi_do_trans(m, espi_trans);
 
-               memcpy(rx_buf + pos, espi_trans->rx_buf + n_tx, trans_len);
+               /* If there is at least one RX byte then copy it to rx_buf */
+               if (tx_only < SPCOM_TRANLEN_MAX)
+                       memcpy(rx_buf + rx_pos, espi_trans->rx_buf + tx_only,
+                                       trans_len - tx_only);
+
+               rx_pos += trans_len - tx_only;
 
                if (loop > 0)
-                       espi_trans->actual_length += espi_trans->len - n_tx;
+                       espi_trans->actual_length += espi_trans->len - tx_only;
                else
                        espi_trans->actual_length += espi_trans->len;
        }
@@ -418,6 +432,7 @@ static int fsl_espi_do_one_msg(struct spi_master *master,
        u8 *rx_buf = NULL;
        unsigned int n_tx = 0;
        unsigned int n_rx = 0;
+       unsigned int xfer_len = 0;
        struct fsl_espi_transfer espi_trans;
 
        list_for_each_entry(t, &m->transfers, transfer_list) {
@@ -427,11 +442,13 @@ static int fsl_espi_do_one_msg(struct spi_master *master,
                        n_rx += t->len;
                        rx_buf = t->rx_buf;
                }
+               if ((t->tx_buf) || (t->rx_buf))
+                       xfer_len += t->len;
        }
 
        espi_trans.n_tx = n_tx;
        espi_trans.n_rx = n_rx;
-       espi_trans.len = n_tx + n_rx;
+       espi_trans.len = xfer_len;
        espi_trans.actual_length = 0;
        espi_trans.status = 0;
 
index 4df8942..d1a5b9f 100644 (file)
@@ -1210,6 +1210,7 @@ static int omap2_mcspi_transfer_one_message(struct spi_master *master,
        struct omap2_mcspi      *mcspi;
        struct omap2_mcspi_dma  *mcspi_dma;
        struct spi_transfer     *t;
+       int status;
 
        spi = m->spi;
        mcspi = spi_master_get_devdata(master);
@@ -1229,7 +1230,8 @@ static int omap2_mcspi_transfer_one_message(struct spi_master *master,
                                        tx_buf ? "tx" : "",
                                        rx_buf ? "rx" : "",
                                        t->bits_per_word);
-                       return -EINVAL;
+                       status = -EINVAL;
+                       goto out;
                }
 
                if (m->is_dma_mapped || len < DMA_MIN_BYTES)
@@ -1241,7 +1243,8 @@ static int omap2_mcspi_transfer_one_message(struct spi_master *master,
                        if (dma_mapping_error(mcspi->dev, t->tx_dma)) {
                                dev_dbg(mcspi->dev, "dma %cX %d bytes error\n",
                                                'T', len);
-                               return -EINVAL;
+                               status = -EINVAL;
+                               goto out;
                        }
                }
                if (mcspi_dma->dma_rx && rx_buf != NULL) {
@@ -1253,14 +1256,19 @@ static int omap2_mcspi_transfer_one_message(struct spi_master *master,
                                if (tx_buf != NULL)
                                        dma_unmap_single(mcspi->dev, t->tx_dma,
                                                        len, DMA_TO_DEVICE);
-                               return -EINVAL;
+                               status = -EINVAL;
+                               goto out;
                        }
                }
        }
 
        omap2_mcspi_work(mcspi, m);
+       /* spi_finalize_current_message() changes the status inside the
+        * spi_message, save the status here. */
+       status = m->status;
+out:
        spi_finalize_current_message(master);
-       return 0;
+       return status;
 }
 
 static int omap2_mcspi_master_setup(struct omap2_mcspi *mcspi)
index d5d7d22..50910d8 100644 (file)
@@ -583,6 +583,15 @@ static int spi_unmap_msg(struct spi_master *master, struct spi_message *msg)
        rx_dev = master->dma_rx->device->dev;
 
        list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+               /*
+                * Restore the original value of tx_buf or rx_buf if they are
+                * NULL.
+                */
+               if (xfer->tx_buf == master->dummy_tx)
+                       xfer->tx_buf = NULL;
+               if (xfer->rx_buf == master->dummy_rx)
+                       xfer->rx_buf = NULL;
+
                if (!master->can_dma(master, msg->spi, xfer))
                        continue;
 
index 0942841..c5352ea 100644 (file)
@@ -621,8 +621,8 @@ static u32 ssb_pmu_get_alp_clock_clk0(struct ssb_chipcommon *cc)
        u32 crystalfreq;
        const struct pmu0_plltab_entry *e = NULL;
 
-       crystalfreq = chipco_read32(cc, SSB_CHIPCO_PMU_CTL) &
-                     SSB_CHIPCO_PMU_CTL_XTALFREQ >> SSB_CHIPCO_PMU_CTL_XTALFREQ_SHIFT;
+       crystalfreq = (chipco_read32(cc, SSB_CHIPCO_PMU_CTL) &
+                      SSB_CHIPCO_PMU_CTL_XTALFREQ)  >> SSB_CHIPCO_PMU_CTL_XTALFREQ_SHIFT;
        e = pmu0_plltab_find_entry(crystalfreq);
        BUG_ON(!e);
        return e->freq * 1000;
@@ -634,7 +634,7 @@ u32 ssb_pmu_get_alp_clock(struct ssb_chipcommon *cc)
 
        switch (bus->chip_id) {
        case 0x5354:
-               ssb_pmu_get_alp_clock_clk0(cc);
+               return ssb_pmu_get_alp_clock_clk0(cc);
        default:
                ssb_err("ERROR: PMU alp clock unknown for device %04X\n",
                        bus->chip_id);
index 15a7ee3..5fe1c22 100644 (file)
@@ -359,12 +359,13 @@ static void ssb_pcicore_init_hostmode(struct ssb_pcicore *pc)
 
        /*
         * Accessing PCI config without a proper delay after devices reset (not
-        * GPIO reset) was causing reboots on WRT300N v1.0.
+        * GPIO reset) was causing reboots on WRT300N v1.0 (BCM4704).
         * Tested delay 850 us lowered reboot chance to 50-80%, 1000 us fixed it
         * completely. Flushing all writes was also tested but with no luck.
+        * The same problem was reported for WRT350N v1 (BCM4705), so we just
+        * sleep here unconditionally.
         */
-       if (pc->dev->bus->chip_id == 0x4704)
-               usleep_range(1000, 2000);
+       usleep_range(1000, 2000);
 
        /* Enable PCI bridge BAR0 prefetch and burst */
        val = PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY;
index 8199b0a..1cf24e4 100644 (file)
@@ -158,7 +158,7 @@ static int up_to_host(struct mux_rx *r)
        unsigned int start_flag;
        unsigned int payload_size;
        unsigned short packet_type;
-       int dummy_cnt;
+       int total_len;
        u32 packet_size_sum = r->offset;
        int index;
        int ret = TO_HOST_INVALID_PACKET;
@@ -176,10 +176,10 @@ static int up_to_host(struct mux_rx *r)
                        break;
                }
 
-               dummy_cnt = ALIGN(MUX_HEADER_SIZE + payload_size, 4);
+               total_len = ALIGN(MUX_HEADER_SIZE + payload_size, 4);
 
                if (len - packet_size_sum <
-                       MUX_HEADER_SIZE + payload_size + dummy_cnt) {
+                       total_len) {
                        pr_err("invalid payload : %d %d %04x\n",
                               payload_size, len, packet_type);
                        break;
@@ -202,7 +202,7 @@ static int up_to_host(struct mux_rx *r)
                        break;
                }
 
-               packet_size_sum += MUX_HEADER_SIZE + payload_size + dummy_cnt;
+               packet_size_sum += total_len;
                if (len - packet_size_sum <= MUX_HEADER_SIZE + 2) {
                        ret = r->callback(NULL,
                                        0,
@@ -361,7 +361,6 @@ static int gdm_mux_send(void *priv_dev, void *data, int len, int tty_index,
        struct mux_pkt_header *mux_header;
        struct mux_tx *t = NULL;
        static u32 seq_num = 1;
-       int dummy_cnt;
        int total_len;
        int ret;
        unsigned long flags;
@@ -374,9 +373,7 @@ static int gdm_mux_send(void *priv_dev, void *data, int len, int tty_index,
 
        spin_lock_irqsave(&mux_dev->write_lock, flags);
 
-       dummy_cnt = ALIGN(MUX_HEADER_SIZE + len, 4);
-
-       total_len = len + MUX_HEADER_SIZE + dummy_cnt;
+       total_len = ALIGN(MUX_HEADER_SIZE + len, 4);
 
        t = alloc_mux_tx(total_len);
        if (!t) {
@@ -392,7 +389,8 @@ static int gdm_mux_send(void *priv_dev, void *data, int len, int tty_index,
        mux_header->packet_type = __cpu_to_le16(packet_type[tty_index]);
 
        memcpy(t->buf+MUX_HEADER_SIZE, data, len);
-       memset(t->buf+MUX_HEADER_SIZE+len, 0, dummy_cnt);
+       memset(t->buf+MUX_HEADER_SIZE+len, 0, total_len - MUX_HEADER_SIZE -
+              len);
 
        t->len = total_len;
        t->callback = cb;
index 3925db1..513c81f 100644 (file)
@@ -189,22 +189,7 @@ static inline int ll_quota_off(struct super_block *sb, int off, int remount)
 #endif
 
 
-
-/*
- * After 3.1, kernel's nameidata.intent.open.flags is different
- * with lustre's lookup_intent.it_flags, as lustre's it_flags'
- * lower bits equal to FMODE_xxx while kernel doesn't transliterate
- * lower bits of nameidata.intent.open.flags to FMODE_xxx.
- * */
 #include <linux/version.h>
-static inline int ll_namei_to_lookup_intent_flag(int flag)
-{
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
-       flag = (flag & ~O_ACCMODE) | OPEN_FMODE(flag);
-#endif
-       return flag;
-}
-
 #include <linux/fs.h>
 
 # define ll_umode_t    umode_t
index cc3ab35..f926224 100644 (file)
@@ -87,7 +87,7 @@ static void cfs_cpu_core_siblings(int cpu, cpumask_t *mask)
 /* return cpumask of HTs in the same core */
 static void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask)
 {
-       cpumask_copy(mask, topology_thread_cpumask(cpu));
+       cpumask_copy(mask, topology_sibling_cpumask(cpu));
 }
 
 static void cfs_node_to_cpumask(int node, cpumask_t *mask)
index 5f918e3..528af90 100644 (file)
 #define VM_FAULT_RETRY 0
 #endif
 
-/* Kernel 3.1 kills LOOKUP_CONTINUE, LOOKUP_PARENT is equivalent to it.
- * seem kernel commit 49084c3bb2055c401f3493c13edae14d49128ca0 */
-#ifndef LOOKUP_CONTINUE
-#define LOOKUP_CONTINUE LOOKUP_PARENT
-#endif
-
 /** Only used on client-side for indicating the tail of dir hash/offset. */
 #define LL_DIR_END_OFF   0x7fffffffffffffffULL
 #define LL_DIR_END_OFF_32BIT    0x7fffffffUL
index 3711e67..69b2036 100644 (file)
@@ -118,7 +118,7 @@ failed:
        return rc;
 }
 
-static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *ll_follow_link(struct dentry *dentry, void **cookie)
 {
        struct inode *inode = d_inode(dentry);
        struct ptlrpc_request *request = NULL;
@@ -126,32 +126,22 @@ static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd)
        char *symname = NULL;
 
        CDEBUG(D_VFSTRACE, "VFS Op\n");
-       /* Limit the recursive symlink depth to 5 instead of default
-        * 8 links when kernel has 4k stack to prevent stack overflow.
-        * For 8k stacks we need to limit it to 7 for local servers. */
-       if (THREAD_SIZE < 8192 && current->link_count >= 6) {
-               rc = -ELOOP;
-       } else if (THREAD_SIZE == 8192 && current->link_count >= 8) {
-               rc = -ELOOP;
-       } else {
-               ll_inode_size_lock(inode);
-               rc = ll_readlink_internal(inode, &request, &symname);
-               ll_inode_size_unlock(inode);
-       }
+       ll_inode_size_lock(inode);
+       rc = ll_readlink_internal(inode, &request, &symname);
+       ll_inode_size_unlock(inode);
        if (rc) {
                ptlrpc_req_finished(request);
-               request = NULL;
-               symname = ERR_PTR(rc);
+               return ERR_PTR(rc);
        }
 
-       nd_set_link(nd, symname);
        /* symname may contain a pointer to the request message buffer,
         * we delay request releasing until ll_put_link then.
         */
-       return request;
+       *cookie = request;
+       return symname;
 }
 
-static void ll_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+static void ll_put_link(struct inode *unused, void *cookie)
 {
        ptlrpc_req_finished(cookie);
 }
index 8e61421..344189a 100644 (file)
@@ -557,7 +557,7 @@ ptlrpc_server_nthreads_check(struct ptlrpc_service *svc,
                 * there are.
                 */
                /* weight is # of HTs */
-               if (cpumask_weight(topology_thread_cpumask(0)) > 1) {
+               if (cpumask_weight(topology_sibling_cpumask(0)) > 1) {
                        /* depress thread factor for hyper-thread */
                        factor = factor - (factor >> 1) + (factor >> 3);
                }
@@ -2768,7 +2768,7 @@ int ptlrpc_hr_init(void)
 
        init_waitqueue_head(&ptlrpc_hr.hr_waitq);
 
-       weight = cpumask_weight(topology_thread_cpumask(0));
+       weight = cpumask_weight(topology_sibling_cpumask(0));
 
        cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
                hrp->hrp_cpt = i;
index 5ff4716..784b5ec 100644 (file)
@@ -746,8 +746,8 @@ void oz_hcd_pd_reset(void *hpd, void *hport)
 /*
  * Context: softirq
  */
-void oz_hcd_get_desc_cnf(void *hport, u8 req_id, int status, const u8 *desc,
-                       int length, int offset, int total_size)
+void oz_hcd_get_desc_cnf(void *hport, u8 req_id, u8 status, const u8 *desc,
+                       u8 length, u16 offset, u16 total_size)
 {
        struct oz_port *port = hport;
        struct urb *urb;
@@ -759,8 +759,8 @@ void oz_hcd_get_desc_cnf(void *hport, u8 req_id, int status, const u8 *desc,
        if (!urb)
                return;
        if (status == 0) {
-               int copy_len;
-               int required_size = urb->transfer_buffer_length;
+               unsigned int copy_len;
+               unsigned int required_size = urb->transfer_buffer_length;
 
                if (required_size > total_size)
                        required_size = total_size;
index 4249fa3..d2a6085 100644 (file)
@@ -29,8 +29,8 @@ void oz_usb_request_heartbeat(void *hpd);
 
 /* Confirmation functions.
  */
-void oz_hcd_get_desc_cnf(void *hport, u8 req_id, int status,
-       const u8 *desc, int length, int offset, int total_size);
+void oz_hcd_get_desc_cnf(void *hport, u8 req_id, u8 status,
+       const u8 *desc, u8 length, u16 offset, u16 total_size);
 void oz_hcd_control_cnf(void *hport, u8 req_id, u8 rcode,
        const u8 *data, int data_len);
 
index d434d8c..f660bb1 100644 (file)
@@ -326,7 +326,11 @@ static void oz_usb_handle_ep_data(struct oz_usb_ctx *usb_ctx,
                        struct oz_multiple_fixed *body =
                                (struct oz_multiple_fixed *)data_hdr;
                        u8 *data = body->data;
-                       int n = (len - sizeof(struct oz_multiple_fixed)+1)
+                       unsigned int n;
+                       if (!body->unit_size ||
+                               len < sizeof(struct oz_multiple_fixed) - 1)
+                               break;
+                       n = (len - (sizeof(struct oz_multiple_fixed) - 1))
                                / body->unit_size;
                        while (n--) {
                                oz_hcd_data_ind(usb_ctx->hport, body->endpoint,
@@ -390,10 +394,15 @@ void oz_usb_rx(struct oz_pd *pd, struct oz_elt *elt)
        case OZ_GET_DESC_RSP: {
                        struct oz_get_desc_rsp *body =
                                (struct oz_get_desc_rsp *)usb_hdr;
-                       int data_len = elt->length -
-                                       sizeof(struct oz_get_desc_rsp) + 1;
-                       u16 offs = le16_to_cpu(get_unaligned(&body->offset));
-                       u16 total_size =
+                       u16 offs, total_size;
+                       u8 data_len;
+
+                       if (elt->length < sizeof(struct oz_get_desc_rsp) - 1)
+                               break;
+                       data_len = elt->length -
+                                       (sizeof(struct oz_get_desc_rsp) - 1);
+                       offs = le16_to_cpu(get_unaligned(&body->offset));
+                       total_size =
                                le16_to_cpu(get_unaligned(&body->total_size));
                        oz_dbg(ON, "USB_REQ_GET_DESCRIPTOR - cnf\n");
                        oz_hcd_get_desc_cnf(usb_ctx->hport, body->req_id,
index f1d47a0..ada8d5d 100644 (file)
@@ -898,11 +898,11 @@ static void SwLedControlMode1(struct _adapter *padapter,
                          IS_LED_WPS_BLINKING(pLed))
                                return;
                        if (pLed->bLedLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedLinkBlinkInProgress = false;
                        }
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        pLed->bLedNoLinkBlinkInProgress = true;
@@ -921,11 +921,11 @@ static void SwLedControlMode1(struct _adapter *padapter,
                            IS_LED_WPS_BLINKING(pLed))
                                return;
                        if (pLed->bLedNoLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedNoLinkBlinkInProgress = false;
                        }
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        pLed->bLedLinkBlinkInProgress = true;
@@ -946,15 +946,15 @@ static void SwLedControlMode1(struct _adapter *padapter,
                        if (IS_LED_WPS_BLINKING(pLed))
                                return;
                        if (pLed->bLedNoLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedNoLinkBlinkInProgress = false;
                        }
                        if (pLed->bLedLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                 pLed->bLedLinkBlinkInProgress = false;
                        }
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        pLed->bLedScanBlinkInProgress = true;
@@ -975,11 +975,11 @@ static void SwLedControlMode1(struct _adapter *padapter,
                            IS_LED_WPS_BLINKING(pLed))
                                return;
                        if (pLed->bLedNoLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedNoLinkBlinkInProgress = false;
                        }
                        if (pLed->bLedLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedLinkBlinkInProgress = false;
                        }
                        pLed->bLedBlinkInProgress = true;
@@ -998,19 +998,19 @@ static void SwLedControlMode1(struct _adapter *padapter,
        case LED_CTL_START_WPS_BOTTON:
                 if (pLed->bLedWPSBlinkInProgress == false) {
                        if (pLed->bLedNoLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedNoLinkBlinkInProgress = false;
                        }
                        if (pLed->bLedLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                 pLed->bLedLinkBlinkInProgress = false;
                        }
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        if (pLed->bLedScanBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedScanBlinkInProgress = false;
                        }
                        pLed->bLedWPSBlinkInProgress = true;
@@ -1025,23 +1025,23 @@ static void SwLedControlMode1(struct _adapter *padapter,
                break;
        case LED_CTL_STOP_WPS:
                if (pLed->bLedNoLinkBlinkInProgress == true) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedNoLinkBlinkInProgress = false;
                }
                if (pLed->bLedLinkBlinkInProgress == true) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                         pLed->bLedLinkBlinkInProgress = false;
                }
                if (pLed->bLedBlinkInProgress == true) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedBlinkInProgress = false;
                }
                if (pLed->bLedScanBlinkInProgress == true) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedScanBlinkInProgress = false;
                }
                if (pLed->bLedWPSBlinkInProgress)
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                else
                        pLed->bLedWPSBlinkInProgress = true;
                pLed->CurrLedState = LED_BLINK_WPS_STOP;
@@ -1057,7 +1057,7 @@ static void SwLedControlMode1(struct _adapter *padapter,
                break;
        case LED_CTL_STOP_WPS_FAIL:
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                pLed->bLedNoLinkBlinkInProgress = true;
@@ -1073,23 +1073,23 @@ static void SwLedControlMode1(struct _adapter *padapter,
                pLed->CurrLedState = LED_OFF;
                pLed->BlinkingLedState = LED_OFF;
                if (pLed->bLedNoLinkBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedNoLinkBlinkInProgress = false;
                }
                if (pLed->bLedLinkBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedLinkBlinkInProgress = false;
                }
                if (pLed->bLedBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedBlinkInProgress = false;
                }
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                if (pLed->bLedScanBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedScanBlinkInProgress = false;
                }
                mod_timer(&pLed->BlinkTimer,
@@ -1116,7 +1116,7 @@ static void SwLedControlMode2(struct _adapter *padapter,
                                return;
 
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        pLed->bLedScanBlinkInProgress = true;
@@ -1154,11 +1154,11 @@ static void SwLedControlMode2(struct _adapter *padapter,
                pLed->CurrLedState = LED_ON;
                pLed->BlinkingLedState = LED_ON;
                if (pLed->bLedBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedBlinkInProgress = false;
                }
                if (pLed->bLedScanBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedScanBlinkInProgress = false;
                }
 
@@ -1170,11 +1170,11 @@ static void SwLedControlMode2(struct _adapter *padapter,
        case LED_CTL_START_WPS_BOTTON:
                if (pLed->bLedWPSBlinkInProgress == false) {
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        if (pLed->bLedScanBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedScanBlinkInProgress = false;
                        }
                        pLed->bLedWPSBlinkInProgress = true;
@@ -1214,15 +1214,15 @@ static void SwLedControlMode2(struct _adapter *padapter,
                pLed->CurrLedState = LED_OFF;
                pLed->BlinkingLedState = LED_OFF;
                if (pLed->bLedBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedBlinkInProgress = false;
                }
                if (pLed->bLedScanBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedScanBlinkInProgress = false;
                }
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                mod_timer(&pLed->BlinkTimer,
@@ -1248,7 +1248,7 @@ static void SwLedControlMode3(struct _adapter *padapter,
                        if (IS_LED_WPS_BLINKING(pLed))
                                return;
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        pLed->bLedScanBlinkInProgress = true;
@@ -1286,11 +1286,11 @@ static void SwLedControlMode3(struct _adapter *padapter,
                pLed->CurrLedState = LED_ON;
                pLed->BlinkingLedState = LED_ON;
                if (pLed->bLedBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedBlinkInProgress = false;
                }
                if (pLed->bLedScanBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedScanBlinkInProgress = false;
                }
                mod_timer(&pLed->BlinkTimer,
@@ -1300,11 +1300,11 @@ static void SwLedControlMode3(struct _adapter *padapter,
        case LED_CTL_START_WPS_BOTTON:
                if (pLed->bLedWPSBlinkInProgress == false) {
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        if (pLed->bLedScanBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedScanBlinkInProgress = false;
                        }
                        pLed->bLedWPSBlinkInProgress = true;
@@ -1319,7 +1319,7 @@ static void SwLedControlMode3(struct _adapter *padapter,
                break;
        case LED_CTL_STOP_WPS:
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&(pLed->BlinkTimer));
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                } else
                        pLed->bLedWPSBlinkInProgress = true;
@@ -1336,7 +1336,7 @@ static void SwLedControlMode3(struct _adapter *padapter,
                break;
        case LED_CTL_STOP_WPS_FAIL:
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                pLed->CurrLedState = LED_OFF;
@@ -1357,15 +1357,15 @@ static void SwLedControlMode3(struct _adapter *padapter,
                pLed->CurrLedState = LED_OFF;
                pLed->BlinkingLedState = LED_OFF;
                if (pLed->bLedBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedBlinkInProgress = false;
                }
                if (pLed->bLedScanBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedScanBlinkInProgress = false;
                }
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                mod_timer(&pLed->BlinkTimer,
@@ -1388,7 +1388,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
        case LED_CTL_START_TO_LINK:
                if (pLed1->bLedWPSBlinkInProgress) {
                        pLed1->bLedWPSBlinkInProgress = false;
-                       del_timer_sync(&pLed1->BlinkTimer);
+                       del_timer(&pLed1->BlinkTimer);
                        pLed1->BlinkingLedState = LED_OFF;
                        pLed1->CurrLedState = LED_OFF;
                        if (pLed1->bLedOn)
@@ -1400,11 +1400,11 @@ static void SwLedControlMode4(struct _adapter *padapter,
                            IS_LED_WPS_BLINKING(pLed))
                                return;
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        if (pLed->bLedNoLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedNoLinkBlinkInProgress = false;
                        }
                        pLed->bLedStartToLinkBlinkInProgress = true;
@@ -1426,7 +1426,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
                if (LedAction == LED_CTL_LINK) {
                        if (pLed1->bLedWPSBlinkInProgress) {
                                pLed1->bLedWPSBlinkInProgress = false;
-                               del_timer_sync(&pLed1->BlinkTimer);
+                               del_timer(&pLed1->BlinkTimer);
                                pLed1->BlinkingLedState = LED_OFF;
                                pLed1->CurrLedState = LED_OFF;
                                if (pLed1->bLedOn)
@@ -1439,7 +1439,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
                            IS_LED_WPS_BLINKING(pLed))
                                return;
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        pLed->bLedNoLinkBlinkInProgress = true;
@@ -1460,11 +1460,11 @@ static void SwLedControlMode4(struct _adapter *padapter,
                        if (IS_LED_WPS_BLINKING(pLed))
                                return;
                        if (pLed->bLedNoLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedNoLinkBlinkInProgress = false;
                        }
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        pLed->bLedScanBlinkInProgress = true;
@@ -1485,7 +1485,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
                            IS_LED_WPS_BLINKING(pLed))
                                return;
                        if (pLed->bLedNoLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedNoLinkBlinkInProgress = false;
                        }
                        pLed->bLedBlinkInProgress = true;
@@ -1503,7 +1503,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
        case LED_CTL_START_WPS_BOTTON:
                if (pLed1->bLedWPSBlinkInProgress) {
                        pLed1->bLedWPSBlinkInProgress = false;
-                       del_timer_sync(&(pLed1->BlinkTimer));
+                       del_timer(&pLed1->BlinkTimer);
                        pLed1->BlinkingLedState = LED_OFF;
                        pLed1->CurrLedState = LED_OFF;
                        if (pLed1->bLedOn)
@@ -1512,15 +1512,15 @@ static void SwLedControlMode4(struct _adapter *padapter,
                }
                if (pLed->bLedWPSBlinkInProgress == false) {
                        if (pLed->bLedNoLinkBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedNoLinkBlinkInProgress = false;
                        }
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        if (pLed->bLedScanBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedScanBlinkInProgress = false;
                        }
                        pLed->bLedWPSBlinkInProgress = true;
@@ -1538,7 +1538,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
                break;
        case LED_CTL_STOP_WPS:  /*WPS connect success*/
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                pLed->bLedNoLinkBlinkInProgress = true;
@@ -1552,7 +1552,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
                break;
        case LED_CTL_STOP_WPS_FAIL:     /*WPS authentication fail*/
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                pLed->bLedNoLinkBlinkInProgress = true;
@@ -1565,7 +1565,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
                          msecs_to_jiffies(LED_BLINK_NO_LINK_INTERVAL_ALPHA));
                /*LED1 settings*/
                if (pLed1->bLedWPSBlinkInProgress)
-                       del_timer_sync(&pLed1->BlinkTimer);
+                       del_timer(&pLed1->BlinkTimer);
                else
                        pLed1->bLedWPSBlinkInProgress = true;
                pLed1->CurrLedState = LED_BLINK_WPS_STOP;
@@ -1578,7 +1578,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
                break;
        case LED_CTL_STOP_WPS_FAIL_OVERLAP:     /*WPS session overlap*/
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                pLed->bLedNoLinkBlinkInProgress = true;
@@ -1591,7 +1591,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
                          msecs_to_jiffies(LED_BLINK_NO_LINK_INTERVAL_ALPHA));
                /*LED1 settings*/
                if (pLed1->bLedWPSBlinkInProgress)
-                       del_timer_sync(&pLed1->BlinkTimer);
+                       del_timer(&pLed1->BlinkTimer);
                else
                        pLed1->bLedWPSBlinkInProgress = true;
                pLed1->CurrLedState = LED_BLINK_WPS_STOP_OVERLAP;
@@ -1607,31 +1607,31 @@ static void SwLedControlMode4(struct _adapter *padapter,
                pLed->CurrLedState = LED_OFF;
                pLed->BlinkingLedState = LED_OFF;
                if (pLed->bLedNoLinkBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedNoLinkBlinkInProgress = false;
                }
                if (pLed->bLedLinkBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedLinkBlinkInProgress = false;
                }
                if (pLed->bLedBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedBlinkInProgress = false;
                }
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                if (pLed->bLedScanBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedScanBlinkInProgress = false;
                }
                if (pLed->bLedStartToLinkBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedStartToLinkBlinkInProgress = false;
                }
                if (pLed1->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed1->BlinkTimer);
+                       del_timer(&pLed1->BlinkTimer);
                        pLed1->bLedWPSBlinkInProgress = false;
                }
                pLed1->BlinkingLedState = LED_UNKNOWN;
@@ -1671,7 +1671,7 @@ static void SwLedControlMode5(struct _adapter *padapter,
                        ; /* dummy branch */
                else if (pLed->bLedScanBlinkInProgress == false) {
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        pLed->bLedScanBlinkInProgress = true;
@@ -1705,7 +1705,7 @@ static void SwLedControlMode5(struct _adapter *padapter,
                pLed->CurrLedState = LED_OFF;
                pLed->BlinkingLedState = LED_OFF;
                if (pLed->bLedBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedBlinkInProgress = false;
                }
                SwLedOff(padapter, pLed);
@@ -1756,7 +1756,7 @@ static void SwLedControlMode6(struct _adapter *padapter,
        case LED_CTL_START_WPS_BOTTON:
                if (pLed->bLedWPSBlinkInProgress == false) {
                        if (pLed->bLedBlinkInProgress == true) {
-                               del_timer_sync(&pLed->BlinkTimer);
+                               del_timer(&pLed->BlinkTimer);
                                pLed->bLedBlinkInProgress = false;
                        }
                        pLed->bLedWPSBlinkInProgress = true;
@@ -1772,7 +1772,7 @@ static void SwLedControlMode6(struct _adapter *padapter,
        case LED_CTL_STOP_WPS_FAIL:
        case LED_CTL_STOP_WPS:
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                pLed->CurrLedState = LED_ON;
@@ -1784,11 +1784,11 @@ static void SwLedControlMode6(struct _adapter *padapter,
                pLed->CurrLedState = LED_OFF;
                pLed->BlinkingLedState = LED_OFF;
                if (pLed->bLedBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedBlinkInProgress = false;
                }
                if (pLed->bLedWPSBlinkInProgress) {
-                       del_timer_sync(&pLed->BlinkTimer);
+                       del_timer(&pLed->BlinkTimer);
                        pLed->bLedWPSBlinkInProgress = false;
                }
                SwLedOff(padapter, pLed);
index 1a1c38f..e35854d 100644 (file)
@@ -910,7 +910,7 @@ void r8712_createbss_cmd_callback(struct _adapter *padapter,
        if (pcmd->res != H2C_SUCCESS)
                mod_timer(&pmlmepriv->assoc_timer,
                          jiffies + msecs_to_jiffies(1));
-       del_timer_sync(&pmlmepriv->assoc_timer);
+       del_timer(&pmlmepriv->assoc_timer);
 #ifdef __BIG_ENDIAN
        /* endian_convert */
        pnetwork->Length = le32_to_cpu(pnetwork->Length);
index 42fba3f..cb0b638 100644 (file)
@@ -1900,23 +1900,20 @@ static int r871x_mp_ioctl_hdl(struct net_device *dev,
        struct mp_ioctl_handler *phandler;
        struct mp_ioctl_param *poidparam;
        unsigned long BytesRead, BytesWritten, BytesNeeded;
-       u8 *pparmbuf = NULL, bset;
+       u8 *pparmbuf, bset;
        u16 len;
        uint status;
        int ret = 0;
 
-       if ((!p->length) || (!p->pointer)) {
-               ret = -EINVAL;
-               goto _r871x_mp_ioctl_hdl_exit;
-       }
+       if ((!p->length) || (!p->pointer))
+               return -EINVAL;
+
        bset = (u8)(p->flags & 0xFFFF);
        len = p->length;
-       pparmbuf = NULL;
        pparmbuf = memdup_user(p->pointer, len);
-       if (IS_ERR(pparmbuf)) {
-               ret = PTR_ERR(pparmbuf);
-               goto _r871x_mp_ioctl_hdl_exit;
-       }
+       if (IS_ERR(pparmbuf))
+               return PTR_ERR(pparmbuf);
+
        poidparam = (struct mp_ioctl_param *)pparmbuf;
        if (poidparam->subcode >= MAX_MP_IOCTL_SUBCODE) {
                ret = -EINVAL;
index fb2b195..c044b0e 100644 (file)
@@ -582,7 +582,7 @@ void r8712_surveydone_event_callback(struct _adapter *adapter, u8 *pbuf)
        spin_lock_irqsave(&pmlmepriv->lock, irqL);
 
        if (check_fwstate(pmlmepriv, _FW_UNDER_SURVEY) == true) {
-               del_timer_sync(&pmlmepriv->scan_to_timer);
+               del_timer(&pmlmepriv->scan_to_timer);
 
                _clr_fwstate_(pmlmepriv, _FW_UNDER_SURVEY);
        }
@@ -696,7 +696,7 @@ void r8712_ind_disconnect(struct _adapter *padapter)
        }
        if (padapter->pwrctrlpriv.pwr_mode !=
            padapter->registrypriv.power_mgnt) {
-               del_timer_sync(&pmlmepriv->dhcp_timer);
+               del_timer(&pmlmepriv->dhcp_timer);
                r8712_set_ps_mode(padapter, padapter->registrypriv.power_mgnt,
                                  padapter->registrypriv.smart_ps);
        }
@@ -910,7 +910,7 @@ void r8712_joinbss_event_callback(struct _adapter *adapter, u8 *pbuf)
                        if (check_fwstate(pmlmepriv, WIFI_STATION_STATE)
                                == true)
                                r8712_indicate_connect(adapter);
-                       del_timer_sync(&pmlmepriv->assoc_timer);
+                       del_timer(&pmlmepriv->assoc_timer);
                } else
                        goto ignore_joinbss_callback;
        } else {
index aaa5844..9bc04f4 100644 (file)
@@ -103,7 +103,7 @@ void r8712_cpwm_int_hdl(struct _adapter *padapter,
 
        if (pwrpriv->cpwm_tog == ((preportpwrstate->state) & 0x80))
                return;
-       del_timer_sync(&padapter->pwrctrlpriv.rpwm_check_timer);
+       del_timer(&padapter->pwrctrlpriv.rpwm_check_timer);
        _enter_pwrlock(&pwrpriv->lock);
        pwrpriv->cpwm = (preportpwrstate->state) & 0xf;
        if (pwrpriv->cpwm >= PS_STATE_S2) {
index 7bb96c4..a9b93d0 100644 (file)
@@ -198,7 +198,7 @@ void r8712_free_stainfo(struct _adapter *padapter, struct sta_info *psta)
         * cancel reordering_ctrl_timer */
        for (i = 0; i < 16; i++) {
                preorder_ctrl = &psta->recvreorder_ctrl[i];
-               del_timer_sync(&preorder_ctrl->reordering_ctrl_timer);
+               del_timer(&preorder_ctrl->reordering_ctrl_timer);
        }
        spin_lock(&(pfree_sta_queue->lock));
        /* insert into free_sta_queue; 20061114 */
index 3c7ea95..dbbb2f8 100644 (file)
@@ -1250,7 +1250,7 @@ err_enable:
        return -ENODEV;
 }
 
-static void __exit lynxfb_pci_remove(struct pci_dev *pdev)
+static void lynxfb_pci_remove(struct pci_dev *pdev)
 {
        struct fb_info *info;
        struct lynx_share *share;
index 1cdcf49..e00c060 100644 (file)
@@ -362,12 +362,16 @@ bool CARDbSetPhyParameter(struct vnt_private *pDevice, u8 bb_type)
  * Return Value: none
  */
 bool CARDbUpdateTSF(struct vnt_private *pDevice, unsigned char byRxRate,
-                   u64 qwBSSTimestamp, u64 qwLocalTSF)
+                   u64 qwBSSTimestamp)
 {
+       u64 local_tsf;
        u64 qwTSFOffset = 0;
 
-       if (qwBSSTimestamp != qwLocalTSF) {
-               qwTSFOffset = CARDqGetTSFOffset(byRxRate, qwBSSTimestamp, qwLocalTSF);
+       CARDbGetCurrentTSF(pDevice, &local_tsf);
+
+       if (qwBSSTimestamp != local_tsf) {
+               qwTSFOffset = CARDqGetTSFOffset(byRxRate, qwBSSTimestamp,
+                                               local_tsf);
                /* adjust TSF, HW's TSF add TSF Offset reg */
                VNSvOutPortD(pDevice->PortOffset + MAC_REG_TSFOFST, (u32)qwTSFOffset);
                VNSvOutPortD(pDevice->PortOffset + MAC_REG_TSFOFST + 4, (u32)(qwTSFOffset >> 32));
index 2dfc419..16cca49 100644 (file)
@@ -83,7 +83,7 @@ bool CARDbRadioPowerOff(struct vnt_private *);
 bool CARDbRadioPowerOn(struct vnt_private *);
 bool CARDbSetPhyParameter(struct vnt_private *, u8);
 bool CARDbUpdateTSF(struct vnt_private *, unsigned char byRxRate,
-                   u64 qwBSSTimestamp, u64 qwLocalTSF);
+                   u64 qwBSSTimestamp);
 bool CARDbSetBeaconPeriod(struct vnt_private *, unsigned short wBeaconInterval);
 
 #endif /* __CARD_H__ */
index 4bb4f8e..0343ae3 100644 (file)
@@ -912,7 +912,11 @@ static int vnt_int_report_rate(struct vnt_private *priv,
 
        if (!(tsr1 & TSR1_TERR)) {
                info->status.rates[0].idx = idx;
-               info->flags |= IEEE80211_TX_STAT_ACK;
+
+               if (info->flags & IEEE80211_TX_CTL_NO_ACK)
+                       info->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED;
+               else
+                       info->flags |= IEEE80211_TX_STAT_ACK;
        }
 
        return 0;
@@ -937,9 +941,6 @@ static int device_tx_srv(struct vnt_private *pDevice, unsigned int uIdx)
                /* Only the status of first TD in the chain is correct */
                if (pTD->m_td1TD1.byTCR & TCR_STP) {
                        if ((pTD->pTDInfo->byFlags & TD_FLAGS_NETIF_SKB) != 0) {
-
-                               vnt_int_report_rate(pDevice, pTD->pTDInfo, byTsr0, byTsr1);
-
                                if (!(byTsr1 & TSR1_TERR)) {
                                        if (byTsr0 != 0) {
                                                pr_debug(" Tx[%d] OK but has error. tsr1[%02X] tsr0[%02X]\n",
@@ -958,6 +959,9 @@ static int device_tx_srv(struct vnt_private *pDevice, unsigned int uIdx)
                                                 (int)uIdx, byTsr1, byTsr0);
                                }
                        }
+
+                       vnt_int_report_rate(pDevice, pTD->pTDInfo, byTsr0, byTsr1);
+
                        device_free_tx_buf(pDevice, pTD);
                        pDevice->iTDUsed[uIdx]--;
                }
@@ -989,10 +993,8 @@ static void device_free_tx_buf(struct vnt_private *pDevice, PSTxDesc pDesc)
                                 skb->len, DMA_TO_DEVICE);
        }
 
-       if (pTDInfo->byFlags & TD_FLAGS_NETIF_SKB)
+       if (skb)
                ieee80211_tx_status_irqsafe(pDevice->hw, skb);
-       else
-               dev_kfree_skb_irq(skb);
 
        pTDInfo->skb_dma = 0;
        pTDInfo->skb = NULL;
@@ -1204,14 +1206,6 @@ static int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb)
        if (dma_idx == TYPE_AC0DMA)
                head_td->pTDInfo->byFlags = TD_FLAGS_NETIF_SKB;
 
-       priv->iTDUsed[dma_idx]++;
-
-       /* Take ownership */
-       wmb();
-       head_td->m_td0TD0.f1Owner = OWNED_BY_NIC;
-
-       /* get Next */
-       wmb();
        priv->apCurrTD[dma_idx] = head_td->next;
 
        spin_unlock_irqrestore(&priv->lock, flags);
@@ -1232,11 +1226,18 @@ static int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb)
 
        head_td->buff_addr = cpu_to_le32(head_td->pTDInfo->skb_dma);
 
+       /* Poll Transmit the adapter */
+       wmb();
+       head_td->m_td0TD0.f1Owner = OWNED_BY_NIC;
+       wmb(); /* second memory barrier */
+
        if (head_td->pTDInfo->byFlags & TD_FLAGS_NETIF_SKB)
                MACvTransmitAC0(priv->PortOffset);
        else
                MACvTransmit0(priv->PortOffset);
 
+       priv->iTDUsed[dma_idx]++;
+
        spin_unlock_irqrestore(&priv->lock, flags);
 
        return 0;
@@ -1416,9 +1417,16 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw,
 
        priv->current_aid = conf->aid;
 
-       if (changed & BSS_CHANGED_BSSID)
+       if (changed & BSS_CHANGED_BSSID) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&priv->lock, flags);
+
                MACvWriteBSSIDAddress(priv->PortOffset, (u8 *)conf->bssid);
 
+               spin_unlock_irqrestore(&priv->lock, flags);
+       }
+
        if (changed & BSS_CHANGED_BASIC_RATES) {
                priv->basic_rates = conf->basic_rates;
 
@@ -1477,7 +1485,7 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw,
        if (changed & BSS_CHANGED_ASSOC && priv->op_mode != NL80211_IFTYPE_AP) {
                if (conf->assoc) {
                        CARDbUpdateTSF(priv, conf->beacon_rate->hw_value,
-                                      conf->sync_device_ts, conf->sync_tsf);
+                                      conf->sync_tsf);
 
                        CARDbSetBeaconPeriod(priv, conf->beacon_int);
 
index f6c2cf8..5c58996 100644 (file)
@@ -805,10 +805,18 @@ int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb)
                vnt_schedule_command(priv, WLAN_CMD_SETPOWER);
        }
 
-       if (current_rate > RATE_11M)
-               pkt_type = priv->packet_type;
-       else
+       if (current_rate > RATE_11M) {
+               if (info->band == IEEE80211_BAND_5GHZ) {
+                       pkt_type = PK_TYPE_11A;
+               } else {
+                       if (tx_rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
+                               pkt_type = PK_TYPE_11GB;
+                       else
+                               pkt_type = PK_TYPE_11GA;
+               }
+       } else {
                pkt_type = PK_TYPE_11B;
+       }
 
        spin_lock_irqsave(&priv->lock, flags);
 
index 34871a6..74e6114 100644 (file)
@@ -230,7 +230,7 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
         * Here we serialize access across the TIQN+TPG Tuple.
         */
        ret = down_interruptible(&tpg->np_login_sem);
-       if ((ret != 0) || signal_pending(current))
+       if (ret != 0)
                return -1;
 
        spin_lock_bh(&tpg->tpg_state_lock);
index 8ce94ff..70d799d 100644 (file)
@@ -346,6 +346,7 @@ static int iscsi_login_zero_tsih_s1(
        if (IS_ERR(sess->se_sess)) {
                iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
                                ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               kfree(sess->sess_ops);
                kfree(sess);
                return -ENOMEM;
        }
index e8a2408..5e3295f 100644 (file)
@@ -161,10 +161,7 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np(
 int iscsit_get_tpg(
        struct iscsi_portal_group *tpg)
 {
-       int ret;
-
-       ret = mutex_lock_interruptible(&tpg->tpg_access_lock);
-       return ((ret != 0) || signal_pending(current)) ? -1 : 0;
+       return mutex_lock_interruptible(&tpg->tpg_access_lock);
 }
 
 void iscsit_put_tpg(struct iscsi_portal_group *tpg)
index 75cbde1..4f8d4d4 100644 (file)
@@ -704,7 +704,7 @@ target_alua_state_check(struct se_cmd *cmd)
 
        if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
                return 0;
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        if (!port)
@@ -2377,7 +2377,7 @@ ssize_t core_alua_store_secondary_write_metadata(
 
 int core_setup_alua(struct se_device *dev)
 {
-       if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV &&
+       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
            !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) {
                struct t10_alua_lu_gp_member *lu_gp_mem;
 
index ddaf76a..e7b0430 100644 (file)
@@ -212,10 +212,6 @@ static struct config_group *target_core_register_fabric(
 
        pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:"
                        " %s\n", tf->tf_group.cg_item.ci_name);
-       /*
-        * Setup tf_ops.tf_subsys pointer for usage with configfs_depend_item()
-        */
-       tf->tf_ops.tf_subsys = tf->tf_subsys;
        tf->tf_fabric = &tf->tf_group.cg_item;
        pr_debug("Target_Core_ConfigFS: REGISTER -> Set tf->tf_fabric"
                        " for %s\n", name);
@@ -291,10 +287,17 @@ static struct configfs_subsystem target_core_fabrics = {
        },
 };
 
-struct configfs_subsystem *target_core_subsystem[] = {
-       &target_core_fabrics,
-       NULL,
-};
+int target_depend_item(struct config_item *item)
+{
+       return configfs_depend_item(&target_core_fabrics, item);
+}
+EXPORT_SYMBOL(target_depend_item);
+
+void target_undepend_item(struct config_item *item)
+{
+       return configfs_undepend_item(&target_core_fabrics, item);
+}
+EXPORT_SYMBOL(target_undepend_item);
 
 /*##############################################################################
 // Start functions called by external Target Fabrics Modules
@@ -467,7 +470,6 @@ int target_register_template(const struct target_core_fabric_ops *fo)
         * struct target_fabric_configfs->tf_cit_tmpl
         */
        tf->tf_module = fo->module;
-       tf->tf_subsys = target_core_subsystem[0];
        snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name);
 
        tf->tf_ops = *fo;
@@ -809,7 +811,7 @@ static ssize_t target_core_dev_pr_show_attr_res_holder(struct se_device *dev,
 {
        int ret;
 
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return sprintf(page, "Passthrough\n");
 
        spin_lock(&dev->dev_reservation_lock);
@@ -960,7 +962,7 @@ SE_DEV_PR_ATTR_RO(res_pr_type);
 static ssize_t target_core_dev_pr_show_attr_res_type(
                struct se_device *dev, char *page)
 {
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return sprintf(page, "SPC_PASSTHROUGH\n");
        else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
                return sprintf(page, "SPC2_RESERVATIONS\n");
@@ -973,7 +975,7 @@ SE_DEV_PR_ATTR_RO(res_type);
 static ssize_t target_core_dev_pr_show_attr_res_aptpl_active(
                struct se_device *dev, char *page)
 {
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        return sprintf(page, "APTPL Bit Status: %s\n",
@@ -988,7 +990,7 @@ SE_DEV_PR_ATTR_RO(res_aptpl_active);
 static ssize_t target_core_dev_pr_show_attr_res_aptpl_metadata(
                struct se_device *dev, char *page)
 {
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        return sprintf(page, "Ready to process PR APTPL metadata..\n");
@@ -1035,7 +1037,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata(
        u16 port_rpti = 0, tpgt = 0;
        u8 type = 0, scope;
 
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
        if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
                return 0;
@@ -2870,7 +2872,7 @@ static int __init target_core_init_configfs(void)
 {
        struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL;
        struct config_group *lu_gp_cg = NULL;
-       struct configfs_subsystem *subsys;
+       struct configfs_subsystem *subsys = &target_core_fabrics;
        struct t10_alua_lu_gp *lu_gp;
        int ret;
 
@@ -2878,7 +2880,6 @@ static int __init target_core_init_configfs(void)
                " Engine: %s on %s/%s on "UTS_RELEASE"\n",
                TARGET_CORE_VERSION, utsname()->sysname, utsname()->machine);
 
-       subsys = target_core_subsystem[0];
        config_group_init(&subsys->su_group);
        mutex_init(&subsys->su_mutex);
 
@@ -3008,13 +3009,10 @@ out_global:
 
 static void __exit target_core_exit_configfs(void)
 {
-       struct configfs_subsystem *subsys;
        struct config_group *hba_cg, *alua_cg, *lu_gp_cg;
        struct config_item *item;
        int i;
 
-       subsys = target_core_subsystem[0];
-
        lu_gp_cg = &alua_lu_gps_group;
        for (i = 0; lu_gp_cg->default_groups[i]; i++) {
                item = &lu_gp_cg->default_groups[i]->cg_item;
@@ -3045,8 +3043,8 @@ static void __exit target_core_exit_configfs(void)
         * We expect subsys->su_group.default_groups to be released
         * by configfs subsystem provider logic..
         */
-       configfs_unregister_subsystem(subsys);
-       kfree(subsys->su_group.default_groups);
+       configfs_unregister_subsystem(&target_core_fabrics);
+       kfree(target_core_fabrics.su_group.default_groups);
 
        core_alua_free_lu_gp(default_lu_gp);
        default_lu_gp = NULL;
index 7faa6ae..ce5f768 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/kthread.h>
 #include <linux/in.h>
 #include <linux/export.h>
+#include <asm/unaligned.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <scsi/scsi.h>
@@ -527,7 +528,7 @@ static void core_export_port(
        list_add_tail(&port->sep_list, &dev->dev_sep_list);
        spin_unlock(&dev->se_port_lock);
 
-       if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV &&
+       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
            !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) {
                tg_pt_gp_mem = core_alua_allocate_tg_pt_gp_mem(port);
                if (IS_ERR(tg_pt_gp_mem) || !tg_pt_gp_mem) {
@@ -1603,7 +1604,7 @@ int target_configure_device(struct se_device *dev)
         * anything virtual (IBLOCK, FILEIO, RAMDISK), but not for TCM/pSCSI
         * passthrough because this is being provided by the backend LLD.
         */
-       if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV) {
+       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)) {
                strncpy(&dev->t10_wwn.vendor[0], "LIO-ORG", 8);
                strncpy(&dev->t10_wwn.model[0],
                        dev->transport->inquiry_prod, 16);
@@ -1707,3 +1708,76 @@ void core_dev_release_virtual_lun0(void)
                target_free_device(g_lun0_dev);
        core_delete_hba(hba);
 }
+
+/*
+ * Common CDB parsing for kernel and user passthrough.
+ */
+sense_reason_t
+passthrough_parse_cdb(struct se_cmd *cmd,
+       sense_reason_t (*exec_cmd)(struct se_cmd *cmd))
+{
+       unsigned char *cdb = cmd->t_task_cdb;
+
+       /*
+        * Clear a lun set in the cdb if the initiator talking to use spoke
+        * and old standards version, as we can't assume the underlying device
+        * won't choke up on it.
+        */
+       switch (cdb[0]) {
+       case READ_10: /* SBC - RDProtect */
+       case READ_12: /* SBC - RDProtect */
+       case READ_16: /* SBC - RDProtect */
+       case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */
+       case VERIFY: /* SBC - VRProtect */
+       case VERIFY_16: /* SBC - VRProtect */
+       case WRITE_VERIFY: /* SBC - VRProtect */
+       case WRITE_VERIFY_12: /* SBC - VRProtect */
+       case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */
+               break;
+       default:
+               cdb[1] &= 0x1f; /* clear logical unit number */
+               break;
+       }
+
+       /*
+        * For REPORT LUNS we always need to emulate the response, for everything
+        * else, pass it up.
+        */
+       if (cdb[0] == REPORT_LUNS) {
+               cmd->execute_cmd = spc_emulate_report_luns;
+               return TCM_NO_SENSE;
+       }
+
+       /* Set DATA_CDB flag for ops that should have it */
+       switch (cdb[0]) {
+       case READ_6:
+       case READ_10:
+       case READ_12:
+       case READ_16:
+       case WRITE_6:
+       case WRITE_10:
+       case WRITE_12:
+       case WRITE_16:
+       case WRITE_VERIFY:
+       case WRITE_VERIFY_12:
+       case 0x8e: /* WRITE_VERIFY_16 */
+       case COMPARE_AND_WRITE:
+       case XDWRITEREAD_10:
+               cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
+               break;
+       case VARIABLE_LENGTH_CMD:
+               switch (get_unaligned_be16(&cdb[8])) {
+               case READ_32:
+               case WRITE_32:
+               case 0x0c: /* WRITE_VERIFY_32 */
+               case XDWRITEREAD_32:
+                       cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
+                       break;
+               }
+       }
+
+       cmd->execute_cmd = exec_cmd;
+
+       return TCM_NO_SENSE;
+}
+EXPORT_SYMBOL(passthrough_parse_cdb);
index f7e6e51..3f27bfd 100644 (file)
@@ -958,7 +958,6 @@ static struct se_subsystem_api fileio_template = {
        .inquiry_prod           = "FILEIO",
        .inquiry_rev            = FD_VERSION,
        .owner                  = THIS_MODULE,
-       .transport_type         = TRANSPORT_PLUGIN_VHBA_PDEV,
        .attach_hba             = fd_attach_hba,
        .detach_hba             = fd_detach_hba,
        .alloc_device           = fd_alloc_device,
index 1b7947c..8c96568 100644 (file)
@@ -904,7 +904,6 @@ static struct se_subsystem_api iblock_template = {
        .inquiry_prod           = "IBLOCK",
        .inquiry_rev            = IBLOCK_VERSION,
        .owner                  = THIS_MODULE,
-       .transport_type         = TRANSPORT_PLUGIN_VHBA_PDEV,
        .attach_hba             = iblock_attach_hba,
        .detach_hba             = iblock_detach_hba,
        .alloc_device           = iblock_alloc_device,
index 874a9bc..68bd7f5 100644 (file)
@@ -4,9 +4,6 @@
 /* target_core_alua.c */
 extern struct t10_alua_lu_gp *default_lu_gp;
 
-/* target_core_configfs.c */
-extern struct configfs_subsystem *target_core_subsystem[];
-
 /* target_core_device.c */
 extern struct mutex g_device_mutex;
 extern struct list_head g_device_list;
index c1aa965..a15411c 100644 (file)
@@ -1367,41 +1367,26 @@ void core_scsi3_free_all_registrations(
 
 static int core_scsi3_tpg_depend_item(struct se_portal_group *tpg)
 {
-       return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &tpg->tpg_group.cg_item);
+       return target_depend_item(&tpg->tpg_group.cg_item);
 }
 
 static void core_scsi3_tpg_undepend_item(struct se_portal_group *tpg)
 {
-       configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &tpg->tpg_group.cg_item);
-
+       target_undepend_item(&tpg->tpg_group.cg_item);
        atomic_dec_mb(&tpg->tpg_pr_ref_count);
 }
 
 static int core_scsi3_nodeacl_depend_item(struct se_node_acl *nacl)
 {
-       struct se_portal_group *tpg = nacl->se_tpg;
-
        if (nacl->dynamic_node_acl)
                return 0;
-
-       return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &nacl->acl_group.cg_item);
+       return target_depend_item(&nacl->acl_group.cg_item);
 }
 
 static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl)
 {
-       struct se_portal_group *tpg = nacl->se_tpg;
-
-       if (nacl->dynamic_node_acl) {
-               atomic_dec_mb(&nacl->acl_pr_ref_count);
-               return;
-       }
-
-       configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &nacl->acl_group.cg_item);
-
+       if (!nacl->dynamic_node_acl)
+               target_undepend_item(&nacl->acl_group.cg_item);
        atomic_dec_mb(&nacl->acl_pr_ref_count);
 }
 
@@ -1419,8 +1404,7 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve)
        nacl = lun_acl->se_lun_nacl;
        tpg = nacl->se_tpg;
 
-       return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &lun_acl->se_lun_group.cg_item);
+       return target_depend_item(&lun_acl->se_lun_group.cg_item);
 }
 
 static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve)
@@ -1438,9 +1422,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve)
        nacl = lun_acl->se_lun_nacl;
        tpg = nacl->se_tpg;
 
-       configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &lun_acl->se_lun_group.cg_item);
-
+       target_undepend_item(&lun_acl->se_lun_group.cg_item);
        atomic_dec_mb(&se_deve->pr_ref_count);
 }
 
@@ -4111,7 +4093,7 @@ target_check_reservation(struct se_cmd *cmd)
                return 0;
        if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
                return 0;
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        spin_lock(&dev->dev_reservation_lock);
index f6c954c..ecc5eae 100644 (file)
@@ -521,6 +521,7 @@ static int pscsi_configure_device(struct se_device *dev)
                                        " pdv_host_id: %d\n", pdv->pdv_host_id);
                                return -EINVAL;
                        }
+                       pdv->pdv_lld_host = sh;
                }
        } else {
                if (phv->phv_mode == PHV_VIRTUAL_HOST_ID) {
@@ -603,6 +604,8 @@ static void pscsi_free_device(struct se_device *dev)
                if ((phv->phv_mode == PHV_LLD_SCSI_HOST_NO) &&
                    (phv->phv_lld_host != NULL))
                        scsi_host_put(phv->phv_lld_host);
+               else if (pdv->pdv_lld_host)
+                       scsi_host_put(pdv->pdv_lld_host);
 
                if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM))
                        scsi_device_put(sd);
@@ -970,64 +973,13 @@ fail:
        return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 }
 
-/*
- * Clear a lun set in the cdb if the initiator talking to use spoke
- * and old standards version, as we can't assume the underlying device
- * won't choke up on it.
- */
-static inline void pscsi_clear_cdb_lun(unsigned char *cdb)
-{
-       switch (cdb[0]) {
-       case READ_10: /* SBC - RDProtect */
-       case READ_12: /* SBC - RDProtect */
-       case READ_16: /* SBC - RDProtect */
-       case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */
-       case VERIFY: /* SBC - VRProtect */
-       case VERIFY_16: /* SBC - VRProtect */
-       case WRITE_VERIFY: /* SBC - VRProtect */
-       case WRITE_VERIFY_12: /* SBC - VRProtect */
-       case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */
-               break;
-       default:
-               cdb[1] &= 0x1f; /* clear logical unit number */
-               break;
-       }
-}
-
 static sense_reason_t
 pscsi_parse_cdb(struct se_cmd *cmd)
 {
-       unsigned char *cdb = cmd->t_task_cdb;
-
        if (cmd->se_cmd_flags & SCF_BIDI)
                return TCM_UNSUPPORTED_SCSI_OPCODE;
 
-       pscsi_clear_cdb_lun(cdb);
-
-       /*
-        * For REPORT LUNS we always need to emulate the response, for everything
-        * else the default for pSCSI is to pass the command to the underlying
-        * LLD / physical hardware.
-        */
-       switch (cdb[0]) {
-       case REPORT_LUNS:
-               cmd->execute_cmd = spc_emulate_report_luns;
-               return 0;
-       case READ_6:
-       case READ_10:
-       case READ_12:
-       case READ_16:
-       case WRITE_6:
-       case WRITE_10:
-       case WRITE_12:
-       case WRITE_16:
-       case WRITE_VERIFY:
-               cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               /* FALLTHROUGH*/
-       default:
-               cmd->execute_cmd = pscsi_execute_cmd;
-               return 0;
-       }
+       return passthrough_parse_cdb(cmd, pscsi_execute_cmd);
 }
 
 static sense_reason_t
@@ -1189,7 +1141,7 @@ static struct configfs_attribute *pscsi_backend_dev_attrs[] = {
 static struct se_subsystem_api pscsi_template = {
        .name                   = "pscsi",
        .owner                  = THIS_MODULE,
-       .transport_type         = TRANSPORT_PLUGIN_PHBA_PDEV,
+       .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH,
        .attach_hba             = pscsi_attach_hba,
        .detach_hba             = pscsi_detach_hba,
        .pmode_enable_hba       = pscsi_pmode_enable_hba,
index 1bd757d..820d305 100644 (file)
@@ -45,6 +45,7 @@ struct pscsi_dev_virt {
        int     pdv_lun_id;
        struct block_device *pdv_bd;
        struct scsi_device *pdv_sd;
+       struct Scsi_Host *pdv_lld_host;
 } ____cacheline_aligned;
 
 typedef enum phv_modes {
index a263bf5..d16489b 100644 (file)
@@ -733,7 +733,6 @@ static struct se_subsystem_api rd_mcp_template = {
        .name                   = "rd_mcp",
        .inquiry_prod           = "RAMDISK-MCP",
        .inquiry_rev            = RD_MCP_VERSION,
-       .transport_type         = TRANSPORT_PLUGIN_VHBA_VDEV,
        .attach_hba             = rd_attach_hba,
        .detach_hba             = rd_detach_hba,
        .alloc_device           = rd_alloc_device,
index 8855781..733824e 100644 (file)
@@ -568,7 +568,7 @@ sbc_compare_and_write(struct se_cmd *cmd)
         * comparision using SGLs at cmd->t_bidi_data_sg..
         */
        rc = down_interruptible(&dev->caw_sem);
-       if ((rc != 0) || signal_pending(current)) {
+       if (rc != 0) {
                cmd->transport_complete_callback = NULL;
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
        }
index 3fe5cb2..675f2d9 100644 (file)
@@ -1196,7 +1196,7 @@ transport_check_alloc_task_attr(struct se_cmd *cmd)
         * Check if SAM Task Attribute emulation is enabled for this
         * struct se_device storage object
         */
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        if (cmd->sam_task_attr == TCM_ACA_TAG) {
@@ -1770,7 +1770,7 @@ static int target_write_prot_action(struct se_cmd *cmd)
                                                   sectors, 0, NULL, 0);
                if (unlikely(cmd->pi_err)) {
                        spin_lock_irq(&cmd->t_state_lock);
-                       cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT;
+                       cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT);
                        spin_unlock_irq(&cmd->t_state_lock);
                        transport_generic_request_failure(cmd, cmd->pi_err);
                        return -1;
@@ -1787,7 +1787,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
 
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return false;
 
        /*
@@ -1868,7 +1868,7 @@ void target_execute_cmd(struct se_cmd *cmd)
 
        if (target_handle_task_attr(cmd)) {
                spin_lock_irq(&cmd->t_state_lock);
-               cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT;
+               cmd->transport_state &= ~(CMD_T_BUSY | CMD_T_SENT);
                spin_unlock_irq(&cmd->t_state_lock);
                return;
        }
@@ -1912,7 +1912,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
 
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return;
 
        if (cmd->sam_task_attr == TCM_SIMPLE_TAG) {
@@ -1957,8 +1957,7 @@ static void transport_complete_qf(struct se_cmd *cmd)
        case DMA_TO_DEVICE:
                if (cmd->se_cmd_flags & SCF_BIDI) {
                        ret = cmd->se_tfo->queue_data_in(cmd);
-                       if (ret < 0)
-                               break;
+                       break;
                }
                /* Fall through for DMA_TO_DEVICE */
        case DMA_NONE:
index dbc872a..07d2996 100644 (file)
@@ -71,13 +71,6 @@ struct tcmu_hba {
        u32 host_id;
 };
 
-/* User wants all cmds or just some */
-enum passthru_level {
-       TCMU_PASS_ALL = 0,
-       TCMU_PASS_IO,
-       TCMU_PASS_INVALID,
-};
-
 #define TCMU_CONFIG_LEN 256
 
 struct tcmu_dev {
@@ -89,7 +82,6 @@ struct tcmu_dev {
 #define TCMU_DEV_BIT_OPEN 0
 #define TCMU_DEV_BIT_BROKEN 1
        unsigned long flags;
-       enum passthru_level pass_level;
 
        struct uio_info uio_info;
 
@@ -683,8 +675,6 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
        setup_timer(&udev->timeout, tcmu_device_timedout,
                (unsigned long)udev);
 
-       udev->pass_level = TCMU_PASS_ALL;
-
        return &udev->se_dev;
 }
 
@@ -948,13 +938,13 @@ static void tcmu_free_device(struct se_device *dev)
 }
 
 enum {
-       Opt_dev_config, Opt_dev_size, Opt_err, Opt_pass_level,
+       Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err,
 };
 
 static match_table_t tokens = {
        {Opt_dev_config, "dev_config=%s"},
        {Opt_dev_size, "dev_size=%u"},
-       {Opt_pass_level, "pass_level=%u"},
+       {Opt_hw_block_size, "hw_block_size=%u"},
        {Opt_err, NULL}
 };
 
@@ -965,7 +955,7 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
        char *orig, *ptr, *opts, *arg_p;
        substring_t args[MAX_OPT_ARGS];
        int ret = 0, token;
-       int arg;
+       unsigned long tmp_ul;
 
        opts = kstrdup(page, GFP_KERNEL);
        if (!opts)
@@ -998,15 +988,23 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
                        if (ret < 0)
                                pr_err("kstrtoul() failed for dev_size=\n");
                        break;
-               case Opt_pass_level:
-                       match_int(args, &arg);
-                       if (arg >= TCMU_PASS_INVALID) {
-                               pr_warn("TCMU: Invalid pass_level: %d\n", arg);
+               case Opt_hw_block_size:
+                       arg_p = match_strdup(&args[0]);
+                       if (!arg_p) {
+                               ret = -ENOMEM;
                                break;
                        }
-
-                       pr_debug("TCMU: Setting pass_level to %d\n", arg);
-                       udev->pass_level = arg;
+                       ret = kstrtoul(arg_p, 0, &tmp_ul);
+                       kfree(arg_p);
+                       if (ret < 0) {
+                               pr_err("kstrtoul() failed for hw_block_size=\n");
+                               break;
+                       }
+                       if (!tmp_ul) {
+                               pr_err("hw_block_size must be nonzero\n");
+                               break;
+                       }
+                       dev->dev_attrib.hw_block_size = tmp_ul;
                        break;
                default:
                        break;
@@ -1024,8 +1022,7 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
 
        bl = sprintf(b + bl, "Config: %s ",
                     udev->dev_config[0] ? udev->dev_config : "NULL");
-       bl += sprintf(b + bl, "Size: %zu PassLevel: %u\n",
-                     udev->dev_size, udev->pass_level);
+       bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size);
 
        return bl;
 }
@@ -1039,20 +1036,6 @@ static sector_t tcmu_get_blocks(struct se_device *dev)
 }
 
 static sense_reason_t
-tcmu_execute_rw(struct se_cmd *se_cmd, struct scatterlist *sgl, u32 sgl_nents,
-               enum dma_data_direction data_direction)
-{
-       int ret;
-
-       ret = tcmu_queue_cmd(se_cmd);
-
-       if (ret != 0)
-               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-       else
-               return TCM_NO_SENSE;
-}
-
-static sense_reason_t
 tcmu_pass_op(struct se_cmd *se_cmd)
 {
        int ret = tcmu_queue_cmd(se_cmd);
@@ -1063,91 +1046,29 @@ tcmu_pass_op(struct se_cmd *se_cmd)
                return TCM_NO_SENSE;
 }
 
-static struct sbc_ops tcmu_sbc_ops = {
-       .execute_rw = tcmu_execute_rw,
-       .execute_sync_cache     = tcmu_pass_op,
-       .execute_write_same     = tcmu_pass_op,
-       .execute_write_same_unmap = tcmu_pass_op,
-       .execute_unmap          = tcmu_pass_op,
-};
-
 static sense_reason_t
 tcmu_parse_cdb(struct se_cmd *cmd)
 {
-       unsigned char *cdb = cmd->t_task_cdb;
-       struct tcmu_dev *udev = TCMU_DEV(cmd->se_dev);
-       sense_reason_t ret;
-
-       switch (udev->pass_level) {
-       case TCMU_PASS_ALL:
-               /* We're just like pscsi, then */
-               /*
-                * For REPORT LUNS we always need to emulate the response, for everything
-                * else, pass it up.
-                */
-               switch (cdb[0]) {
-               case REPORT_LUNS:
-                       cmd->execute_cmd = spc_emulate_report_luns;
-                       break;
-               case READ_6:
-               case READ_10:
-               case READ_12:
-               case READ_16:
-               case WRITE_6:
-               case WRITE_10:
-               case WRITE_12:
-               case WRITE_16:
-               case WRITE_VERIFY:
-                       cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-                       /* FALLTHROUGH */
-               default:
-                       cmd->execute_cmd = tcmu_pass_op;
-               }
-               ret = TCM_NO_SENSE;
-               break;
-       case TCMU_PASS_IO:
-               ret = sbc_parse_cdb(cmd, &tcmu_sbc_ops);
-               break;
-       default:
-               pr_err("Unknown tcm-user pass level %d\n", udev->pass_level);
-               ret = TCM_CHECK_CONDITION_ABORT_CMD;
-       }
-
-       return ret;
+       return passthrough_parse_cdb(cmd, tcmu_pass_op);
 }
 
-DEF_TB_DEFAULT_ATTRIBS(tcmu);
+DEF_TB_DEV_ATTRIB_RO(tcmu, hw_pi_prot_type);
+TB_DEV_ATTR_RO(tcmu, hw_pi_prot_type);
+
+DEF_TB_DEV_ATTRIB_RO(tcmu, hw_block_size);
+TB_DEV_ATTR_RO(tcmu, hw_block_size);
+
+DEF_TB_DEV_ATTRIB_RO(tcmu, hw_max_sectors);
+TB_DEV_ATTR_RO(tcmu, hw_max_sectors);
+
+DEF_TB_DEV_ATTRIB_RO(tcmu, hw_queue_depth);
+TB_DEV_ATTR_RO(tcmu, hw_queue_depth);
 
 static struct configfs_attribute *tcmu_backend_dev_attrs[] = {
-       &tcmu_dev_attrib_emulate_model_alias.attr,
-       &tcmu_dev_attrib_emulate_dpo.attr,
-       &tcmu_dev_attrib_emulate_fua_write.attr,
-       &tcmu_dev_attrib_emulate_fua_read.attr,
-       &tcmu_dev_attrib_emulate_write_cache.attr,
-       &tcmu_dev_attrib_emulate_ua_intlck_ctrl.attr,
-       &tcmu_dev_attrib_emulate_tas.attr,
-       &tcmu_dev_attrib_emulate_tpu.attr,
-       &tcmu_dev_attrib_emulate_tpws.attr,
-       &tcmu_dev_attrib_emulate_caw.attr,
-       &tcmu_dev_attrib_emulate_3pc.attr,
-       &tcmu_dev_attrib_pi_prot_type.attr,
        &tcmu_dev_attrib_hw_pi_prot_type.attr,
-       &tcmu_dev_attrib_pi_prot_format.attr,
-       &tcmu_dev_attrib_enforce_pr_isids.attr,
-       &tcmu_dev_attrib_is_nonrot.attr,
-       &tcmu_dev_attrib_emulate_rest_reord.attr,
-       &tcmu_dev_attrib_force_pr_aptpl.attr,
        &tcmu_dev_attrib_hw_block_size.attr,
-       &tcmu_dev_attrib_block_size.attr,
        &tcmu_dev_attrib_hw_max_sectors.attr,
-       &tcmu_dev_attrib_optimal_sectors.attr,
        &tcmu_dev_attrib_hw_queue_depth.attr,
-       &tcmu_dev_attrib_queue_depth.attr,
-       &tcmu_dev_attrib_max_unmap_lba_count.attr,
-       &tcmu_dev_attrib_max_unmap_block_desc_count.attr,
-       &tcmu_dev_attrib_unmap_granularity.attr,
-       &tcmu_dev_attrib_unmap_granularity_alignment.attr,
-       &tcmu_dev_attrib_max_write_same_len.attr,
        NULL,
 };
 
@@ -1156,7 +1077,7 @@ static struct se_subsystem_api tcmu_template = {
        .inquiry_prod           = "USER",
        .inquiry_rev            = TCMU_VERSION,
        .owner                  = THIS_MODULE,
-       .transport_type         = TRANSPORT_PLUGIN_VHBA_PDEV,
+       .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH,
        .attach_hba             = tcmu_attach_hba,
        .detach_hba             = tcmu_detach_hba,
        .alloc_device           = tcmu_alloc_device,
index a600ff1..8fd680a 100644 (file)
@@ -58,7 +58,6 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op
                                        bool src)
 {
        struct se_device *se_dev;
-       struct configfs_subsystem *subsys = target_core_subsystem[0];
        unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn;
        int rc;
 
@@ -90,8 +89,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op
                                " se_dev\n", xop->src_dev);
                }
 
-               rc = configfs_depend_item(subsys,
-                               &se_dev->dev_group.cg_item);
+               rc = target_depend_item(&se_dev->dev_group.cg_item);
                if (rc != 0) {
                        pr_err("configfs_depend_item attempt failed:"
                                " %d for se_dev: %p\n", rc, se_dev);
@@ -99,8 +97,8 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op
                        return rc;
                }
 
-               pr_debug("Called configfs_depend_item for subsys: %p se_dev: %p"
-                       " se_dev->se_dev_group: %p\n", subsys, se_dev,
+               pr_debug("Called configfs_depend_item for se_dev: %p"
+                       " se_dev->se_dev_group: %p\n", se_dev,
                        &se_dev->dev_group);
 
                mutex_unlock(&g_device_mutex);
@@ -373,7 +371,6 @@ static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd)
 
 static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop)
 {
-       struct configfs_subsystem *subsys = target_core_subsystem[0];
        struct se_device *remote_dev;
 
        if (xop->op_origin == XCOL_SOURCE_RECV_OP)
@@ -381,11 +378,11 @@ static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop)
        else
                remote_dev = xop->src_dev;
 
-       pr_debug("Calling configfs_undepend_item for subsys: %p"
+       pr_debug("Calling configfs_undepend_item for"
                  " remote_dev: %p remote_dev->dev_group: %p\n",
-                 subsys, remote_dev, &remote_dev->dev_group.cg_item);
+                 remote_dev, &remote_dev->dev_group.cg_item);
 
-       configfs_undepend_item(subsys, &remote_dev->dev_group.cg_item);
+       target_undepend_item(&remote_dev->dev_group.cg_item);
 }
 
 static void xcopy_pt_release_cmd(struct se_cmd *se_cmd)
index c2556cf..01255fd 100644 (file)
@@ -224,9 +224,9 @@ static const struct armada_thermal_data armada380_data = {
        .is_valid_shift = 10,
        .temp_shift = 0,
        .temp_mask = 0x3ff,
-       .coef_b = 1169498786UL,
-       .coef_m = 2000000UL,
-       .coef_div = 4289,
+       .coef_b = 2931108200UL,
+       .coef_m = 5000000UL,
+       .coef_div = 10502,
        .inverted = true,
 };
 
index 12623bc..725718e 100644 (file)
@@ -206,51 +206,57 @@ static void find_target_mwait(void)
 
 }
 
+struct pkg_cstate_info {
+       bool skip;
+       int msr_index;
+       int cstate_id;
+};
+
+#define PKG_CSTATE_INIT(id) {                          \
+               .msr_index = MSR_PKG_C##id##_RESIDENCY, \
+               .cstate_id = id                         \
+                       }
+
+static struct pkg_cstate_info pkg_cstates[] = {
+       PKG_CSTATE_INIT(2),
+       PKG_CSTATE_INIT(3),
+       PKG_CSTATE_INIT(6),
+       PKG_CSTATE_INIT(7),
+       PKG_CSTATE_INIT(8),
+       PKG_CSTATE_INIT(9),
+       PKG_CSTATE_INIT(10),
+       {NULL},
+};
+
 static bool has_pkg_state_counter(void)
 {
-       u64 tmp;
-       return !rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &tmp) ||
-              !rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &tmp) ||
-              !rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &tmp) ||
-              !rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &tmp);
+       u64 val;
+       struct pkg_cstate_info *info = pkg_cstates;
+
+       /* check if any one of the counter msrs exists */
+       while (info->msr_index) {
+               if (!rdmsrl_safe(info->msr_index, &val))
+                       return true;
+               info++;
+       }
+
+       return false;
 }
 
 static u64 pkg_state_counter(void)
 {
        u64 val;
        u64 count = 0;
-
-       static bool skip_c2;
-       static bool skip_c3;
-       static bool skip_c6;
-       static bool skip_c7;
-
-       if (!skip_c2) {
-               if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val))
-                       count += val;
-               else
-                       skip_c2 = true;
-       }
-
-       if (!skip_c3) {
-               if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val))
-                       count += val;
-               else
-                       skip_c3 = true;
-       }
-
-       if (!skip_c6) {
-               if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val))
-                       count += val;
-               else
-                       skip_c6 = true;
-       }
-
-       if (!skip_c7) {
-               if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val))
-                       count += val;
-               else
-                       skip_c7 = true;
+       struct pkg_cstate_info *info = pkg_cstates;
+
+       while (info->msr_index) {
+               if (!info->skip) {
+                       if (!rdmsrl_safe(info->msr_index, &val))
+                               count += val;
+                       else
+                               info->skip = true;
+               }
+               info++;
        }
 
        return count;
@@ -667,7 +673,7 @@ static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
 };
 
 /* runs on Nehalem and later */
-static const struct x86_cpu_id intel_powerclamp_ids[] = {
+static const struct x86_cpu_id intel_powerclamp_ids[] __initconst = {
        { X86_VENDOR_INTEL, 6, 0x1a},
        { X86_VENDOR_INTEL, 6, 0x1c},
        { X86_VENDOR_INTEL, 6, 0x1e},
@@ -689,12 +695,13 @@ static const struct x86_cpu_id intel_powerclamp_ids[] = {
        { X86_VENDOR_INTEL, 6, 0x46},
        { X86_VENDOR_INTEL, 6, 0x4c},
        { X86_VENDOR_INTEL, 6, 0x4d},
+       { X86_VENDOR_INTEL, 6, 0x4f},
        { X86_VENDOR_INTEL, 6, 0x56},
        {}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
 
-static int powerclamp_probe(void)
+static int __init powerclamp_probe(void)
 {
        if (!x86_match_cpu(intel_powerclamp_ids)) {
                pr_err("Intel powerclamp does not run on family %d model %d\n",
@@ -760,7 +767,7 @@ file_error:
        debugfs_remove_recursive(debug_dir);
 }
 
-static int powerclamp_init(void)
+static int __init powerclamp_init(void)
 {
        int retval;
        int bitmap_size;
@@ -809,7 +816,7 @@ exit_free:
 }
 module_init(powerclamp_init);
 
-static void powerclamp_exit(void)
+static void __exit powerclamp_exit(void)
 {
        unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
        end_power_clamp();
index 3aa46ac..cd8f5f9 100644 (file)
@@ -529,7 +529,7 @@ static int rockchip_thermal_probe(struct platform_device *pdev)
 
        thermal->pclk = devm_clk_get(&pdev->dev, "apb_pclk");
        if (IS_ERR(thermal->pclk)) {
-               error = PTR_ERR(thermal->clk);
+               error = PTR_ERR(thermal->pclk);
                dev_err(&pdev->dev, "failed to get apb_pclk clock: %d\n",
                        error);
                return error;
index 0531c75..8e39181 100644 (file)
@@ -103,7 +103,7 @@ static inline int of_thermal_get_ntrips(struct thermal_zone_device *tz)
 static inline bool of_thermal_is_trip_valid(struct thermal_zone_device *tz,
                                            int trip)
 {
-       return 0;
+       return false;
 }
 static inline const struct thermal_trip *
 of_thermal_get_trip_points(struct thermal_zone_device *tz)
index a492927..58b5c66 100644 (file)
@@ -420,7 +420,8 @@ const struct ti_bandgap_data dra752_data = {
                        TI_BANDGAP_FEATURE_FREEZE_BIT |
                        TI_BANDGAP_FEATURE_TALERT |
                        TI_BANDGAP_FEATURE_COUNTER_DELAY |
-                       TI_BANDGAP_FEATURE_HISTORY_BUFFER,
+                       TI_BANDGAP_FEATURE_HISTORY_BUFFER |
+                       TI_BANDGAP_FEATURE_ERRATA_814,
        .fclock_name = "l3instr_ts_gclk_div",
        .div_ck_name = "l3instr_ts_gclk_div",
        .conv_table = dra752_adc_to_temp,
index eff0c80..79ff70c 100644 (file)
@@ -319,7 +319,8 @@ const struct ti_bandgap_data omap5430_data = {
                        TI_BANDGAP_FEATURE_FREEZE_BIT |
                        TI_BANDGAP_FEATURE_TALERT |
                        TI_BANDGAP_FEATURE_COUNTER_DELAY |
-                       TI_BANDGAP_FEATURE_HISTORY_BUFFER,
+                       TI_BANDGAP_FEATURE_HISTORY_BUFFER |
+                       TI_BANDGAP_FEATURE_ERRATA_813,
        .fclock_name = "l3instr_ts_gclk_div",
        .div_ck_name = "l3instr_ts_gclk_div",
        .conv_table = omap5430_adc_to_temp,
index 62a5d44..bc14dc8 100644 (file)
@@ -119,6 +119,37 @@ exit:
 }
 
 /**
+ * ti_errata814_bandgap_read_temp() - helper function to read dra7 sensor temperature
+ * @bgp: pointer to ti_bandgap structure
+ * @reg: desired register (offset) to be read
+ *
+ * Function to read dra7 bandgap sensor temperature. This is done separately
+ * so as to workaround the errata "Bandgap Temperature read Dtemp can be
+ * corrupted" - Errata ID: i814".
+ * Read accesses to registers listed below can be corrupted due to incorrect
+ * resynchronization between clock domains.
+ * Read access to registers below can be corrupted :
+ * CTRL_CORE_DTEMP_MPU/GPU/CORE/DSPEVE/IVA_n (n = 0 to 4)
+ * CTRL_CORE_TEMP_SENSOR_MPU/GPU/CORE/DSPEVE/IVA_n
+ *
+ * Return: the register value.
+ */
+static u32 ti_errata814_bandgap_read_temp(struct ti_bandgap *bgp,  u32 reg)
+{
+       u32 val1, val2;
+
+       val1 = ti_bandgap_readl(bgp, reg);
+       val2 = ti_bandgap_readl(bgp, reg);
+
+       /* If both times we read the same value then that is right */
+       if (val1 == val2)
+               return val1;
+
+       /* if val1 and val2 are different read it third time */
+       return ti_bandgap_readl(bgp, reg);
+}
+
+/**
  * ti_bandgap_read_temp() - helper function to read sensor temperature
  * @bgp: pointer to ti_bandgap structure
  * @id: bandgap sensor id
@@ -148,7 +179,11 @@ static u32 ti_bandgap_read_temp(struct ti_bandgap *bgp, int id)
        }
 
        /* read temperature */
-       temp = ti_bandgap_readl(bgp, reg);
+       if (TI_BANDGAP_HAS(bgp, ERRATA_814))
+               temp = ti_errata814_bandgap_read_temp(bgp, reg);
+       else
+               temp = ti_bandgap_readl(bgp, reg);
+
        temp &= tsr->bgap_dtemp_mask;
 
        if (TI_BANDGAP_HAS(bgp, FREEZE_BIT))
@@ -410,7 +445,7 @@ static int ti_bandgap_update_alert_threshold(struct ti_bandgap *bgp, int id,
 {
        struct temp_sensor_data *ts_data = bgp->conf->sensors[id].ts_data;
        struct temp_sensor_registers *tsr;
-       u32 thresh_val, reg_val, t_hot, t_cold;
+       u32 thresh_val, reg_val, t_hot, t_cold, ctrl;
        int err = 0;
 
        tsr = bgp->conf->sensors[id].registers;
@@ -442,8 +477,47 @@ static int ti_bandgap_update_alert_threshold(struct ti_bandgap *bgp, int id,
                  ~(tsr->threshold_thot_mask | tsr->threshold_tcold_mask);
        reg_val |= (t_hot << __ffs(tsr->threshold_thot_mask)) |
                   (t_cold << __ffs(tsr->threshold_tcold_mask));
+
+       /**
+        * Errata i813:
+        * Spurious Thermal Alert: Talert can happen randomly while the device
+        * remains under the temperature limit defined for this event to trig.
+        * This spurious event is caused by a incorrect re-synchronization
+        * between clock domains. The comparison between configured threshold
+        * and current temperature value can happen while the value is
+        * transitioning (metastable), thus causing inappropriate event
+        * generation. No spurious event occurs as long as the threshold value
+        * stays unchanged. Spurious event can be generated while a thermal
+        * alert threshold is modified in
+        * CONTROL_BANDGAP_THRESHOLD_MPU/GPU/CORE/DSPEVE/IVA_n.
+        */
+
+       if (TI_BANDGAP_HAS(bgp, ERRATA_813)) {
+               /* Mask t_hot and t_cold events at the IP Level */
+               ctrl = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl);
+
+               if (hot)
+                       ctrl &= ~tsr->mask_hot_mask;
+               else
+                       ctrl &= ~tsr->mask_cold_mask;
+
+               ti_bandgap_writel(bgp, ctrl, tsr->bgap_mask_ctrl);
+       }
+
+       /* Write the threshold value */
        ti_bandgap_writel(bgp, reg_val, tsr->bgap_threshold);
 
+       if (TI_BANDGAP_HAS(bgp, ERRATA_813)) {
+               /* Unmask t_hot and t_cold events at the IP Level */
+               ctrl = ti_bandgap_readl(bgp, tsr->bgap_mask_ctrl);
+               if (hot)
+                       ctrl |= tsr->mask_hot_mask;
+               else
+                       ctrl |= tsr->mask_cold_mask;
+
+               ti_bandgap_writel(bgp, ctrl, tsr->bgap_mask_ctrl);
+       }
+
        if (err) {
                dev_err(bgp->dev, "failed to reprogram thot threshold\n");
                err = -EIO;
index b3adf72..0c52f7a 100644 (file)
@@ -318,6 +318,10 @@ struct ti_temp_sensor {
  * TI_BANDGAP_FEATURE_HISTORY_BUFFER - used when the bandgap device features
  *     a history buffer of temperatures.
  *
+ * TI_BANDGAP_FEATURE_ERRATA_814 - used to workaorund when the bandgap device
+ *     has Errata 814
+ * TI_BANDGAP_FEATURE_ERRATA_813 - used to workaorund when the bandgap device
+ *     has Errata 813
  * TI_BANDGAP_HAS(b, f) - macro to check if a bandgap device is capable of a
  *      specific feature (above) or not. Return non-zero, if yes.
  */
@@ -331,6 +335,8 @@ struct ti_temp_sensor {
 #define TI_BANDGAP_FEATURE_FREEZE_BIT          BIT(7)
 #define TI_BANDGAP_FEATURE_COUNTER_DELAY       BIT(8)
 #define TI_BANDGAP_FEATURE_HISTORY_BUFFER      BIT(9)
+#define TI_BANDGAP_FEATURE_ERRATA_814          BIT(10)
+#define TI_BANDGAP_FEATURE_ERRATA_813          BIT(11)
 #define TI_BANDGAP_HAS(b, f)                   \
                        ((b)->conf->features & TI_BANDGAP_FEATURE_ ## f)
 
index f1e5742..7a3d146 100644 (file)
@@ -289,7 +289,7 @@ static int xen_initial_domain_console_init(void)
                        return -ENOMEM;
        }
 
-       info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0);
+       info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false);
        info->vtermno = HVC_COOKIE;
 
        spin_lock(&xencons_lock);
@@ -299,11 +299,27 @@ static int xen_initial_domain_console_init(void)
        return 0;
 }
 
+static void xen_console_update_evtchn(struct xencons_info *info)
+{
+       if (xen_hvm_domain()) {
+               uint64_t v;
+               int err;
+
+               err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v);
+               if (!err && v)
+                       info->evtchn = v;
+       } else
+               info->evtchn = xen_start_info->console.domU.evtchn;
+}
+
 void xen_console_resume(void)
 {
        struct xencons_info *info = vtermno_to_xencons(HVC_COOKIE);
-       if (info != NULL && info->irq)
+       if (info != NULL && info->irq) {
+               if (!xen_initial_domain())
+                       xen_console_update_evtchn(info);
                rebind_evtchn_irq(info->evtchn, info->irq);
+       }
 }
 
 static void xencons_disconnect_backend(struct xencons_info *info)
index 04d9e23..358323c 100644 (file)
@@ -174,13 +174,13 @@ struct mips_ejtag_fdc_tty {
 static inline void mips_ejtag_fdc_write(struct mips_ejtag_fdc_tty *priv,
                                        unsigned int offs, unsigned int data)
 {
-       iowrite32(data, priv->reg + offs);
+       __raw_writel(data, priv->reg + offs);
 }
 
 static inline unsigned int mips_ejtag_fdc_read(struct mips_ejtag_fdc_tty *priv,
                                               unsigned int offs)
 {
-       return ioread32(priv->reg + offs);
+       return __raw_readl(priv->reg + offs);
 }
 
 /* Encoding of byte stream in FDC words */
@@ -347,9 +347,9 @@ static void mips_ejtag_fdc_console_write(struct console *c, const char *s,
                s += inc[word.bytes - 1];
 
                /* Busy wait until there's space in fifo */
-               while (ioread32(regs + REG_FDSTAT) & REG_FDSTAT_TXF)
+               while (__raw_readl(regs + REG_FDSTAT) & REG_FDSTAT_TXF)
                        ;
-               iowrite32(word.word, regs + REG_FDTX(c->index));
+               __raw_writel(word.word, regs + REG_FDTX(c->index));
        }
 out:
        local_irq_restore(flags);
@@ -1227,7 +1227,7 @@ static int kgdbfdc_read_char(void)
 
                /* Read next word from KGDB channel */
                do {
-                       stat = ioread32(regs + REG_FDSTAT);
+                       stat = __raw_readl(regs + REG_FDSTAT);
 
                        /* No data waiting? */
                        if (stat & REG_FDSTAT_RXE)
@@ -1236,7 +1236,7 @@ static int kgdbfdc_read_char(void)
                        /* Read next word */
                        channel = (stat & REG_FDSTAT_RXCHAN) >>
                                        REG_FDSTAT_RXCHAN_SHIFT;
-                       data = ioread32(regs + REG_FDRX);
+                       data = __raw_readl(regs + REG_FDRX);
                } while (channel != CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN);
 
                /* Decode into rbuf */
@@ -1266,9 +1266,10 @@ static void kgdbfdc_push_one(void)
                return;
 
        /* Busy wait until there's space in fifo */
-       while (ioread32(regs + REG_FDSTAT) & REG_FDSTAT_TXF)
+       while (__raw_readl(regs + REG_FDSTAT) & REG_FDSTAT_TXF)
                ;
-       iowrite32(word.word, regs + REG_FDTX(CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN));
+       __raw_writel(word.word,
+                    regs + REG_FDTX(CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN));
 }
 
 /* flush the whole write buffer to the TX FIFO */
index 91abc00..2c34c32 100644 (file)
@@ -3170,7 +3170,7 @@ static int gsmtty_break_ctl(struct tty_struct *tty, int state)
        return gsmtty_modem_update(dlci, encode);
 }
 
-static void gsmtty_remove(struct tty_driver *driver, struct tty_struct *tty)
+static void gsmtty_cleanup(struct tty_struct *tty)
 {
        struct gsm_dlci *dlci = tty->driver_data;
        struct gsm_mux *gsm = dlci->gsm;
@@ -3178,7 +3178,6 @@ static void gsmtty_remove(struct tty_driver *driver, struct tty_struct *tty)
        dlci_put(dlci);
        dlci_put(gsm->dlci[0]);
        mux_put(gsm);
-       driver->ttys[tty->index] = NULL;
 }
 
 /* Virtual ttys for the demux */
@@ -3199,7 +3198,7 @@ static const struct tty_operations gsmtty_ops = {
        .tiocmget               = gsmtty_tiocmget,
        .tiocmset               = gsmtty_tiocmset,
        .break_ctl              = gsmtty_break_ctl,
-       .remove                 = gsmtty_remove,
+       .cleanup                = gsmtty_cleanup,
 };
 
 
index 644ddb8..bbc4ce6 100644 (file)
@@ -600,7 +600,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file,
        add_wait_queue(&tty->read_wait, &wait);
 
        for (;;) {
-               if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) {
+               if (test_bit(TTY_OTHER_DONE, &tty->flags)) {
                        ret = -EIO;
                        break;
                }
@@ -828,7 +828,7 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp,
                /* set bits for operations that won't block */
                if (n_hdlc->rx_buf_list.head)
                        mask |= POLLIN | POLLRDNORM;    /* readable */
-               if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
+               if (test_bit(TTY_OTHER_DONE, &tty->flags))
                        mask |= POLLHUP;
                if (tty_hung_up_p(filp))
                        mask |= POLLHUP;
index cf6e0f2..396344c 100644 (file)
@@ -162,6 +162,17 @@ static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
        return put_user(x, ptr);
 }
 
+static inline int tty_copy_to_user(struct tty_struct *tty,
+                                       void __user *to,
+                                       const void *from,
+                                       unsigned long n)
+{
+       struct n_tty_data *ldata = tty->disc_data;
+
+       tty_audit_add_data(tty, to, n, ldata->icanon);
+       return copy_to_user(to, from, n);
+}
+
 /**
  *     n_tty_kick_worker - start input worker (if required)
  *     @tty: terminal
@@ -1949,6 +1960,18 @@ static inline int input_available_p(struct tty_struct *tty, int poll)
                return ldata->commit_head - ldata->read_tail >= amt;
 }
 
+static inline int check_other_done(struct tty_struct *tty)
+{
+       int done = test_bit(TTY_OTHER_DONE, &tty->flags);
+       if (done) {
+               /* paired with cmpxchg() in check_other_closed(); ensures
+                * read buffer head index is not stale
+                */
+               smp_mb__after_atomic();
+       }
+       return done;
+}
+
 /**
  *     copy_from_read_buf      -       copy read data directly
  *     @tty: terminal device
@@ -2058,8 +2081,8 @@ static int canon_copy_from_read_buf(struct tty_struct *tty,
 
        size = N_TTY_BUF_SIZE - tail;
        n = eol - tail;
-       if (n > 4096)
-               n += 4096;
+       if (n > N_TTY_BUF_SIZE)
+               n += N_TTY_BUF_SIZE;
        n += found;
        c = n;
 
@@ -2072,12 +2095,12 @@ static int canon_copy_from_read_buf(struct tty_struct *tty,
                    __func__, eol, found, n, c, size, more);
 
        if (n > size) {
-               ret = copy_to_user(*b, read_buf_addr(ldata, tail), size);
+               ret = tty_copy_to_user(tty, *b, read_buf_addr(ldata, tail), size);
                if (ret)
                        return -EFAULT;
-               ret = copy_to_user(*b + size, ldata->read_buf, n - size);
+               ret = tty_copy_to_user(tty, *b + size, ldata->read_buf, n - size);
        } else
-               ret = copy_to_user(*b, read_buf_addr(ldata, tail), n);
+               ret = tty_copy_to_user(tty, *b, read_buf_addr(ldata, tail), n);
 
        if (ret)
                return -EFAULT;
@@ -2167,7 +2190,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
        struct n_tty_data *ldata = tty->disc_data;
        unsigned char __user *b = buf;
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
-       int c;
+       int c, done;
        int minimum, time;
        ssize_t retval = 0;
        long timeout;
@@ -2235,8 +2258,10 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
                    ((minimum - (b - buf)) >= 1))
                        ldata->minimum_to_wake = (minimum - (b - buf));
 
+               done = check_other_done(tty);
+
                if (!input_available_p(tty, 0)) {
-                       if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) {
+                       if (done) {
                                retval = -EIO;
                                break;
                        }
@@ -2443,12 +2468,12 @@ static unsigned int n_tty_poll(struct tty_struct *tty, struct file *file,
 
        poll_wait(file, &tty->read_wait, wait);
        poll_wait(file, &tty->write_wait, wait);
+       if (check_other_done(tty))
+               mask |= POLLHUP;
        if (input_available_p(tty, 1))
                mask |= POLLIN | POLLRDNORM;
        if (tty->packet && tty->link->ctrl_status)
                mask |= POLLPRI | POLLIN | POLLRDNORM;
-       if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
-               mask |= POLLHUP;
        if (tty_hung_up_p(file))
                mask |= POLLHUP;
        if (!(mask & (POLLHUP | POLLIN | POLLRDNORM))) {
index e72ee62..4d5e840 100644 (file)
@@ -53,9 +53,8 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
        /* Review - krefs on tty_link ?? */
        if (!tty->link)
                return;
-       tty_flush_to_ldisc(tty->link);
        set_bit(TTY_OTHER_CLOSED, &tty->link->flags);
-       wake_up_interruptible(&tty->link->read_wait);
+       tty_flip_buffer_push(tty->link->port);
        wake_up_interruptible(&tty->link->write_wait);
        if (tty->driver->subtype == PTY_TYPE_MASTER) {
                set_bit(TTY_OTHER_CLOSED, &tty->flags);
@@ -243,7 +242,9 @@ static int pty_open(struct tty_struct *tty, struct file *filp)
                goto out;
 
        clear_bit(TTY_IO_ERROR, &tty->flags);
+       /* TTY_OTHER_CLOSED must be cleared before TTY_OTHER_DONE */
        clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
+       clear_bit(TTY_OTHER_DONE, &tty->link->flags);
        set_bit(TTY_THROTTLED, &tty->flags);
        return 0;
 
index 9289999..dce1a23 100644 (file)
@@ -562,12 +562,36 @@ static irqreturn_t omap_wake_irq(int irq, void *dev_id)
        return IRQ_NONE;
 }
 
+#ifdef CONFIG_SERIAL_8250_DMA
+static int omap_8250_dma_handle_irq(struct uart_port *port);
+#endif
+
+static irqreturn_t omap8250_irq(int irq, void *dev_id)
+{
+       struct uart_port *port = dev_id;
+       struct uart_8250_port *up = up_to_u8250p(port);
+       unsigned int iir;
+       int ret;
+
+#ifdef CONFIG_SERIAL_8250_DMA
+       if (up->dma) {
+               ret = omap_8250_dma_handle_irq(port);
+               return IRQ_RETVAL(ret);
+       }
+#endif
+
+       serial8250_rpm_get(up);
+       iir = serial_port_in(port, UART_IIR);
+       ret = serial8250_handle_irq(port, iir);
+       serial8250_rpm_put(up);
+
+       return IRQ_RETVAL(ret);
+}
+
 static int omap_8250_startup(struct uart_port *port)
 {
-       struct uart_8250_port *up =
-               container_of(port, struct uart_8250_port, port);
+       struct uart_8250_port *up = up_to_u8250p(port);
        struct omap8250_priv *priv = port->private_data;
-
        int ret;
 
        if (priv->wakeirq) {
@@ -580,10 +604,31 @@ static int omap_8250_startup(struct uart_port *port)
 
        pm_runtime_get_sync(port->dev);
 
-       ret = serial8250_do_startup(port);
-       if (ret)
+       up->mcr = 0;
+       serial_out(up, UART_FCR, UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
+
+       serial_out(up, UART_LCR, UART_LCR_WLEN8);
+
+       up->lsr_saved_flags = 0;
+       up->msr_saved_flags = 0;
+
+       if (up->dma) {
+               ret = serial8250_request_dma(up);
+               if (ret) {
+                       dev_warn_ratelimited(port->dev,
+                                            "failed to request DMA\n");
+                       up->dma = NULL;
+               }
+       }
+
+       ret = request_irq(port->irq, omap8250_irq, IRQF_SHARED,
+                         dev_name(port->dev), port);
+       if (ret < 0)
                goto err;
 
+       up->ier = UART_IER_RLSI | UART_IER_RDI;
+       serial_out(up, UART_IER, up->ier);
+
 #ifdef CONFIG_PM
        up->capabilities |= UART_CAP_RPM;
 #endif
@@ -610,8 +655,7 @@ err:
 
 static void omap_8250_shutdown(struct uart_port *port)
 {
-       struct uart_8250_port *up =
-               container_of(port, struct uart_8250_port, port);
+       struct uart_8250_port *up = up_to_u8250p(port);
        struct omap8250_priv *priv = port->private_data;
 
        flush_work(&priv->qos_work);
@@ -621,11 +665,24 @@ static void omap_8250_shutdown(struct uart_port *port)
        pm_runtime_get_sync(port->dev);
 
        serial_out(up, UART_OMAP_WER, 0);
-       serial8250_do_shutdown(port);
+
+       up->ier = 0;
+       serial_out(up, UART_IER, 0);
+
+       if (up->dma)
+               serial8250_release_dma(up);
+
+       /*
+        * Disable break condition and FIFOs
+        */
+       if (up->lcr & UART_LCR_SBC)
+               serial_out(up, UART_LCR, up->lcr & ~UART_LCR_SBC);
+       serial_out(up, UART_FCR, UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
 
        pm_runtime_mark_last_busy(port->dev);
        pm_runtime_put_autosuspend(port->dev);
 
+       free_irq(port->irq, port);
        if (priv->wakeirq)
                free_irq(priv->wakeirq, port);
 }
@@ -974,6 +1031,13 @@ static inline int omap_8250_rx_dma(struct uart_8250_port *p, unsigned int iir)
 }
 #endif
 
+static int omap8250_no_handle_irq(struct uart_port *port)
+{
+       /* IRQ has not been requested but handling irq? */
+       WARN_ONCE(1, "Unexpected irq handling before port startup\n");
+       return 0;
+}
+
 static int omap8250_probe(struct platform_device *pdev)
 {
        struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1075,6 +1139,7 @@ static int omap8250_probe(struct platform_device *pdev)
        pm_runtime_get_sync(&pdev->dev);
 
        omap_serial_fill_features_erratas(&up, priv);
+       up.port.handle_irq = omap8250_no_handle_irq;
 #ifdef CONFIG_SERIAL_8250_DMA
        if (pdev->dev.of_node) {
                /*
@@ -1088,7 +1153,6 @@ static int omap8250_probe(struct platform_device *pdev)
                ret = of_property_count_strings(pdev->dev.of_node, "dma-names");
                if (ret == 2) {
                        up.dma = &priv->omap8250_dma;
-                       up.port.handle_irq = omap_8250_dma_handle_irq;
                        priv->omap8250_dma.fn = the_no_dma_filter_fn;
                        priv->omap8250_dma.tx_dma = omap_8250_tx_dma;
                        priv->omap8250_dma.rx_dma = omap_8250_rx_dma;
index 5a4e9d5..763eb20 100644 (file)
@@ -1249,20 +1249,19 @@ __acquires(&uap->port.lock)
 
 /*
  * Transmit a character
- * There must be at least one free entry in the TX FIFO to accept the char.
  *
- * Returns true if the FIFO might have space in it afterwards;
- * returns false if the FIFO definitely became full.
+ * Returns true if the character was successfully queued to the FIFO.
+ * Returns false otherwise.
  */
 static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c)
 {
+       if (readw(uap->port.membase + UART01x_FR) & UART01x_FR_TXFF)
+               return false; /* unable to transmit character */
+
        writew(c, uap->port.membase + UART01x_DR);
        uap->port.icount.tx++;
 
-       if (likely(uap->tx_irq_seen > 1))
-               return true;
-
-       return !(readw(uap->port.membase + UART01x_FR) & UART01x_FR_TXFF);
+       return true;
 }
 
 static bool pl011_tx_chars(struct uart_amba_port *uap)
@@ -1296,7 +1295,8 @@ static bool pl011_tx_chars(struct uart_amba_port *uap)
                return false;
 
        if (uap->port.x_char) {
-               pl011_tx_char(uap, uap->port.x_char);
+               if (!pl011_tx_char(uap, uap->port.x_char))
+                       goto done;
                uap->port.x_char = 0;
                --count;
        }
@@ -1639,6 +1639,9 @@ static int pl011_startup(struct uart_port *port)
 
        writew(uap->vendor->ifls, uap->port.membase + UART011_IFLS);
 
+       /* Assume that TX IRQ doesn't work until we see one: */
+       uap->tx_irq_seen = 0;
+
        spin_lock_irq(&uap->port.lock);
 
        /* restore RTS and DTR */
@@ -1702,7 +1705,7 @@ static void pl011_shutdown(struct uart_port *port)
        spin_lock_irq(&uap->port.lock);
        uap->im = 0;
        writew(uap->im, uap->port.membase + UART011_IMSC);
-       writew(0xffff & ~UART011_TXIS, uap->port.membase + UART011_ICR);
+       writew(0xffff, uap->port.membase + UART011_ICR);
        spin_unlock_irq(&uap->port.lock);
 
        pl011_dma_shutdown(uap);
index 5fdc9f3..6dc471e 100644 (file)
@@ -187,13 +187,8 @@ static int __init param_setup_earlycon(char *buf)
                return 0;
 
        err = setup_earlycon(buf);
-       if (err == -ENOENT) {
-               pr_warn("no match for %s\n", buf);
-               err = 0;
-       } else if (err == -EALREADY) {
-               pr_warn("already registered\n");
-               err = 0;
-       }
+       if (err == -ENOENT || err == -EALREADY)
+               return 0;
        return err;
 }
 early_param("earlycon", param_setup_earlycon);
index c8cfa06..8825039 100644 (file)
@@ -911,6 +911,14 @@ static void dma_rx_callback(void *data)
 
        status = dmaengine_tx_status(chan, (dma_cookie_t)0, &state);
        count = RX_BUF_SIZE - state.residue;
+
+       if (readl(sport->port.membase + USR2) & USR2_IDLE) {
+               /* In condition [3] the SDMA counted up too early */
+               count--;
+
+               writel(USR2_IDLE, sport->port.membase + USR2);
+       }
+
        dev_dbg(sport->port.dev, "We get %d bytes.\n", count);
 
        if (count) {
index 211479a..7f49172 100644 (file)
@@ -1735,6 +1735,8 @@ static int serial_omap_probe(struct platform_device *pdev)
 err_add_port:
        pm_runtime_put(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
+       pm_qos_remove_request(&up->pm_qos_request);
+       device_init_wakeup(up->dev, false);
 err_rs485:
 err_port_line:
        return ret;
index 7566164..2f78b77 100644 (file)
 
 #define TTY_BUFFER_PAGE        (((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~0xFF)
 
+/*
+ * If all tty flip buffers have been processed by flush_to_ldisc() or
+ * dropped by tty_buffer_flush(), check if the linked pty has been closed.
+ * If so, wake the reader/poll to process
+ */
+static inline void check_other_closed(struct tty_struct *tty)
+{
+       unsigned long flags, old;
+
+       /* transition from TTY_OTHER_CLOSED => TTY_OTHER_DONE must be atomic */
+       for (flags = ACCESS_ONCE(tty->flags);
+            test_bit(TTY_OTHER_CLOSED, &flags);
+            ) {
+               old = flags;
+               __set_bit(TTY_OTHER_DONE, &flags);
+               flags = cmpxchg(&tty->flags, old, flags);
+               if (old == flags) {
+                       wake_up_interruptible(&tty->read_wait);
+                       break;
+               }
+       }
+}
 
 /**
  *     tty_buffer_lock_exclusive       -       gain exclusive access to buffer
@@ -229,6 +251,8 @@ void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld)
        if (ld && ld->ops->flush_buffer)
                ld->ops->flush_buffer(tty);
 
+       check_other_closed(tty);
+
        atomic_dec(&buf->priority);
        mutex_unlock(&buf->lock);
 }
@@ -471,8 +495,10 @@ static void flush_to_ldisc(struct work_struct *work)
                smp_rmb();
                count = head->commit - head->read;
                if (!count) {
-                       if (next == NULL)
+                       if (next == NULL) {
+                               check_other_closed(tty);
                                break;
+                       }
                        buf->head = next;
                        tty_buffer_free(port, head);
                        continue;
@@ -489,19 +515,6 @@ static void flush_to_ldisc(struct work_struct *work)
 }
 
 /**
- *     tty_flush_to_ldisc
- *     @tty: tty to push
- *
- *     Push the terminal flip buffers to the line discipline.
- *
- *     Must not be called from IRQ context.
- */
-void tty_flush_to_ldisc(struct tty_struct *tty)
-{
-       flush_work(&tty->port->buf.work);
-}
-
-/**
  *     tty_flip_buffer_push    -       terminal
  *     @port: tty port to push
  *
index dfb05ed..5b7061a 100644 (file)
@@ -88,9 +88,13 @@ static ssize_t ci_port_test_write(struct file *file, const char __user *ubuf,
        char buf[32];
        int ret;
 
-       if (copy_from_user(buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
+       count = min_t(size_t, sizeof(buf) - 1, count);
+       if (copy_from_user(buf, ubuf, count))
                return -EFAULT;
 
+       /* sscanf requires a zero terminated string */
+       buf[count] = '\0';
+
        if (sscanf(buf, "%u", &mode) != 1)
                return -EINVAL;
 
index 41e510a..d85abfe 100644 (file)
@@ -106,6 +106,9 @@ static const struct usb_device_id usb_quirk_list[] = {
        { USB_DEVICE(0x04f3, 0x010c), .driver_info =
                        USB_QUIRK_DEVICE_QUALIFIER },
 
+       { USB_DEVICE(0x04f3, 0x0125), .driver_info =
+                       USB_QUIRK_DEVICE_QUALIFIER },
+
        { USB_DEVICE(0x04f3, 0x016f), .driver_info =
                        USB_QUIRK_DEVICE_QUALIFIER },
 
index fdab715..c0eafa6 100644 (file)
 #define DWC3_DGCMD_SET_ENDPOINT_NRDY   0x0c
 #define DWC3_DGCMD_RUN_SOC_BUS_LOOPBACK        0x10
 
-#define DWC3_DGCMD_STATUS(n)           (((n) >> 15) & 1)
+#define DWC3_DGCMD_STATUS(n)           (((n) >> 12) & 0x0F)
 #define DWC3_DGCMD_CMDACT              (1 << 10)
 #define DWC3_DGCMD_CMDIOC              (1 << 8)
 
 #define DWC3_DEPCMD_PARAM_SHIFT                16
 #define DWC3_DEPCMD_PARAM(x)           ((x) << DWC3_DEPCMD_PARAM_SHIFT)
 #define DWC3_DEPCMD_GET_RSC_IDX(x)     (((x) >> DWC3_DEPCMD_PARAM_SHIFT) & 0x7f)
-#define DWC3_DEPCMD_STATUS(x)          (((x) >> 15) & 1)
+#define DWC3_DEPCMD_STATUS(x)          (((x) >> 12) & 0x0F)
 #define DWC3_DEPCMD_HIPRI_FORCERM      (1 << 11)
 #define DWC3_DEPCMD_CMDACT             (1 << 10)
 #define DWC3_DEPCMD_CMDIOC             (1 << 8)
index edba534..6b486a3 100644 (file)
@@ -65,8 +65,8 @@
 #define USBOTGSS_IRQENABLE_SET_MISC            0x003c
 #define USBOTGSS_IRQENABLE_CLR_MISC            0x0040
 #define USBOTGSS_IRQMISC_OFFSET                        0x03fc
-#define USBOTGSS_UTMI_OTG_CTRL                 0x0080
-#define USBOTGSS_UTMI_OTG_STATUS               0x0084
+#define USBOTGSS_UTMI_OTG_STATUS               0x0080
+#define USBOTGSS_UTMI_OTG_CTRL                 0x0084
 #define USBOTGSS_UTMI_OTG_OFFSET               0x0480
 #define USBOTGSS_TXFIFO_DEPTH                  0x0508
 #define USBOTGSS_RXFIFO_DEPTH                  0x050c
 #define USBOTGSS_IRQMISC_DISCHRGVBUS_FALL              (1 << 3)
 #define USBOTGSS_IRQMISC_IDPULLUP_FALL         (1 << 0)
 
-/* UTMI_OTG_CTRL REGISTER */
-#define USBOTGSS_UTMI_OTG_CTRL_DRVVBUS         (1 << 5)
-#define USBOTGSS_UTMI_OTG_CTRL_CHRGVBUS                (1 << 4)
-#define USBOTGSS_UTMI_OTG_CTRL_DISCHRGVBUS     (1 << 3)
-#define USBOTGSS_UTMI_OTG_CTRL_IDPULLUP                (1 << 0)
-
 /* UTMI_OTG_STATUS REGISTER */
-#define USBOTGSS_UTMI_OTG_STATUS_SW_MODE       (1 << 31)
-#define USBOTGSS_UTMI_OTG_STATUS_POWERPRESENT  (1 << 9)
-#define USBOTGSS_UTMI_OTG_STATUS_TXBITSTUFFENABLE (1 << 8)
-#define USBOTGSS_UTMI_OTG_STATUS_IDDIG         (1 << 4)
-#define USBOTGSS_UTMI_OTG_STATUS_SESSEND       (1 << 3)
-#define USBOTGSS_UTMI_OTG_STATUS_SESSVALID     (1 << 2)
-#define USBOTGSS_UTMI_OTG_STATUS_VBUSVALID     (1 << 1)
+#define USBOTGSS_UTMI_OTG_STATUS_DRVVBUS       (1 << 5)
+#define USBOTGSS_UTMI_OTG_STATUS_CHRGVBUS      (1 << 4)
+#define USBOTGSS_UTMI_OTG_STATUS_DISCHRGVBUS   (1 << 3)
+#define USBOTGSS_UTMI_OTG_STATUS_IDPULLUP      (1 << 0)
+
+/* UTMI_OTG_CTRL REGISTER */
+#define USBOTGSS_UTMI_OTG_CTRL_SW_MODE         (1 << 31)
+#define USBOTGSS_UTMI_OTG_CTRL_POWERPRESENT    (1 << 9)
+#define USBOTGSS_UTMI_OTG_CTRL_TXBITSTUFFENABLE (1 << 8)
+#define USBOTGSS_UTMI_OTG_CTRL_IDDIG           (1 << 4)
+#define USBOTGSS_UTMI_OTG_CTRL_SESSEND         (1 << 3)
+#define USBOTGSS_UTMI_OTG_CTRL_SESSVALID       (1 << 2)
+#define USBOTGSS_UTMI_OTG_CTRL_VBUSVALID       (1 << 1)
 
 struct dwc3_omap {
        struct device           *dev;
@@ -119,7 +119,7 @@ struct dwc3_omap {
        int                     irq;
        void __iomem            *base;
 
-       u32                     utmi_otg_status;
+       u32                     utmi_otg_ctrl;
        u32                     utmi_otg_offset;
        u32                     irqmisc_offset;
        u32                     irq_eoi_offset;
@@ -153,15 +153,15 @@ static inline void dwc3_omap_writel(void __iomem *base, u32 offset, u32 value)
        writel(value, base + offset);
 }
 
-static u32 dwc3_omap_read_utmi_status(struct dwc3_omap *omap)
+static u32 dwc3_omap_read_utmi_ctrl(struct dwc3_omap *omap)
 {
-       return dwc3_omap_readl(omap->base, USBOTGSS_UTMI_OTG_STATUS +
+       return dwc3_omap_readl(omap->base, USBOTGSS_UTMI_OTG_CTRL +
                                                        omap->utmi_otg_offset);
 }
 
-static void dwc3_omap_write_utmi_status(struct dwc3_omap *omap, u32 value)
+static void dwc3_omap_write_utmi_ctrl(struct dwc3_omap *omap, u32 value)
 {
-       dwc3_omap_writel(omap->base, USBOTGSS_UTMI_OTG_STATUS +
+       dwc3_omap_writel(omap->base, USBOTGSS_UTMI_OTG_CTRL +
                                        omap->utmi_otg_offset, value);
 
 }
@@ -235,25 +235,25 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap,
                        }
                }
 
-               val = dwc3_omap_read_utmi_status(omap);
-               val &= ~(USBOTGSS_UTMI_OTG_STATUS_IDDIG
-                               | USBOTGSS_UTMI_OTG_STATUS_VBUSVALID
-                               | USBOTGSS_UTMI_OTG_STATUS_SESSEND);
-               val |= USBOTGSS_UTMI_OTG_STATUS_SESSVALID
-                               | USBOTGSS_UTMI_OTG_STATUS_POWERPRESENT;
-               dwc3_omap_write_utmi_status(omap, val);
+               val = dwc3_omap_read_utmi_ctrl(omap);
+               val &= ~(USBOTGSS_UTMI_OTG_CTRL_IDDIG
+                               | USBOTGSS_UTMI_OTG_CTRL_VBUSVALID
+                               | USBOTGSS_UTMI_OTG_CTRL_SESSEND);
+               val |= USBOTGSS_UTMI_OTG_CTRL_SESSVALID
+                               | USBOTGSS_UTMI_OTG_CTRL_POWERPRESENT;
+               dwc3_omap_write_utmi_ctrl(omap, val);
                break;
 
        case OMAP_DWC3_VBUS_VALID:
                dev_dbg(omap->dev, "VBUS Connect\n");
 
-               val = dwc3_omap_read_utmi_status(omap);
-               val &= ~USBOTGSS_UTMI_OTG_STATUS_SESSEND;
-               val |= USBOTGSS_UTMI_OTG_STATUS_IDDIG
-                               | USBOTGSS_UTMI_OTG_STATUS_VBUSVALID
-                               | USBOTGSS_UTMI_OTG_STATUS_SESSVALID
-                               | USBOTGSS_UTMI_OTG_STATUS_POWERPRESENT;
-               dwc3_omap_write_utmi_status(omap, val);
+               val = dwc3_omap_read_utmi_ctrl(omap);
+               val &= ~USBOTGSS_UTMI_OTG_CTRL_SESSEND;
+               val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG
+                               | USBOTGSS_UTMI_OTG_CTRL_VBUSVALID
+                               | USBOTGSS_UTMI_OTG_CTRL_SESSVALID
+                               | USBOTGSS_UTMI_OTG_CTRL_POWERPRESENT;
+               dwc3_omap_write_utmi_ctrl(omap, val);
                break;
 
        case OMAP_DWC3_ID_FLOAT:
@@ -263,13 +263,13 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap,
        case OMAP_DWC3_VBUS_OFF:
                dev_dbg(omap->dev, "VBUS Disconnect\n");
 
-               val = dwc3_omap_read_utmi_status(omap);
-               val &= ~(USBOTGSS_UTMI_OTG_STATUS_SESSVALID
-                               | USBOTGSS_UTMI_OTG_STATUS_VBUSVALID
-                               | USBOTGSS_UTMI_OTG_STATUS_POWERPRESENT);
-               val |= USBOTGSS_UTMI_OTG_STATUS_SESSEND
-                               | USBOTGSS_UTMI_OTG_STATUS_IDDIG;
-               dwc3_omap_write_utmi_status(omap, val);
+               val = dwc3_omap_read_utmi_ctrl(omap);
+               val &= ~(USBOTGSS_UTMI_OTG_CTRL_SESSVALID
+                               | USBOTGSS_UTMI_OTG_CTRL_VBUSVALID
+                               | USBOTGSS_UTMI_OTG_CTRL_POWERPRESENT);
+               val |= USBOTGSS_UTMI_OTG_CTRL_SESSEND
+                               | USBOTGSS_UTMI_OTG_CTRL_IDDIG;
+               dwc3_omap_write_utmi_ctrl(omap, val);
                break;
 
        default:
@@ -422,22 +422,22 @@ static void dwc3_omap_set_utmi_mode(struct dwc3_omap *omap)
        struct device_node      *node = omap->dev->of_node;
        int                     utmi_mode = 0;
 
-       reg = dwc3_omap_read_utmi_status(omap);
+       reg = dwc3_omap_read_utmi_ctrl(omap);
 
        of_property_read_u32(node, "utmi-mode", &utmi_mode);
 
        switch (utmi_mode) {
        case DWC3_OMAP_UTMI_MODE_SW:
-               reg |= USBOTGSS_UTMI_OTG_STATUS_SW_MODE;
+               reg |= USBOTGSS_UTMI_OTG_CTRL_SW_MODE;
                break;
        case DWC3_OMAP_UTMI_MODE_HW:
-               reg &= ~USBOTGSS_UTMI_OTG_STATUS_SW_MODE;
+               reg &= ~USBOTGSS_UTMI_OTG_CTRL_SW_MODE;
                break;
        default:
                dev_dbg(omap->dev, "UNKNOWN utmi mode %d\n", utmi_mode);
        }
 
-       dwc3_omap_write_utmi_status(omap, reg);
+       dwc3_omap_write_utmi_ctrl(omap, reg);
 }
 
 static int dwc3_omap_extcon_register(struct dwc3_omap *omap)
@@ -614,7 +614,7 @@ static int dwc3_omap_suspend(struct device *dev)
 {
        struct dwc3_omap        *omap = dev_get_drvdata(dev);
 
-       omap->utmi_otg_status = dwc3_omap_read_utmi_status(omap);
+       omap->utmi_otg_ctrl = dwc3_omap_read_utmi_ctrl(omap);
        dwc3_omap_disable_irqs(omap);
 
        return 0;
@@ -624,7 +624,7 @@ static int dwc3_omap_resume(struct device *dev)
 {
        struct dwc3_omap        *omap = dev_get_drvdata(dev);
 
-       dwc3_omap_write_utmi_status(omap, omap->utmi_otg_status);
+       dwc3_omap_write_utmi_ctrl(omap, omap->utmi_otg_ctrl);
        dwc3_omap_enable_irqs(omap);
 
        pm_runtime_disable(dev);
index c42765b..0495c94 100644 (file)
@@ -1295,6 +1295,7 @@ static void purge_configs_funcs(struct gadget_info *gi)
                        }
                }
                c->next_interface_id = 0;
+               memset(c->interface, 0, sizeof(c->interface));
                c->superspeed = 0;
                c->highspeed = 0;
                c->fullspeed = 0;
index 6bdb570..3507f88 100644 (file)
@@ -315,7 +315,6 @@ static ssize_t ffs_ep0_write(struct file *file, const char __user *buf,
                                return ret;
                        }
 
-                       set_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags);
                        return len;
                }
                break;
@@ -847,7 +846,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
                                ret = ep->status;
                                if (io_data->read && ret > 0) {
                                        ret = copy_to_iter(data, ret, &io_data->data);
-                                       if (unlikely(iov_iter_count(&io_data->data)))
+                                       if (!ret)
                                                ret = -EFAULT;
                                }
                        }
@@ -1463,8 +1462,7 @@ static void ffs_data_clear(struct ffs_data *ffs)
 {
        ENTER();
 
-       if (test_and_clear_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags))
-               ffs_closed(ffs);
+       ffs_closed(ffs);
 
        BUG_ON(ffs->gadget);
 
@@ -3422,9 +3420,13 @@ static int ffs_ready(struct ffs_data *ffs)
        ffs_obj->desc_ready = true;
        ffs_obj->ffs_data = ffs;
 
-       if (ffs_obj->ffs_ready_callback)
+       if (ffs_obj->ffs_ready_callback) {
                ret = ffs_obj->ffs_ready_callback(ffs);
+               if (ret)
+                       goto done;
+       }
 
+       set_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags);
 done:
        ffs_dev_unlock();
        return ret;
@@ -3443,7 +3445,8 @@ static void ffs_closed(struct ffs_data *ffs)
 
        ffs_obj->desc_ready = false;
 
-       if (ffs_obj->ffs_closed_callback)
+       if (test_and_clear_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags) &&
+           ffs_obj->ffs_closed_callback)
                ffs_obj->ffs_closed_callback(ffs);
 
        if (!ffs_obj->opts || ffs_obj->opts->no_configfs
index 13dfc99..f7f35a3 100644 (file)
@@ -437,12 +437,20 @@ static int hidg_setup(struct usb_function *f,
                  | USB_REQ_GET_DESCRIPTOR):
                switch (value >> 8) {
                case HID_DT_HID:
+               {
+                       struct hid_descriptor hidg_desc_copy = hidg_desc;
+
                        VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: HID\n");
+                       hidg_desc_copy.desc[0].bDescriptorType = HID_DT_REPORT;
+                       hidg_desc_copy.desc[0].wDescriptorLength =
+                               cpu_to_le16(hidg->report_desc_length);
+
                        length = min_t(unsigned short, length,
-                                                  hidg_desc.bLength);
-                       memcpy(req->buf, &hidg_desc, length);
+                                                  hidg_desc_copy.bLength);
+                       memcpy(req->buf, &hidg_desc_copy, length);
                        goto respond;
                        break;
+               }
                case HID_DT_REPORT:
                        VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: REPORT\n");
                        length = min_t(unsigned short, length,
@@ -632,6 +640,10 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
        hidg_fs_in_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length);
        hidg_hs_out_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length);
        hidg_fs_out_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length);
+       /*
+        * We can use hidg_desc struct here but we should not relay
+        * that its content won't change after returning from this function.
+        */
        hidg_desc.desc[0].bDescriptorType = HID_DT_REPORT;
        hidg_desc.desc[0].wDescriptorLength =
                cpu_to_le16(hidg->report_desc_length);
index 259b656..6316aa5 100644 (file)
@@ -973,7 +973,13 @@ static ssize_t f_midi_opts_id_show(struct f_midi_opts *opts, char *page)
        int result;
 
        mutex_lock(&opts->lock);
-       result = strlcpy(page, opts->id, PAGE_SIZE);
+       if (opts->id) {
+               result = strlcpy(page, opts->id, PAGE_SIZE);
+       } else {
+               page[0] = 0;
+               result = 0;
+       }
+
        mutex_unlock(&opts->lock);
 
        return result;
index 9719abf..7856b33 100644 (file)
@@ -588,7 +588,10 @@ static int f_audio_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
 
        if (intf == 1) {
                if (alt == 1) {
-                       config_ep_by_speed(cdev->gadget, f, out_ep);
+                       err = config_ep_by_speed(cdev->gadget, f, out_ep);
+                       if (err)
+                               return err;
+
                        usb_ep_enable(out_ep);
                        out_ep->driver_data = audio;
                        audio->copy_buf = f_audio_buffer_alloc(audio_buf_size);
index 89179ab..7ee0579 100644 (file)
@@ -113,6 +113,7 @@ struct gs_port {
        int write_allocated;
        struct gs_buf           port_write_buf;
        wait_queue_head_t       drain_wait;     /* wait while writes drain */
+       bool                    write_busy;
 
        /* REVISIT this state ... */
        struct usb_cdc_line_coding port_line_coding;    /* 8-N-1 etc */
@@ -363,7 +364,7 @@ __acquires(&port->port_lock)
        int                     status = 0;
        bool                    do_tty_wake = false;
 
-       while (!list_empty(pool)) {
+       while (!port->write_busy && !list_empty(pool)) {
                struct usb_request      *req;
                int                     len;
 
@@ -393,9 +394,11 @@ __acquires(&port->port_lock)
                 * NOTE that we may keep sending data for a while after
                 * the TTY closed (dev->ioport->port_tty is NULL).
                 */
+               port->write_busy = true;
                spin_unlock(&port->port_lock);
                status = usb_ep_queue(in, req, GFP_ATOMIC);
                spin_lock(&port->port_lock);
+               port->write_busy = false;
 
                if (status) {
                        pr_debug("%s: %s %s err %d\n",
index c30b7b5..1194b09 100644 (file)
@@ -121,7 +121,7 @@ static struct usb_function *f_msg;
 /*
  * We _always_ have both ACM and mass storage functions.
  */
-static int __init acm_ms_do_config(struct usb_configuration *c)
+static int acm_ms_do_config(struct usb_configuration *c)
 {
        struct fsg_opts *opts;
        int     status;
@@ -174,7 +174,7 @@ static struct usb_configuration acm_ms_config_driver = {
 
 /*-------------------------------------------------------------------------*/
 
-static int __init acm_ms_bind(struct usb_composite_dev *cdev)
+static int acm_ms_bind(struct usb_composite_dev *cdev)
 {
        struct usb_gadget       *gadget = cdev->gadget;
        struct fsg_opts         *opts;
@@ -249,7 +249,7 @@ fail_get_msg:
        return status;
 }
 
-static int __exit acm_ms_unbind(struct usb_composite_dev *cdev)
+static int acm_ms_unbind(struct usb_composite_dev *cdev)
 {
        usb_put_function(f_msg);
        usb_put_function_instance(fi_msg);
@@ -258,13 +258,13 @@ static int __exit acm_ms_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver acm_ms_driver = {
+static struct usb_composite_driver acm_ms_driver = {
        .name           = "g_acm_ms",
        .dev            = &device_desc,
        .max_speed      = USB_SPEED_SUPER,
        .strings        = dev_strings,
        .bind           = acm_ms_bind,
-       .unbind         = __exit_p(acm_ms_unbind),
+       .unbind         = acm_ms_unbind,
 };
 
 module_usb_composite_driver(acm_ms_driver);
index f46a395..f289caf 100644 (file)
@@ -167,7 +167,7 @@ static const struct usb_descriptor_header *otg_desc[] = {
 
 /*-------------------------------------------------------------------------*/
 
-static int __init audio_do_config(struct usb_configuration *c)
+static int audio_do_config(struct usb_configuration *c)
 {
        int status;
 
@@ -216,7 +216,7 @@ static struct usb_configuration audio_config_driver = {
 
 /*-------------------------------------------------------------------------*/
 
-static int __init audio_bind(struct usb_composite_dev *cdev)
+static int audio_bind(struct usb_composite_dev *cdev)
 {
 #ifndef CONFIG_GADGET_UAC1
        struct f_uac2_opts      *uac2_opts;
@@ -276,7 +276,7 @@ fail:
        return status;
 }
 
-static int __exit audio_unbind(struct usb_composite_dev *cdev)
+static int audio_unbind(struct usb_composite_dev *cdev)
 {
 #ifdef CONFIG_GADGET_UAC1
        if (!IS_ERR_OR_NULL(f_uac1))
@@ -292,13 +292,13 @@ static int __exit audio_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver audio_driver = {
+static struct usb_composite_driver audio_driver = {
        .name           = "g_audio",
        .dev            = &device_desc,
        .strings        = audio_strings,
        .max_speed      = USB_SPEED_HIGH,
        .bind           = audio_bind,
-       .unbind         = __exit_p(audio_unbind),
+       .unbind         = audio_unbind,
 };
 
 module_usb_composite_driver(audio_driver);
index 2e85d94..afd3e37 100644 (file)
@@ -104,7 +104,7 @@ static struct usb_function_instance *fi_ecm;
 /*
  * We _always_ have both CDC ECM and CDC ACM functions.
  */
-static int __init cdc_do_config(struct usb_configuration *c)
+static int cdc_do_config(struct usb_configuration *c)
 {
        int     status;
 
@@ -153,7 +153,7 @@ static struct usb_configuration cdc_config_driver = {
 
 /*-------------------------------------------------------------------------*/
 
-static int __init cdc_bind(struct usb_composite_dev *cdev)
+static int cdc_bind(struct usb_composite_dev *cdev)
 {
        struct usb_gadget       *gadget = cdev->gadget;
        struct f_ecm_opts       *ecm_opts;
@@ -211,7 +211,7 @@ fail:
        return status;
 }
 
-static int __exit cdc_unbind(struct usb_composite_dev *cdev)
+static int cdc_unbind(struct usb_composite_dev *cdev)
 {
        usb_put_function(f_acm);
        usb_put_function_instance(fi_serial);
@@ -222,13 +222,13 @@ static int __exit cdc_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver cdc_driver = {
+static struct usb_composite_driver cdc_driver = {
        .name           = "g_cdc",
        .dev            = &device_desc,
        .strings        = dev_strings,
        .max_speed      = USB_SPEED_HIGH,
        .bind           = cdc_bind,
-       .unbind         = __exit_p(cdc_unbind),
+       .unbind         = cdc_unbind,
 };
 
 module_usb_composite_driver(cdc_driver);
index 633683a..204b10b 100644 (file)
@@ -284,7 +284,7 @@ fail_1:
        return -ENODEV;
 }
 
-static int __init dbgp_bind(struct usb_gadget *gadget,
+static int dbgp_bind(struct usb_gadget *gadget,
                struct usb_gadget_driver *driver)
 {
        int err, stp;
@@ -406,7 +406,7 @@ fail:
        return err;
 }
 
-static __refdata struct usb_gadget_driver dbgp_driver = {
+static struct usb_gadget_driver dbgp_driver = {
        .function = "dbgp",
        .max_speed = USB_SPEED_HIGH,
        .bind = dbgp_bind,
index c5fdc61..a3323dc 100644 (file)
@@ -222,7 +222,7 @@ static struct usb_function *f_rndis;
  * the first one present.  That's to make Microsoft's drivers happy,
  * and to follow DOCSIS 1.0 (cable modem standard).
  */
-static int __init rndis_do_config(struct usb_configuration *c)
+static int rndis_do_config(struct usb_configuration *c)
 {
        int status;
 
@@ -264,7 +264,7 @@ MODULE_PARM_DESC(use_eem, "use CDC EEM mode");
 /*
  * We _always_ have an ECM, CDC Subset, or EEM configuration.
  */
-static int __init eth_do_config(struct usb_configuration *c)
+static int eth_do_config(struct usb_configuration *c)
 {
        int status = 0;
 
@@ -318,7 +318,7 @@ static struct usb_configuration eth_config_driver = {
 
 /*-------------------------------------------------------------------------*/
 
-static int __init eth_bind(struct usb_composite_dev *cdev)
+static int eth_bind(struct usb_composite_dev *cdev)
 {
        struct usb_gadget       *gadget = cdev->gadget;
        struct f_eem_opts       *eem_opts = NULL;
@@ -447,7 +447,7 @@ fail:
        return status;
 }
 
-static int __exit eth_unbind(struct usb_composite_dev *cdev)
+static int eth_unbind(struct usb_composite_dev *cdev)
 {
        if (has_rndis()) {
                usb_put_function(f_rndis);
@@ -466,13 +466,13 @@ static int __exit eth_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver eth_driver = {
+static struct usb_composite_driver eth_driver = {
        .name           = "g_ether",
        .dev            = &device_desc,
        .strings        = dev_strings,
        .max_speed      = USB_SPEED_SUPER,
        .bind           = eth_bind,
-       .unbind         = __exit_p(eth_unbind),
+       .unbind         = eth_unbind,
 };
 
 module_usb_composite_driver(eth_driver);
index b01b88e..e821931 100644 (file)
@@ -163,7 +163,7 @@ static int gfs_unbind(struct usb_composite_dev *cdev);
 static int gfs_do_config(struct usb_configuration *c);
 
 
-static __refdata struct usb_composite_driver gfs_driver = {
+static struct usb_composite_driver gfs_driver = {
        .name           = DRIVER_NAME,
        .dev            = &gfs_dev_desc,
        .strings        = gfs_dev_strings,
@@ -304,8 +304,10 @@ static int functionfs_ready_callback(struct ffs_data *ffs)
        gfs_registered = true;
 
        ret = usb_composite_probe(&gfs_driver);
-       if (unlikely(ret < 0))
+       if (unlikely(ret < 0)) {
+               ++missing_funcs;
                gfs_registered = false;
+       }
        
        return ret;
 }
index e02a095..da19c48 100644 (file)
@@ -118,7 +118,7 @@ static struct usb_gadget_strings *dev_strings[] = {
 static struct usb_function_instance *fi_midi;
 static struct usb_function *f_midi;
 
-static int __exit midi_unbind(struct usb_composite_dev *dev)
+static int midi_unbind(struct usb_composite_dev *dev)
 {
        usb_put_function(f_midi);
        usb_put_function_instance(fi_midi);
@@ -133,7 +133,7 @@ static struct usb_configuration midi_config = {
        .MaxPower       = CONFIG_USB_GADGET_VBUS_DRAW,
 };
 
-static int __init midi_bind_config(struct usb_configuration *c)
+static int midi_bind_config(struct usb_configuration *c)
 {
        int status;
 
@@ -150,7 +150,7 @@ static int __init midi_bind_config(struct usb_configuration *c)
        return 0;
 }
 
-static int __init midi_bind(struct usb_composite_dev *cdev)
+static int midi_bind(struct usb_composite_dev *cdev)
 {
        struct f_midi_opts *midi_opts;
        int status;
@@ -185,13 +185,13 @@ put:
        return status;
 }
 
-static __refdata struct usb_composite_driver midi_driver = {
+static struct usb_composite_driver midi_driver = {
        .name           = (char *) longname,
        .dev            = &device_desc,
        .strings        = dev_strings,
        .max_speed      = USB_SPEED_HIGH,
        .bind           = midi_bind,
-       .unbind         = __exit_p(midi_unbind),
+       .unbind         = midi_unbind,
 };
 
 module_usb_composite_driver(midi_driver);
index 614b06d..2baa572 100644 (file)
@@ -106,7 +106,7 @@ static struct usb_gadget_strings *dev_strings[] = {
 
 /****************************** Configurations ******************************/
 
-static int __init do_config(struct usb_configuration *c)
+static int do_config(struct usb_configuration *c)
 {
        struct hidg_func_node *e, *n;
        int status = 0;
@@ -147,7 +147,7 @@ static struct usb_configuration config_driver = {
 
 /****************************** Gadget Bind ******************************/
 
-static int __init hid_bind(struct usb_composite_dev *cdev)
+static int hid_bind(struct usb_composite_dev *cdev)
 {
        struct usb_gadget *gadget = cdev->gadget;
        struct list_head *tmp;
@@ -205,7 +205,7 @@ put:
        return status;
 }
 
-static int __exit hid_unbind(struct usb_composite_dev *cdev)
+static int hid_unbind(struct usb_composite_dev *cdev)
 {
        struct hidg_func_node *n;
 
@@ -216,7 +216,7 @@ static int __exit hid_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static int __init hidg_plat_driver_probe(struct platform_device *pdev)
+static int hidg_plat_driver_probe(struct platform_device *pdev)
 {
        struct hidg_func_descriptor *func = dev_get_platdata(&pdev->dev);
        struct hidg_func_node *entry;
@@ -252,13 +252,13 @@ static int hidg_plat_driver_remove(struct platform_device *pdev)
 /****************************** Some noise ******************************/
 
 
-static __refdata struct usb_composite_driver hidg_driver = {
+static struct usb_composite_driver hidg_driver = {
        .name           = "g_hid",
        .dev            = &device_desc,
        .strings        = dev_strings,
        .max_speed      = USB_SPEED_HIGH,
        .bind           = hid_bind,
-       .unbind         = __exit_p(hid_unbind),
+       .unbind         = hid_unbind,
 };
 
 static struct platform_driver hidg_plat_driver = {
index 8e27a8c..e7bfb08 100644 (file)
@@ -130,7 +130,7 @@ static int msg_thread_exits(struct fsg_common *common)
        return 0;
 }
 
-static int __init msg_do_config(struct usb_configuration *c)
+static int msg_do_config(struct usb_configuration *c)
 {
        struct fsg_opts *opts;
        int ret;
@@ -170,7 +170,7 @@ static struct usb_configuration msg_config_driver = {
 
 /****************************** Gadget Bind ******************************/
 
-static int __init msg_bind(struct usb_composite_dev *cdev)
+static int msg_bind(struct usb_composite_dev *cdev)
 {
        static const struct fsg_operations ops = {
                .thread_exits = msg_thread_exits,
@@ -248,7 +248,7 @@ static int msg_unbind(struct usb_composite_dev *cdev)
 
 /****************************** Some noise ******************************/
 
-static __refdata struct usb_composite_driver msg_driver = {
+static struct usb_composite_driver msg_driver = {
        .name           = "g_mass_storage",
        .dev            = &msg_device_desc,
        .max_speed      = USB_SPEED_SUPER,
index 39d27bb..b21b51f 100644 (file)
@@ -149,7 +149,7 @@ static struct usb_function *f_acm_rndis;
 static struct usb_function *f_rndis;
 static struct usb_function *f_msg_rndis;
 
-static __init int rndis_do_config(struct usb_configuration *c)
+static int rndis_do_config(struct usb_configuration *c)
 {
        struct fsg_opts *fsg_opts;
        int ret;
@@ -237,7 +237,7 @@ static struct usb_function *f_acm_multi;
 static struct usb_function *f_ecm;
 static struct usb_function *f_msg_multi;
 
-static __init int cdc_do_config(struct usb_configuration *c)
+static int cdc_do_config(struct usb_configuration *c)
 {
        struct fsg_opts *fsg_opts;
        int ret;
@@ -466,7 +466,7 @@ fail:
        return status;
 }
 
-static int __exit multi_unbind(struct usb_composite_dev *cdev)
+static int multi_unbind(struct usb_composite_dev *cdev)
 {
 #ifdef CONFIG_USB_G_MULTI_CDC
        usb_put_function(f_msg_multi);
@@ -497,13 +497,13 @@ static int __exit multi_unbind(struct usb_composite_dev *cdev)
 /****************************** Some noise ******************************/
 
 
-static __refdata struct usb_composite_driver multi_driver = {
+static struct usb_composite_driver multi_driver = {
        .name           = "g_multi",
        .dev            = &device_desc,
        .strings        = dev_strings,
        .max_speed      = USB_SPEED_HIGH,
        .bind           = multi_bind,
-       .unbind         = __exit_p(multi_unbind),
+       .unbind         = multi_unbind,
        .needs_serial   = 1,
 };
 
index e90e23d..6ce7421 100644 (file)
@@ -107,7 +107,7 @@ static struct usb_function *f_ncm;
 
 /*-------------------------------------------------------------------------*/
 
-static int __init ncm_do_config(struct usb_configuration *c)
+static int ncm_do_config(struct usb_configuration *c)
 {
        int status;
 
@@ -143,7 +143,7 @@ static struct usb_configuration ncm_config_driver = {
 
 /*-------------------------------------------------------------------------*/
 
-static int __init gncm_bind(struct usb_composite_dev *cdev)
+static int gncm_bind(struct usb_composite_dev *cdev)
 {
        struct usb_gadget       *gadget = cdev->gadget;
        struct f_ncm_opts       *ncm_opts;
@@ -186,7 +186,7 @@ fail:
        return status;
 }
 
-static int __exit gncm_unbind(struct usb_composite_dev *cdev)
+static int gncm_unbind(struct usb_composite_dev *cdev)
 {
        if (!IS_ERR_OR_NULL(f_ncm))
                usb_put_function(f_ncm);
@@ -195,13 +195,13 @@ static int __exit gncm_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver ncm_driver = {
+static struct usb_composite_driver ncm_driver = {
        .name           = "g_ncm",
        .dev            = &device_desc,
        .strings        = dev_strings,
        .max_speed      = USB_SPEED_HIGH,
        .bind           = gncm_bind,
-       .unbind         = __exit_p(gncm_unbind),
+       .unbind         = gncm_unbind,
 };
 
 module_usb_composite_driver(ncm_driver);
index 9b8fd70..4bb498a 100644 (file)
@@ -118,7 +118,7 @@ static struct usb_function_instance *fi_obex1;
 static struct usb_function_instance *fi_obex2;
 static struct usb_function_instance *fi_phonet;
 
-static int __init nokia_bind_config(struct usb_configuration *c)
+static int nokia_bind_config(struct usb_configuration *c)
 {
        struct usb_function *f_acm;
        struct usb_function *f_phonet = NULL;
@@ -224,7 +224,7 @@ err_get_acm:
        return status;
 }
 
-static int __init nokia_bind(struct usb_composite_dev *cdev)
+static int nokia_bind(struct usb_composite_dev *cdev)
 {
        struct usb_gadget       *gadget = cdev->gadget;
        int                     status;
@@ -307,7 +307,7 @@ err_usb:
        return status;
 }
 
-static int __exit nokia_unbind(struct usb_composite_dev *cdev)
+static int nokia_unbind(struct usb_composite_dev *cdev)
 {
        if (!IS_ERR_OR_NULL(f_obex1_cfg2))
                usb_put_function(f_obex1_cfg2);
@@ -338,13 +338,13 @@ static int __exit nokia_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver nokia_driver = {
+static struct usb_composite_driver nokia_driver = {
        .name           = "g_nokia",
        .dev            = &device_desc,
        .strings        = dev_strings,
        .max_speed      = USB_SPEED_HIGH,
        .bind           = nokia_bind,
-       .unbind         = __exit_p(nokia_unbind),
+       .unbind         = nokia_unbind,
 };
 
 module_usb_composite_driver(nokia_driver);
index d5b6ee7..1ce7df1 100644 (file)
@@ -126,7 +126,7 @@ static struct usb_configuration printer_cfg_driver = {
        .bmAttributes           = USB_CONFIG_ATT_ONE | USB_CONFIG_ATT_SELFPOWER,
 };
 
-static int __init printer_do_config(struct usb_configuration *c)
+static int printer_do_config(struct usb_configuration *c)
 {
        struct usb_gadget       *gadget = c->cdev->gadget;
        int                     status = 0;
@@ -152,7 +152,7 @@ static int __init printer_do_config(struct usb_configuration *c)
        return status;
 }
 
-static int __init printer_bind(struct usb_composite_dev *cdev)
+static int printer_bind(struct usb_composite_dev *cdev)
 {
        struct f_printer_opts *opts;
        int ret, len;
@@ -191,7 +191,7 @@ static int __init printer_bind(struct usb_composite_dev *cdev)
        return ret;
 }
 
-static int __exit printer_unbind(struct usb_composite_dev *cdev)
+static int printer_unbind(struct usb_composite_dev *cdev)
 {
        usb_put_function(f_printer);
        usb_put_function_instance(fi_printer);
@@ -199,7 +199,7 @@ static int __exit printer_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver printer_driver = {
+static struct usb_composite_driver printer_driver = {
        .name           = shortname,
        .dev            = &device_desc,
        .strings        = dev_strings,
index 1f5f978..8b7528f 100644 (file)
@@ -174,7 +174,7 @@ out:
        return ret;
 }
 
-static int __init gs_bind(struct usb_composite_dev *cdev)
+static int gs_bind(struct usb_composite_dev *cdev)
 {
        int                     status;
 
@@ -230,7 +230,7 @@ static int gs_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver gserial_driver = {
+static struct usb_composite_driver gserial_driver = {
        .name           = "g_serial",
        .dev            = &device_desc,
        .strings        = dev_strings,
index 8b80add..f9b4882 100644 (file)
@@ -2397,7 +2397,7 @@ static int usb_target_bind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver usbg_driver = {
+static struct usb_composite_driver usbg_driver = {
        .name           = "g_target",
        .dev            = &usbg_device_desc,
        .strings        = usbg_strings,
index 04a3da2..72c976b 100644 (file)
@@ -334,7 +334,7 @@ static const struct uvc_descriptor_header * const uvc_ss_streaming_cls[] = {
  * USB configuration
  */
 
-static int __init
+static int
 webcam_config_bind(struct usb_configuration *c)
 {
        int status = 0;
@@ -358,7 +358,7 @@ static struct usb_configuration webcam_config_driver = {
        .MaxPower               = CONFIG_USB_GADGET_VBUS_DRAW,
 };
 
-static int /* __init_or_exit */
+static int
 webcam_unbind(struct usb_composite_dev *cdev)
 {
        if (!IS_ERR_OR_NULL(f_uvc))
@@ -368,7 +368,7 @@ webcam_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static int __init
+static int
 webcam_bind(struct usb_composite_dev *cdev)
 {
        struct f_uvc_opts *uvc_opts;
@@ -422,7 +422,7 @@ error:
  * Driver
  */
 
-static __refdata struct usb_composite_driver webcam_driver = {
+static struct usb_composite_driver webcam_driver = {
        .name           = "g_webcam",
        .dev            = &webcam_device_descriptor,
        .strings        = webcam_device_strings,
index 5ee9515..c986e8a 100644 (file)
@@ -272,7 +272,7 @@ static struct usb_function_instance *func_inst_lb;
 module_param_named(qlen, gzero_options.qlen, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(qlen, "depth of loopback queue");
 
-static int __init zero_bind(struct usb_composite_dev *cdev)
+static int zero_bind(struct usb_composite_dev *cdev)
 {
        struct f_ss_opts        *ss_opts;
        struct f_lb_opts        *lb_opts;
@@ -400,7 +400,7 @@ static int zero_unbind(struct usb_composite_dev *cdev)
        return 0;
 }
 
-static __refdata struct usb_composite_driver zero_driver = {
+static struct usb_composite_driver zero_driver = {
        .name           = "zero",
        .dev            = &device_desc,
        .strings        = dev_strings,
index 2fbedca..fc42264 100644 (file)
@@ -1942,7 +1942,7 @@ err_unprepare_fclk:
        return retval;
 }
 
-static int __exit at91udc_remove(struct platform_device *pdev)
+static int at91udc_remove(struct platform_device *pdev)
 {
        struct at91_udc *udc = platform_get_drvdata(pdev);
        unsigned long   flags;
@@ -2018,7 +2018,7 @@ static int at91udc_resume(struct platform_device *pdev)
 #endif
 
 static struct platform_driver at91_udc_driver = {
-       .remove         = __exit_p(at91udc_remove),
+       .remove         = at91udc_remove,
        .shutdown       = at91udc_shutdown,
        .suspend        = at91udc_suspend,
        .resume         = at91udc_resume,
index 4c01953..351d485 100644 (file)
@@ -2186,7 +2186,7 @@ static int usba_udc_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int __exit usba_udc_remove(struct platform_device *pdev)
+static int usba_udc_remove(struct platform_device *pdev)
 {
        struct usba_udc *udc;
        int i;
@@ -2258,7 +2258,7 @@ static int usba_udc_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(usba_udc_pm_ops, usba_udc_suspend, usba_udc_resume);
 
 static struct platform_driver udc_driver = {
-       .remove         = __exit_p(usba_udc_remove),
+       .remove         = usba_udc_remove,
        .driver         = {
                .name           = "atmel_usba_udc",
                .pm             = &usba_udc_pm_ops,
index 55fcb93..c60022b 100644 (file)
@@ -2525,7 +2525,7 @@ err_kfree:
 /* Driver removal function
  * Free resources and finish pending transactions
  */
-static int __exit fsl_udc_remove(struct platform_device *pdev)
+static int fsl_udc_remove(struct platform_device *pdev)
 {
        struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        struct fsl_usb2_platform_data *pdata = dev_get_platdata(&pdev->dev);
@@ -2663,7 +2663,7 @@ static const struct platform_device_id fsl_udc_devtype[] = {
 };
 MODULE_DEVICE_TABLE(platform, fsl_udc_devtype);
 static struct platform_driver udc_driver = {
-       .remove         = __exit_p(fsl_udc_remove),
+       .remove         = fsl_udc_remove,
        /* Just for FSL i.mx SoC currently */
        .id_table       = fsl_udc_devtype,
        /* these suspend and resume are not usb suspend and resume */
index fb4df15..3970f45 100644 (file)
@@ -1342,7 +1342,7 @@ static const struct usb_gadget_ops fusb300_gadget_ops = {
        .udc_stop       = fusb300_udc_stop,
 };
 
-static int __exit fusb300_remove(struct platform_device *pdev)
+static int fusb300_remove(struct platform_device *pdev)
 {
        struct fusb300 *fusb300 = platform_get_drvdata(pdev);
 
@@ -1492,7 +1492,7 @@ clean_up:
 }
 
 static struct platform_driver fusb300_driver = {
-       .remove =       __exit_p(fusb300_remove),
+       .remove =       fusb300_remove,
        .driver         = {
                .name = (char *) udc_name,
        },
index 8c7c83c..309706f 100644 (file)
@@ -1528,7 +1528,7 @@ static const struct usb_gadget_ops m66592_gadget_ops = {
        .pullup                 = m66592_pullup,
 };
 
-static int __exit m66592_remove(struct platform_device *pdev)
+static int m66592_remove(struct platform_device *pdev)
 {
        struct m66592           *m66592 = platform_get_drvdata(pdev);
 
@@ -1695,7 +1695,7 @@ clean_up:
 
 /*-------------------------------------------------------------------------*/
 static struct platform_driver m66592_driver = {
-       .remove =       __exit_p(m66592_remove),
+       .remove =       m66592_remove,
        .driver         = {
                .name = (char *) udc_name,
        },
index 2495fe9..0293f71 100644 (file)
@@ -1820,7 +1820,7 @@ static const struct usb_gadget_ops r8a66597_gadget_ops = {
        .set_selfpowered        = r8a66597_set_selfpowered,
 };
 
-static int __exit r8a66597_remove(struct platform_device *pdev)
+static int r8a66597_remove(struct platform_device *pdev)
 {
        struct r8a66597         *r8a66597 = platform_get_drvdata(pdev);
 
@@ -1974,7 +1974,7 @@ clean_up2:
 
 /*-------------------------------------------------------------------------*/
 static struct platform_driver r8a66597_driver = {
-       .remove =       __exit_p(r8a66597_remove),
+       .remove =       r8a66597_remove,
        .driver         = {
                .name = (char *) udc_name,
        },
index b808951..99fd9a5 100644 (file)
@@ -1487,7 +1487,7 @@ static int s3c2410_udc_pullup(struct usb_gadget *gadget, int is_on)
 
        dprintk(DEBUG_NORMAL, "%s()\n", __func__);
 
-       s3c2410_udc_set_pullup(udc, is_on ? 0 : 1);
+       s3c2410_udc_set_pullup(udc, is_on);
        return 0;
 }
 
index dd3e9fd..1f24274 100644 (file)
@@ -2071,8 +2071,8 @@ static int xudc_probe(struct platform_device *pdev)
        /* Map the registers */
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        udc->addr = devm_ioremap_resource(&pdev->dev, res);
-       if (!udc->addr)
-               return -ENOMEM;
+       if (IS_ERR(udc->addr))
+               return PTR_ERR(udc->addr);
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0) {
index f5397a5..7d34cbf 100644 (file)
@@ -2026,8 +2026,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
                break;
        case COMP_DEV_ERR:
        case COMP_STALL:
+               frame->status = -EPROTO;
+               skip_td = true;
+               break;
        case COMP_TX_ERR:
                frame->status = -EPROTO;
+               if (event_trb != td->last_trb)
+                       return 0;
                skip_td = true;
                break;
        case COMP_STOP:
@@ -2640,7 +2645,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
                xhci_halt(xhci);
 hw_died:
                spin_unlock(&xhci->lock);
-               return -ESHUTDOWN;
+               return IRQ_HANDLED;
        }
 
        /*
index ec8ac16..36bf089 100644 (file)
@@ -3682,18 +3682,21 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev)
 {
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
        unsigned long flags;
-       int ret;
+       int ret, slot_id;
        struct xhci_command *command;
 
        command = xhci_alloc_command(xhci, false, false, GFP_KERNEL);
        if (!command)
                return 0;
 
+       /* xhci->slot_id and xhci->addr_dev are not thread-safe */
+       mutex_lock(&xhci->mutex);
        spin_lock_irqsave(&xhci->lock, flags);
        command->completion = &xhci->addr_dev;
        ret = xhci_queue_slot_control(xhci, command, TRB_ENABLE_SLOT, 0);
        if (ret) {
                spin_unlock_irqrestore(&xhci->lock, flags);
+               mutex_unlock(&xhci->mutex);
                xhci_dbg(xhci, "FIXME: allocate a command ring segment\n");
                kfree(command);
                return 0;
@@ -3702,8 +3705,10 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev)
        spin_unlock_irqrestore(&xhci->lock, flags);
 
        wait_for_completion(command->completion);
+       slot_id = xhci->slot_id;
+       mutex_unlock(&xhci->mutex);
 
-       if (!xhci->slot_id || command->status != COMP_SUCCESS) {
+       if (!slot_id || command->status != COMP_SUCCESS) {
                xhci_err(xhci, "Error while assigning device slot ID\n");
                xhci_err(xhci, "Max number of devices this xHCI host supports is %u.\n",
                                HCS_MAX_SLOTS(
@@ -3728,11 +3733,11 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev)
         * xhci_discover_or_reset_device(), which may be called as part of
         * mass storage driver error handling.
         */
-       if (!xhci_alloc_virt_device(xhci, xhci->slot_id, udev, GFP_NOIO)) {
+       if (!xhci_alloc_virt_device(xhci, slot_id, udev, GFP_NOIO)) {
                xhci_warn(xhci, "Could not allocate xHCI USB device data structures\n");
                goto disable_slot;
        }
-       udev->slot_id = xhci->slot_id;
+       udev->slot_id = slot_id;
 
 #ifndef CONFIG_USB_DEFAULT_PERSIST
        /*
@@ -3778,12 +3783,15 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
        struct xhci_slot_ctx *slot_ctx;
        struct xhci_input_control_ctx *ctrl_ctx;
        u64 temp_64;
-       struct xhci_command *command;
+       struct xhci_command *command = NULL;
+
+       mutex_lock(&xhci->mutex);
 
        if (!udev->slot_id) {
                xhci_dbg_trace(xhci, trace_xhci_dbg_address,
                                "Bad Slot ID %d", udev->slot_id);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
 
        virt_dev = xhci->devs[udev->slot_id];
@@ -3796,7 +3804,8 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
                 */
                xhci_warn(xhci, "Virt dev invalid for slot_id 0x%x!\n",
                        udev->slot_id);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
 
        if (setup == SETUP_CONTEXT_ONLY) {
@@ -3804,13 +3813,15 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
                if (GET_SLOT_STATE(le32_to_cpu(slot_ctx->dev_state)) ==
                    SLOT_STATE_DEFAULT) {
                        xhci_dbg(xhci, "Slot already in default state\n");
-                       return 0;
+                       goto out;
                }
        }
 
        command = xhci_alloc_command(xhci, false, false, GFP_KERNEL);
-       if (!command)
-               return -ENOMEM;
+       if (!command) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
        command->in_ctx = virt_dev->in_ctx;
        command->completion = &xhci->addr_dev;
@@ -3820,8 +3831,8 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
        if (!ctrl_ctx) {
                xhci_warn(xhci, "%s: Could not get input context, bad type.\n",
                                __func__);
-               kfree(command);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
        /*
         * If this is the first Set Address since device plug-in or
@@ -3848,8 +3859,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
                spin_unlock_irqrestore(&xhci->lock, flags);
                xhci_dbg_trace(xhci, trace_xhci_dbg_address,
                                "FIXME: allocate a command ring segment");
-               kfree(command);
-               return ret;
+               goto out;
        }
        xhci_ring_cmd_db(xhci);
        spin_unlock_irqrestore(&xhci->lock, flags);
@@ -3896,10 +3906,8 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
                ret = -EINVAL;
                break;
        }
-       if (ret) {
-               kfree(command);
-               return ret;
-       }
+       if (ret)
+               goto out;
        temp_64 = xhci_read_64(xhci, &xhci->op_regs->dcbaa_ptr);
        xhci_dbg_trace(xhci, trace_xhci_dbg_address,
                        "Op regs DCBAA ptr = %#016llx", temp_64);
@@ -3932,8 +3940,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
        xhci_dbg_trace(xhci, trace_xhci_dbg_address,
                       "Internal device address = %d",
                       le32_to_cpu(slot_ctx->dev_state) & DEV_ADDR_MASK);
+out:
+       mutex_unlock(&xhci->mutex);
        kfree(command);
-       return 0;
+       return ret;
 }
 
 int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev)
@@ -4855,6 +4865,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
                return 0;
        }
 
+       mutex_init(&xhci->mutex);
        xhci->cap_regs = hcd->regs;
        xhci->op_regs = hcd->regs +
                HC_LENGTH(readl(&xhci->cap_regs->hc_capbase));
@@ -5011,4 +5022,12 @@ static int __init xhci_hcd_init(void)
        BUILD_BUG_ON(sizeof(struct xhci_run_regs) != (8+8*128)*32/8);
        return 0;
 }
+
+/*
+ * If an init function is provided, an exit function must also be provided
+ * to allow module unload.
+ */
+static void __exit xhci_hcd_fini(void) { }
+
 module_init(xhci_hcd_init);
+module_exit(xhci_hcd_fini);
index 8e421b8..6977f84 100644 (file)
@@ -1267,7 +1267,7 @@ union xhci_trb {
  * since the command ring is 64-byte aligned.
  * It must also be greater than 16.
  */
-#define TRBS_PER_SEGMENT       64
+#define TRBS_PER_SEGMENT       256
 /* Allow two commands + a link TRB, along with any reserved command TRBs */
 #define MAX_RSVD_CMD_TRBS      (TRBS_PER_SEGMENT - 3)
 #define TRB_SEGMENT_SIZE       (TRBS_PER_SEGMENT*16)
@@ -1497,6 +1497,8 @@ struct xhci_hcd {
        struct list_head        lpm_failed_devs;
 
        /* slot enabling and address device helpers */
+       /* these are not thread safe so use mutex */
+       struct mutex mutex;
        struct completion       addr_dev;
        int slot_id;
        /* For USB 3.0 LPM enable/disable. */
index 3789b08..6dca3d7 100644 (file)
@@ -2021,13 +2021,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
        if (musb->ops->quirks)
                musb->io.quirks = musb->ops->quirks;
 
-       /* At least tusb6010 has it's own offsets.. */
-       if (musb->ops->ep_offset)
-               musb->io.ep_offset = musb->ops->ep_offset;
-       if (musb->ops->ep_select)
-               musb->io.ep_select = musb->ops->ep_select;
-
-       /* ..and some devices use indexed offset or flat offset */
+       /* Most devices use indexed offset or flat offset */
        if (musb->io.quirks & MUSB_INDEXED_EP) {
                musb->io.ep_offset = musb_indexed_ep_offset;
                musb->io.ep_select = musb_indexed_ep_select;
@@ -2036,6 +2030,12 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
                musb->io.ep_select = musb_flat_ep_select;
        }
 
+       /* At least tusb6010 has its own offsets */
+       if (musb->ops->ep_offset)
+               musb->io.ep_offset = musb->ops->ep_offset;
+       if (musb->ops->ep_select)
+               musb->io.ep_select = musb->ops->ep_select;
+
        if (musb->ops->fifo_mode)
                fifo_mode = musb->ops->fifo_mode;
        else
index 7225d52..03ab0c6 100644 (file)
@@ -1179,7 +1179,7 @@ static int ab8500_usb_irq_setup(struct platform_device *pdev,
                }
                err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
                                ab8500_usb_link_status_irq,
-                               IRQF_NO_SUSPEND | IRQF_SHARED,
+                               IRQF_NO_SUSPEND | IRQF_SHARED | IRQF_ONESHOT,
                                "usb-link-status", ab);
                if (err < 0) {
                        dev_err(ab->dev, "request_irq failed for link status irq\n");
@@ -1195,7 +1195,7 @@ static int ab8500_usb_irq_setup(struct platform_device *pdev,
                }
                err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
                                ab8500_usb_disconnect_irq,
-                               IRQF_NO_SUSPEND | IRQF_SHARED,
+                               IRQF_NO_SUSPEND | IRQF_SHARED | IRQF_ONESHOT,
                                "usb-id-fall", ab);
                if (err < 0) {
                        dev_err(ab->dev, "request_irq failed for ID fall irq\n");
@@ -1211,7 +1211,7 @@ static int ab8500_usb_irq_setup(struct platform_device *pdev,
                }
                err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
                                ab8500_usb_disconnect_irq,
-                               IRQF_NO_SUSPEND | IRQF_SHARED,
+                               IRQF_NO_SUSPEND | IRQF_SHARED | IRQF_ONESHOT,
                                "usb-vbus-fall", ab);
                if (err < 0) {
                        dev_err(ab->dev, "request_irq failed for Vbus fall irq\n");
index 1e0e10d..3af263c 100644 (file)
@@ -94,7 +94,7 @@ struct isp1301 {
 
 #if defined(CONFIG_MACH_OMAP_H2) || defined(CONFIG_MACH_OMAP_H3)
 
-#if    defined(CONFIG_TPS65010) || defined(CONFIG_TPS65010_MODULE)
+#if    defined(CONFIG_TPS65010) || (defined(CONFIG_TPS65010_MODULE) && defined(MODULE))
 
 #include <linux/i2c/tps65010.h>
 
index 845f658..2b28443 100644 (file)
@@ -401,7 +401,8 @@ static int tahvo_usb_probe(struct platform_device *pdev)
        dev_set_drvdata(&pdev->dev, tu);
 
        tu->irq = platform_get_irq(pdev, 0);
-       ret = request_threaded_irq(tu->irq, NULL, tahvo_usb_vbus_interrupt, 0,
+       ret = request_threaded_irq(tu->irq, NULL, tahvo_usb_vbus_interrupt,
+                                  IRQF_ONESHOT,
                                   "tahvo-vbus", tu);
        if (ret) {
                dev_err(&pdev->dev, "could not register tahvo-vbus irq: %d\n",
index 8597cf9..c0f5c65 100644 (file)
@@ -611,6 +611,8 @@ struct usbhs_pkt_handle usbhs_fifo_pio_push_handler = {
 static int usbhsf_prepare_pop(struct usbhs_pkt *pkt, int *is_done)
 {
        struct usbhs_pipe *pipe = pkt->pipe;
+       struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe);
+       struct usbhs_fifo *fifo = usbhsf_get_cfifo(priv);
 
        if (usbhs_pipe_is_busy(pipe))
                return 0;
@@ -624,6 +626,9 @@ static int usbhsf_prepare_pop(struct usbhs_pkt *pkt, int *is_done)
        usbhs_pipe_data_sequence(pipe, pkt->sequence);
        pkt->sequence = -1; /* -1 sequence will be ignored */
 
+       if (usbhs_pipe_is_dcp(pipe))
+               usbhsf_fifo_clear(pipe, fifo);
+
        usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->length);
        usbhs_pipe_enable(pipe);
        usbhs_pipe_running(pipe, 1);
@@ -673,7 +678,14 @@ static int usbhsf_pio_try_pop(struct usbhs_pkt *pkt, int *is_done)
                *is_done = 1;
                usbhsf_rx_irq_ctrl(pipe, 0);
                usbhs_pipe_running(pipe, 0);
-               usbhs_pipe_disable(pipe);       /* disable pipe first */
+               /*
+                * If function mode, since this controller is possible to enter
+                * Control Write status stage at this timing, this driver
+                * should not disable the pipe. If such a case happens, this
+                * controller is not able to complete the status stage.
+                */
+               if (!usbhs_mod_is_host(priv) && !usbhs_pipe_is_dcp(pipe))
+                       usbhs_pipe_disable(pipe);       /* disable pipe first */
        }
 
        /*
@@ -1227,15 +1239,21 @@ static void usbhsf_dma_init_dt(struct device *dev, struct usbhs_fifo *fifo,
 {
        char name[16];
 
-       snprintf(name, sizeof(name), "tx%d", channel);
-       fifo->tx_chan = dma_request_slave_channel_reason(dev, name);
-       if (IS_ERR(fifo->tx_chan))
-               fifo->tx_chan = NULL;
-
-       snprintf(name, sizeof(name), "rx%d", channel);
-       fifo->rx_chan = dma_request_slave_channel_reason(dev, name);
-       if (IS_ERR(fifo->rx_chan))
-               fifo->rx_chan = NULL;
+       /*
+        * To avoid complex handing for DnFIFOs, the driver uses each
+        * DnFIFO as TX or RX direction (not bi-direction).
+        * So, the driver uses odd channels for TX, even channels for RX.
+        */
+       snprintf(name, sizeof(name), "ch%d", channel);
+       if (channel & 1) {
+               fifo->tx_chan = dma_request_slave_channel_reason(dev, name);
+               if (IS_ERR(fifo->tx_chan))
+                       fifo->tx_chan = NULL;
+       } else {
+               fifo->rx_chan = dma_request_slave_channel_reason(dev, name);
+               if (IS_ERR(fifo->rx_chan))
+                       fifo->rx_chan = NULL;
+       }
 }
 
 static void usbhsf_dma_init(struct usbhs_priv *priv, struct usbhs_fifo *fifo,
index 84ce2d7..ffd739e 100644 (file)
@@ -127,6 +127,8 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
        { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */
        { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */
+       { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
+       { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
        { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
        { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
        { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */
index 8eb68a3..4c8b3b8 100644 (file)
@@ -699,6 +699,7 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(XSENS_VID, XSENS_AWINDA_DONGLE_PID) },
        { USB_DEVICE(XSENS_VID, XSENS_AWINDA_STATION_PID) },
        { USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) },
+       { USB_DEVICE(XSENS_VID, XSENS_MTDEVBOARD_PID) },
        { USB_DEVICE(XSENS_VID, XSENS_MTW_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_OMNI1509) },
        { USB_DEVICE(MOBILITY_VID, MOBILITY_USB_SERIAL_PID) },
index 4e4f46f..792e054 100644 (file)
 #define XSENS_AWINDA_STATION_PID 0x0101
 #define XSENS_AWINDA_DONGLE_PID 0x0102
 #define XSENS_MTW_PID          0x0200  /* Xsens MTw */
+#define XSENS_MTDEVBOARD_PID   0x0300  /* Motion Tracker Development Board */
 #define XSENS_CONVERTER_PID    0xD00D  /* Xsens USB-serial converter */
 
 /* Xsens devices using FTDI VID */
index 829604d..f5257af 100644 (file)
@@ -61,7 +61,6 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(DCU10_VENDOR_ID, DCU10_PRODUCT_ID) },
        { USB_DEVICE(SITECOM_VENDOR_ID, SITECOM_PRODUCT_ID) },
        { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_ID) },
-       { USB_DEVICE(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_ID) },
        { USB_DEVICE(SIEMENS_VENDOR_ID, SIEMENS_PRODUCT_ID_SX1),
                .driver_info = PL2303_QUIRK_UART_STATE_IDX0 },
        { USB_DEVICE(SIEMENS_VENDOR_ID, SIEMENS_PRODUCT_ID_X65),
index 71fd9da..e3b7af8 100644 (file)
 #define ALCATEL_VENDOR_ID      0x11f7
 #define ALCATEL_PRODUCT_ID     0x02df
 
-/* Samsung I330 phone cradle */
-#define SAMSUNG_VENDOR_ID      0x04e8
-#define SAMSUNG_PRODUCT_ID     0x8001
-
 #define SIEMENS_VENDOR_ID      0x11f5
 #define SIEMENS_PRODUCT_ID_SX1 0x0001
 #define SIEMENS_PRODUCT_ID_X65 0x0003
index bf2bd40..60afb39 100644 (file)
@@ -95,7 +95,7 @@ static const struct usb_device_id id_table[] = {
                .driver_info = (kernel_ulong_t)&palm_os_4_probe },
        { USB_DEVICE(ACER_VENDOR_ID, ACER_S10_ID),
                .driver_info = (kernel_ulong_t)&palm_os_4_probe },
-       { USB_DEVICE(SAMSUNG_VENDOR_ID, SAMSUNG_SCH_I330_ID),
+       { USB_DEVICE_INTERFACE_CLASS(SAMSUNG_VENDOR_ID, SAMSUNG_SCH_I330_ID, 0xff),
                .driver_info = (kernel_ulong_t)&palm_os_4_probe },
        { USB_DEVICE(SAMSUNG_VENDOR_ID, SAMSUNG_SPH_I500_ID),
                .driver_info = (kernel_ulong_t)&palm_os_4_probe },
index d684b4b..caf1888 100644 (file)
@@ -766,6 +766,13 @@ UNUSUAL_DEV(  0x059f, 0x0643, 0x0000, 0x0000,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_GO_SLOW ),
 
+/* Reported by Christian Schaller <cschalle@redhat.com> */
+UNUSUAL_DEV(  0x059f, 0x0651, 0x0000, 0x0000,
+               "LaCie",
+               "External HDD",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_NO_WP_DETECT ),
+
 /* Submitted by Joel Bourquard <numlock@freesurf.ch>
  * Some versions of this device need the SubClass and Protocol overrides
  * while others don't.
index 69fab0f..e9851ad 100644 (file)
@@ -907,8 +907,14 @@ static void vfio_pci_request(void *device_data, unsigned int count)
        mutex_lock(&vdev->igate);
 
        if (vdev->req_trigger) {
-               dev_dbg(&vdev->pdev->dev, "Requesting device from user\n");
+               if (!(count % 10))
+                       dev_notice_ratelimited(&vdev->pdev->dev,
+                               "Relaying device request to user (#%u)\n",
+                               count);
                eventfd_signal(vdev->req_trigger, 1);
+       } else if (count == 0) {
+               dev_warn(&vdev->pdev->dev,
+                       "No device request channel registered, blocked until released by user\n");
        }
 
        mutex_unlock(&vdev->igate);
index 0d33662..e1278fe 100644 (file)
@@ -710,6 +710,8 @@ void *vfio_del_group_dev(struct device *dev)
        void *device_data = device->device_data;
        struct vfio_unbound_dev *unbound;
        unsigned int i = 0;
+       long ret;
+       bool interrupted = false;
 
        /*
         * The group exists so long as we have a device reference.  Get
@@ -755,9 +757,22 @@ void *vfio_del_group_dev(struct device *dev)
 
                vfio_device_put(device);
 
-       } while (wait_event_interruptible_timeout(vfio.release_q,
-                                                 !vfio_dev_present(group, dev),
-                                                 HZ * 10) <= 0);
+               if (interrupted) {
+                       ret = wait_event_timeout(vfio.release_q,
+                                       !vfio_dev_present(group, dev), HZ * 10);
+               } else {
+                       ret = wait_event_interruptible_timeout(vfio.release_q,
+                                       !vfio_dev_present(group, dev), HZ * 10);
+                       if (ret == -ERESTARTSYS) {
+                               interrupted = true;
+                               dev_warn(dev,
+                                        "Device is currently in use, task"
+                                        " \"%s\" (%d) "
+                                        "blocked until device is released",
+                                        current->comm, task_pid_nr(current));
+                       }
+               }
+       } while (ret <= 0);
 
        vfio_group_put(group);
 
index 5e19bb5..ea32b38 100644 (file)
@@ -1409,8 +1409,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
                         * dependency now.
                         */
                        se_tpg = &tpg->se_tpg;
-                       ret = configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys,
-                                                  &se_tpg->tpg_group.cg_item);
+                       ret = target_depend_item(&se_tpg->tpg_group.cg_item);
                        if (ret) {
                                pr_warn("configfs_depend_item() failed: %d\n", ret);
                                kfree(vs_tpg);
@@ -1513,8 +1512,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
                 * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur.
                 */
                se_tpg = &tpg->se_tpg;
-               configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys,
-                                      &se_tpg->tpg_group.cg_item);
+               target_undepend_item(&se_tpg->tpg_group.cg_item);
        }
        if (match) {
                for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
index 3a145a6..6897f1c 100644 (file)
@@ -274,6 +274,10 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
        pb->pwm = devm_pwm_get(&pdev->dev, NULL);
        if (IS_ERR(pb->pwm)) {
+               ret = PTR_ERR(pb->pwm);
+               if (ret == -EPROBE_DEFER)
+                       goto err_alloc;
+
                dev_err(&pdev->dev, "unable to request PWM, trying legacy API\n");
                pb->legacy = true;
                pb->pwm = pwm_request(data->pwm_id, "pwm-backlight");
index e894eb2..eba1b7a 100644 (file)
@@ -423,6 +423,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
                if (cpu == -1)
                        irq_set_affinity_hint(irq, NULL);
                else {
+                       cpumask_clear(mask);
                        cpumask_set_cpu(cpu, mask);
                        irq_set_affinity_hint(irq, mask);
                }
index 5db43fc..7dd4631 100644 (file)
@@ -345,6 +345,15 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static void evtchn_2l_resume(void)
+{
+       int i;
+
+       for_each_online_cpu(i)
+               memset(per_cpu(cpu_evtchn_mask, i), 0, sizeof(xen_ulong_t) *
+                               EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
+}
+
 static const struct evtchn_ops evtchn_ops_2l = {
        .max_channels      = evtchn_2l_max_channels,
        .nr_channels       = evtchn_2l_max_channels,
@@ -356,6 +365,7 @@ static const struct evtchn_ops evtchn_ops_2l = {
        .mask              = evtchn_2l_mask,
        .unmask            = evtchn_2l_unmask,
        .handle_events     = evtchn_2l_handle_events,
+       .resume            = evtchn_2l_resume,
 };
 
 void __init xen_evtchn_2l_init(void)
index 70fba97..3838795 100644 (file)
@@ -529,8 +529,8 @@ static unsigned int __startup_pirq(unsigned int irq)
        if (rc)
                goto err;
 
-       bind_evtchn_to_cpu(evtchn, 0);
        info->evtchn = evtchn;
+       bind_evtchn_to_cpu(evtchn, 0);
 
        rc = xen_evtchn_port_setup(info);
        if (rc)
@@ -957,7 +957,7 @@ unsigned xen_evtchn_nr_channels(void)
 }
 EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
 
-int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
 {
        struct evtchn_bind_virq bind_virq;
        int evtchn, irq, ret;
@@ -971,8 +971,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
                if (irq < 0)
                        goto out;
 
-               irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
-                                             handle_percpu_irq, "virq");
+               if (percpu)
+                       irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
+                                                     handle_percpu_irq, "virq");
+               else
+                       irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+                                                     handle_edge_irq, "virq");
 
                bind_virq.virq = virq;
                bind_virq.vcpu = cpu;
@@ -1062,7 +1066,7 @@ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
 {
        int irq, retval;
 
-       irq = bind_virq_to_irq(virq, cpu);
+       irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
        if (irq < 0)
                return irq;
        retval = request_irq(irq, handler, irqflags, devname, dev_id);
@@ -1279,8 +1283,9 @@ void rebind_evtchn_irq(int evtchn, int irq)
 
        mutex_unlock(&irq_mapping_update_lock);
 
-       /* new event channels are always bound to cpu 0 */
-       irq_set_affinity(irq, cpumask_of(0));
+        bind_evtchn_to_cpu(evtchn, info->cpu);
+       /* This will be deferred until interrupt is processed */
+       irq_set_affinity(irq, cpumask_of(info->cpu));
 
        /* Unmask the event channel. */
        enable_irq(irq);
index d5bb1a3..8927485 100644 (file)
@@ -327,30 +327,10 @@ static int map_grant_pages(struct grant_map *map)
        return err;
 }
 
-struct unmap_grant_pages_callback_data
-{
-       struct completion completion;
-       int result;
-};
-
-static void unmap_grant_callback(int result,
-                                struct gntab_unmap_queue_data *data)
-{
-       struct unmap_grant_pages_callback_data* d = data->data;
-
-       d->result = result;
-       complete(&d->completion);
-}
-
 static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 {
        int i, err = 0;
        struct gntab_unmap_queue_data unmap_data;
-       struct unmap_grant_pages_callback_data data;
-
-       init_completion(&data.completion);
-       unmap_data.data = &data;
-       unmap_data.done= &unmap_grant_callback;
 
        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
                int pgno = (map->notify.addr >> PAGE_SHIFT);
@@ -367,11 +347,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
        unmap_data.pages = map->pages + offset;
        unmap_data.count = pages;
 
-       gnttab_unmap_refs_async(&unmap_data);
-
-       wait_for_completion(&data.completion);
-       if (data.result)
-               return data.result;
+       err = gnttab_unmap_refs_sync(&unmap_data);
+       if (err)
+               return err;
 
        for (i = 0; i < pages; i++) {
                if (map->unmap_ops[offset+i].status)
index 17972fb..b1c7170 100644 (file)
@@ -123,6 +123,11 @@ struct gnttab_ops {
        int (*query_foreign_access)(grant_ref_t ref);
 };
 
+struct unmap_refs_callback_data {
+       struct completion completion;
+       int result;
+};
+
 static struct gnttab_ops *gnttab_interface;
 
 static int grant_table_version;
@@ -863,6 +868,29 @@ void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
 }
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async);
 
+static void unmap_refs_callback(int result,
+               struct gntab_unmap_queue_data *data)
+{
+       struct unmap_refs_callback_data *d = data->data;
+
+       d->result = result;
+       complete(&d->completion);
+}
+
+int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item)
+{
+       struct unmap_refs_callback_data data;
+
+       init_completion(&data.completion);
+       item->data = &data;
+       item->done = &unmap_refs_callback;
+       gnttab_unmap_refs_async(item);
+       wait_for_completion(&data.completion);
+
+       return data.result;
+}
+EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);
+
 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
 {
        int rc;
index bf19407..9e6a851 100644 (file)
@@ -131,6 +131,8 @@ static void do_suspend(void)
                goto out_resume;
        }
 
+       xen_arch_suspend();
+
        si.cancelled = 1;
 
        err = stop_machine(xen_suspend, &si, cpumask_of(0));
@@ -148,11 +150,12 @@ static void do_suspend(void)
                si.cancelled = 1;
        }
 
+       xen_arch_resume();
+
 out_resume:
-       if (!si.cancelled) {
-               xen_arch_resume();
+       if (!si.cancelled)
                xs_resume();
-       else
+       else
                xs_suspend_cancel();
 
        dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
index 810ad41..4c54932 100644 (file)
@@ -235,7 +235,7 @@ retry:
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
                while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-                       xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order);
+                       xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order);
                        if (xen_io_tlb_start)
                                break;
                        order--;
index 75fe3d4..9c23420 100644 (file)
@@ -16,8 +16,8 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-bool permissive;
-module_param(permissive, bool, 0644);
+bool xen_pcibk_permissive;
+module_param_named(permissive, xen_pcibk_permissive, bool, 0644);
 
 /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
  * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */
@@ -262,7 +262,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
                 * This means that some fields may still be read-only because
                 * they have entries in the config_field list that intercept
                 * the write and do nothing. */
-               if (dev_data->permissive || permissive) {
+               if (dev_data->permissive || xen_pcibk_permissive) {
                        switch (size) {
                        case 1:
                                err = pci_write_config_byte(dev, offset,
index 2e1d73d..62461a8 100644 (file)
@@ -64,7 +64,7 @@ struct config_field_entry {
        void *data;
 };
 
-extern bool permissive;
+extern bool xen_pcibk_permissive;
 
 #define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
 
index c2260a0..ad3d17d 100644 (file)
@@ -118,7 +118,7 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
 
        cmd->val = value;
 
-       if (!permissive && (!dev_data || !dev_data->permissive))
+       if (!xen_pcibk_permissive && (!dev_data || !dev_data->permissive))
                return 0;
 
        /* Only allow the guest to control certain bits. */
index 564b315..5390a67 100644 (file)
@@ -57,6 +57,7 @@
 #include <xen/xen.h>
 #include <xen/xenbus.h>
 #include <xen/events.h>
+#include <xen/xen-ops.h>
 #include <xen/page.h>
 
 #include <xen/hvm.h>
@@ -735,6 +736,30 @@ static int __init xenstored_local_init(void)
        return err;
 }
 
+static int xenbus_resume_cb(struct notifier_block *nb,
+                           unsigned long action, void *data)
+{
+       int err = 0;
+
+       if (xen_hvm_domain()) {
+               uint64_t v;
+
+               err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+               if (!err && v)
+                       xen_store_evtchn = v;
+               else
+                       pr_warn("Cannot update xenstore event channel: %d\n",
+                               err);
+       } else
+               xen_store_evtchn = xen_start_info->store_evtchn;
+
+       return err;
+}
+
+static struct notifier_block xenbus_resume_nb = {
+       .notifier_call = xenbus_resume_cb,
+};
+
 static int __init xenbus_init(void)
 {
        int err = 0;
@@ -793,6 +818,10 @@ static int __init xenbus_init(void)
                goto out_error;
        }
 
+       if ((xen_store_domain_type != XS_LOCAL) &&
+           (xen_store_domain_type != XS_UNKNOWN))
+               xen_resume_notifier_register(&xenbus_resume_nb);
+
 #ifdef CONFIG_XEN_COMPAT_XENFS
        /*
         * Create xenfs mountpoint in /proc for compatibility with
index fb9ffcb..0923f2c 100644 (file)
@@ -149,8 +149,6 @@ extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
 extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
 extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry);
-extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
-                       void *p);
 extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
                                         struct p9_fid *fid,
                                         struct super_block *sb, int new);
index 703342e..510040b 100644 (file)
@@ -1224,100 +1224,43 @@ ino_t v9fs_qid2ino(struct p9_qid *qid)
 }
 
 /**
- * v9fs_readlink - read a symlink's location (internal version)
+ * v9fs_vfs_follow_link - follow a symlink path
  * @dentry: dentry for symlink
- * @buffer: buffer to load symlink location into
- * @buflen: length of buffer
- *
+ * @cookie: place to pass the data to put_link()
  */
 
-static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
+static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie)
 {
-       int retval;
-
-       struct v9fs_session_info *v9ses;
-       struct p9_fid *fid;
+       struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry);
+       struct p9_fid *fid = v9fs_fid_lookup(dentry);
        struct p9_wstat *st;
+       char *res;
+
+       p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
 
-       p9_debug(P9_DEBUG_VFS, " %pd\n", dentry);
-       retval = -EPERM;
-       v9ses = v9fs_dentry2v9ses(dentry);
-       fid = v9fs_fid_lookup(dentry);
        if (IS_ERR(fid))
-               return PTR_ERR(fid);
+               return ERR_CAST(fid);
 
        if (!v9fs_proto_dotu(v9ses))
-               return -EBADF;
+               return ERR_PTR(-EBADF);
 
        st = p9_client_stat(fid);
        if (IS_ERR(st))
-               return PTR_ERR(st);
+               return ERR_CAST(st);
 
        if (!(st->mode & P9_DMSYMLINK)) {
-               retval = -EINVAL;
-               goto done;
+               p9stat_free(st);
+               kfree(st);
+               return ERR_PTR(-EINVAL);
        }
+       res = st->extension;
+       st->extension = NULL;
+       if (strlen(res) >= PATH_MAX)
+               res[PATH_MAX - 1] = '\0';
 
-       /* copy extension buffer into buffer */
-       retval = min(strlen(st->extension)+1, (size_t)buflen);
-       memcpy(buffer, st->extension, retval);
-
-       p9_debug(P9_DEBUG_VFS, "%pd -> %s (%.*s)\n",
-                dentry, st->extension, buflen, buffer);
-
-done:
        p9stat_free(st);
        kfree(st);
-       return retval;
-}
-
-/**
- * v9fs_vfs_follow_link - follow a symlink path
- * @dentry: dentry for symlink
- * @nd: nameidata
- *
- */
-
-static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       int len = 0;
-       char *link = __getname();
-
-       p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
-
-       if (!link)
-               link = ERR_PTR(-ENOMEM);
-       else {
-               len = v9fs_readlink(dentry, link, PATH_MAX);
-
-               if (len < 0) {
-                       __putname(link);
-                       link = ERR_PTR(len);
-               } else
-                       link[min(len, PATH_MAX-1)] = 0;
-       }
-       nd_set_link(nd, link);
-
-       return NULL;
-}
-
-/**
- * v9fs_vfs_put_link - release a symlink path
- * @dentry: dentry for symlink
- * @nd: nameidata
- * @p: unused
- *
- */
-
-void
-v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
-       char *s = nd_get_link(nd);
-
-       p9_debug(P9_DEBUG_VFS, " %pd %s\n",
-                dentry, IS_ERR(s) ? "<error>" : s);
-       if (!IS_ERR(s))
-               __putname(s);
+       return *cookie = res;
 }
 
 /**
@@ -1370,6 +1313,8 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
        return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname);
 }
 
+#define U32_MAX_DIGITS 10
+
 /**
  * v9fs_vfs_link - create a hardlink
  * @old_dentry: dentry for file to link to
@@ -1383,7 +1328,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
              struct dentry *dentry)
 {
        int retval;
-       char *name;
+       char name[1 + U32_MAX_DIGITS + 2]; /* sign + number + \n + \0 */
        struct p9_fid *oldfid;
 
        p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n",
@@ -1393,20 +1338,12 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
        if (IS_ERR(oldfid))
                return PTR_ERR(oldfid);
 
-       name = __getname();
-       if (unlikely(!name)) {
-               retval = -ENOMEM;
-               goto clunk_fid;
-       }
-
        sprintf(name, "%d\n", oldfid->fid);
        retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
-       __putname(name);
        if (!retval) {
                v9fs_refresh_inode(oldfid, d_inode(old_dentry));
                v9fs_invalidate_inode_attr(dir);
        }
-clunk_fid:
        p9_client_clunk(oldfid);
        return retval;
 }
@@ -1425,7 +1362,7 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde
 {
        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
        int retval;
-       char *name;
+       char name[2 + U32_MAX_DIGITS + 1 + U32_MAX_DIGITS + 1];
        u32 perm;
 
        p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n",
@@ -1435,26 +1372,16 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde
        if (!new_valid_dev(rdev))
                return -EINVAL;
 
-       name = __getname();
-       if (!name)
-               return -ENOMEM;
        /* build extension */
        if (S_ISBLK(mode))
                sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
        else if (S_ISCHR(mode))
                sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
-       else if (S_ISFIFO(mode))
-               *name = 0;
-       else if (S_ISSOCK(mode))
+       else
                *name = 0;
-       else {
-               __putname(name);
-               return -EINVAL;
-       }
 
        perm = unixmode2p9mode(v9ses, mode);
        retval = v9fs_vfs_mkspecial(dir, dentry, perm, name);
-       __putname(name);
 
        return retval;
 }
@@ -1530,7 +1457,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
 static const struct inode_operations v9fs_symlink_inode_operations = {
        .readlink = generic_readlink,
        .follow_link = v9fs_vfs_follow_link,
-       .put_link = v9fs_vfs_put_link,
+       .put_link = kfree_put_link,
        .getattr = v9fs_vfs_getattr,
        .setattr = v9fs_vfs_setattr,
 };
index 9861c7c..09e4433 100644 (file)
@@ -905,41 +905,24 @@ error:
 /**
  * v9fs_vfs_follow_link_dotl - follow a symlink path
  * @dentry: dentry for symlink
- * @nd: nameidata
- *
+ * @cookie: place to pass the data to put_link()
  */
 
-static void *
-v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
+static const char *
+v9fs_vfs_follow_link_dotl(struct dentry *dentry, void **cookie)
 {
-       int retval;
-       struct p9_fid *fid;
-       char *link = __getname();
+       struct p9_fid *fid = v9fs_fid_lookup(dentry);
        char *target;
+       int retval;
 
        p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
 
-       if (!link) {
-               link = ERR_PTR(-ENOMEM);
-               goto ndset;
-       }
-       fid = v9fs_fid_lookup(dentry);
-       if (IS_ERR(fid)) {
-               __putname(link);
-               link = ERR_CAST(fid);
-               goto ndset;
-       }
+       if (IS_ERR(fid))
+               return ERR_CAST(fid);
        retval = p9_client_readlink(fid, &target);
-       if (!retval) {
-               strcpy(link, target);
-               kfree(target);
-               goto ndset;
-       }
-       __putname(link);
-       link = ERR_PTR(retval);
-ndset:
-       nd_set_link(nd, link);
-       return NULL;
+       if (retval)
+               return ERR_PTR(retval);
+       return *cookie = target;
 }
 
 int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
@@ -1006,7 +989,7 @@ const struct inode_operations v9fs_file_inode_operations_dotl = {
 const struct inode_operations v9fs_symlink_inode_operations_dotl = {
        .readlink = generic_readlink,
        .follow_link = v9fs_vfs_follow_link_dotl,
-       .put_link = v9fs_vfs_put_link,
+       .put_link = kfree_put_link,
        .getattr = v9fs_vfs_getattr_dotl,
        .setattr = v9fs_vfs_setattr_dotl,
        .setxattr = generic_setxattr,
index de58cc7..da0c334 100644 (file)
 
 #include "autofs_i.h"
 
-static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *autofs4_follow_link(struct dentry *dentry, void **cookie)
 {
        struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
        struct autofs_info *ino = autofs4_dentry_ino(dentry);
        if (ino && !autofs4_oz_mode(sbi))
                ino->last_used = jiffies;
-       nd_set_link(nd, d_inode(dentry)->i_private);
-       return NULL;
+       return d_inode(dentry)->i_private;
 }
 
 const struct inode_operations autofs4_symlink_inode_operations = {
index 7943533..46aedac 100644 (file)
@@ -42,8 +42,7 @@ static struct inode *befs_iget(struct super_block *, unsigned long);
 static struct inode *befs_alloc_inode(struct super_block *sb);
 static void befs_destroy_inode(struct inode *inode);
 static void befs_destroy_inodecache(void);
-static void *befs_follow_link(struct dentry *, struct nameidata *);
-static void *befs_fast_follow_link(struct dentry *, struct nameidata *);
+static const char *befs_follow_link(struct dentry *, void **);
 static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
                        char **out, int *out_len);
 static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -80,11 +79,6 @@ static const struct address_space_operations befs_aops = {
        .bmap           = befs_bmap,
 };
 
-static const struct inode_operations befs_fast_symlink_inode_operations = {
-       .readlink       = generic_readlink,
-       .follow_link    = befs_fast_follow_link,
-};
-
 static const struct inode_operations befs_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = befs_follow_link,
@@ -403,10 +397,12 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
                inode->i_op = &befs_dir_inode_operations;
                inode->i_fop = &befs_dir_operations;
        } else if (S_ISLNK(inode->i_mode)) {
-               if (befs_ino->i_flags & BEFS_LONG_SYMLINK)
+               if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
                        inode->i_op = &befs_symlink_inode_operations;
-               else
-                       inode->i_op = &befs_fast_symlink_inode_operations;
+               } else {
+                       inode->i_link = befs_ino->i_data.symlink;
+                       inode->i_op = &simple_symlink_inode_operations;
+               }
        } else {
                befs_error(sb, "Inode %lu is not a regular file, "
                           "directory or symlink. THAT IS WRONG! BeFS has no "
@@ -467,8 +463,8 @@ befs_destroy_inodecache(void)
  * The data stream become link name. Unless the LONG_SYMLINK
  * flag is set.
  */
-static void *
-befs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *
+befs_follow_link(struct dentry *dentry, void **cookie)
 {
        struct super_block *sb = dentry->d_sb;
        struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
@@ -478,33 +474,20 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 
        if (len == 0) {
                befs_error(sb, "Long symlink with illegal length");
-               link = ERR_PTR(-EIO);
-       } else {
-               befs_debug(sb, "Follow long symlink");
-
-               link = kmalloc(len, GFP_NOFS);
-               if (!link) {
-                       link = ERR_PTR(-ENOMEM);
-               } else if (befs_read_lsymlink(sb, data, link, len) != len) {
-                       kfree(link);
-                       befs_error(sb, "Failed to read entire long symlink");
-                       link = ERR_PTR(-EIO);
-               } else {
-                       link[len - 1] = '\0';
-               }
+               return ERR_PTR(-EIO);
        }
-       nd_set_link(nd, link);
-       return NULL;
-}
-
-
-static void *
-befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
+       befs_debug(sb, "Follow long symlink");
 
-       nd_set_link(nd, befs_ino->i_data.symlink);
-       return NULL;
+       link = kmalloc(len, GFP_NOFS);
+       if (!link)
+               return ERR_PTR(-ENOMEM);
+       if (befs_read_lsymlink(sb, data, link, len) != len) {
+               kfree(link);
+               befs_error(sb, "Failed to read entire long symlink");
+               return ERR_PTR(-EIO);
+       }
+       link[len - 1] = '\0';
+       return *cookie = link;
 }
 
 /*
index 241ef68..cd46e41 100644 (file)
@@ -918,7 +918,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
-                               error = -EINVAL;
+                               retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }
index 9de772e..614aaa1 100644 (file)
@@ -880,6 +880,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  * indirect refs to their parent bytenr.
  * When roots are found, they're added to the roots list
  *
+ * NOTE: This can return values > 0
+ *
  * FIXME some caching might speed things up
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
@@ -1198,6 +1200,19 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/**
+ * btrfs_check_shared - tell us whether an extent is shared
+ *
+ * @trans: optional trans handle
+ *
+ * btrfs_check_shared uses the backref walking code but will short
+ * circuit as soon as it finds a root or inode that doesn't match the
+ * one passed in. This provides a significant performance benefit for
+ * callers (such as fiemap) which want to know whether the extent is
+ * shared but do not need a ref count.
+ *
+ * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
+ */
 int btrfs_check_shared(struct btrfs_trans_handle *trans,
                       struct btrfs_fs_info *fs_info, u64 root_objectid,
                       u64 inum, u64 bytenr)
@@ -1226,11 +1241,13 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
                ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
                                        roots, NULL, root_objectid, inum);
                if (ret == BACKREF_FOUND_SHARED) {
+                       /* this is the only condition under which we return 1 */
                        ret = 1;
                        break;
                }
                if (ret < 0 && ret != -ENOENT)
                        break;
+               ret = 0;
                node = ulist_next(tmp, &uiter);
                if (!node)
                        break;
index 0ec8e22..0ec3acd 100644 (file)
@@ -3180,8 +3180,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
 fail:
        btrfs_release_path(path);
-       if (ret)
-               btrfs_abort_transaction(trans, root, ret);
        return ret;
 
 }
@@ -3487,8 +3485,30 @@ again:
                                ret = 0;
                        }
                }
-               if (!ret)
+               if (!ret) {
                        ret = write_one_cache_group(trans, root, path, cache);
+                       /*
+                        * Our block group might still be attached to the list
+                        * of new block groups in the transaction handle of some
+                        * other task (struct btrfs_trans_handle->new_bgs). This
+                        * means its block group item isn't yet in the extent
+                        * tree. If this happens ignore the error, as we will
+                        * try again later in the critical section of the
+                        * transaction commit.
+                        */
+                       if (ret == -ENOENT) {
+                               ret = 0;
+                               spin_lock(&cur_trans->dirty_bgs_lock);
+                               if (list_empty(&cache->dirty_list)) {
+                                       list_add_tail(&cache->dirty_list,
+                                                     &cur_trans->dirty_bgs);
+                                       btrfs_get_block_group(cache);
+                               }
+                               spin_unlock(&cur_trans->dirty_bgs_lock);
+                       } else if (ret) {
+                               btrfs_abort_transaction(trans, root, ret);
+                       }
+               }
 
                /* if its not on the io list, we need to put the block group */
                if (should_put)
@@ -3597,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                ret = 0;
                        }
                }
-               if (!ret)
+               if (!ret) {
                        ret = write_one_cache_group(trans, root, path, cache);
+                       if (ret)
+                               btrfs_abort_transaction(trans, root, ret);
+               }
 
                /* if its not on the io list, we need to put the block group */
                if (should_put)
@@ -8806,6 +8829,24 @@ again:
                goto again;
        }
 
+       /*
+        * if we are changing raid levels, try to allocate a corresponding
+        * block group with the new raid level.
+        */
+       alloc_flags = update_block_group_flags(root, cache->flags);
+       if (alloc_flags != cache->flags) {
+               ret = do_chunk_alloc(trans, root, alloc_flags,
+                                    CHUNK_ALLOC_FORCE);
+               /*
+                * ENOSPC is allowed here, we may have enough space
+                * already allocated at the new raid level to
+                * carry on
+                */
+               if (ret == -ENOSPC)
+                       ret = 0;
+               if (ret < 0)
+                       goto out;
+       }
 
        ret = set_block_group_ro(cache, 0);
        if (!ret)
@@ -8819,7 +8860,9 @@ again:
 out:
        if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
                alloc_flags = update_block_group_flags(root, cache->flags);
+               lock_chunks(root->fs_info->chunk_root);
                check_system_chunk(trans, root, alloc_flags);
+               unlock_chunks(root->fs_info->chunk_root);
        }
        mutex_unlock(&root->fs_info->ro_block_group_mutex);
 
index 43af5a6..c32d226 100644 (file)
@@ -4772,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
                               start >> PAGE_CACHE_SHIFT);
        if (eb && atomic_inc_not_zero(&eb->refs)) {
                rcu_read_unlock();
+               /*
+                * Lock our eb's refs_lock to avoid races with
+                * free_extent_buffer. When we get our eb it might be flagged
+                * with EXTENT_BUFFER_STALE and another task running
+                * free_extent_buffer might have seen that flag set,
+                * eb->refs == 2, that the buffer isn't under IO (dirty and
+                * writeback flags not set) and it's still in the tree (flag
+                * EXTENT_BUFFER_TREE_REF set), therefore being in the process
+                * of decrementing the extent buffer's reference count twice.
+                * So here we could race and increment the eb's reference count,
+                * clear its stale flag, mark it as dirty and drop our reference
+                * before the other task finishes executing free_extent_buffer,
+                * which would later result in an attempt to free an extent
+                * buffer that is dirty.
+                */
+               if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
+                       spin_lock(&eb->refs_lock);
+                       spin_unlock(&eb->refs_lock);
+               }
                mark_extent_buffer_accessed(eb, NULL);
                return eb;
        }
index 41c510b..9dbe5b5 100644 (file)
@@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 
        mapping_set_gfp_mask(inode->i_mapping,
                        mapping_gfp_mask(inode->i_mapping) &
-                       ~(GFP_NOFS & ~__GFP_HIGHMEM));
+                       ~(__GFP_FS | __GFP_HIGHMEM));
 
        return inode;
 }
@@ -3466,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        int ret;
        struct btrfs_io_ctl io_ctl;
+       bool release_metadata = true;
 
        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
                return 0;
@@ -3473,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
        memset(&io_ctl, 0, sizeof(io_ctl));
        ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
                                      trans, path, 0);
-       if (!ret)
+       if (!ret) {
+               /*
+                * At this point writepages() didn't error out, so our metadata
+                * reservation is released when the writeback finishes, at
+                * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
+                * with or without an error.
+                */
+               release_metadata = false;
                ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+       }
 
        if (ret) {
-               btrfs_delalloc_release_metadata(inode, inode->i_size);
+               if (release_metadata)
+                       btrfs_delalloc_release_metadata(inode, inode->i_size);
 #ifdef DEBUG
                btrfs_err(root->fs_info,
                        "failed to write free ino cache for root %llu",
index 157cc54..760c4a5 100644 (file)
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
 int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 {
        int ret = 0;
+       int ret_wb = 0;
        u64 end;
        u64 orig_end;
        struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
        if (ret)
                return ret;
 
-       ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
-       if (ret)
-               return ret;
+       /*
+        * If we have a writeback error don't return immediately. Wait first
+        * for any ordered extents that haven't completed yet. This is to make
+        * sure no one can dirty the same page ranges and call writepages()
+        * before the ordered extents complete - to avoid failures (-EEXIST)
+        * when adding the new ordered extents to the ordered tree.
+        */
+       ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 
        end = orig_end;
        while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
                        break;
                end--;
        }
-       return ret;
+       return ret_wb ? ret_wb : ret;
 }
 
 /*
index 96aebf3..174f5e1 100644 (file)
@@ -4625,6 +4625,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 {
        u64 chunk_offset;
 
+       ASSERT(mutex_is_locked(&extent_root->fs_info->chunk_mutex));
        chunk_offset = find_next_chunk(extent_root->fs_info);
        return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
 }
index e876e19..571acd8 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
-#include <linux/namei.h>
 #include <linux/writeback.h>
 #include <linux/vmalloc.h>
 #include <linux/posix_acl.h>
@@ -819,6 +818,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
                        else
                                kfree(sym); /* lost a race */
                }
+               inode->i_link = ci->i_symlink;
                break;
        case S_IFDIR:
                inode->i_op = &ceph_dir_iops;
@@ -1691,16 +1691,9 @@ retry:
 /*
  * symlinks
  */
-static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct ceph_inode_info *ci = ceph_inode(d_inode(dentry));
-       nd_set_link(nd, ci->i_symlink);
-       return NULL;
-}
-
 static const struct inode_operations ceph_symlink_iops = {
        .readlink = generic_readlink,
-       .follow_link = ceph_sym_follow_link,
+       .follow_link = simple_follow_link,
        .setattr = ceph_setattr,
        .getattr = ceph_getattr,
        .setxattr = ceph_setxattr,
index 430e034..7dc886c 100644 (file)
@@ -24,6 +24,7 @@
 #include "cifsfs.h"
 #include "dns_resolve.h"
 #include "cifs_debug.h"
+#include "cifs_unicode.h"
 
 static LIST_HEAD(cifs_dfs_automount_list);
 
@@ -312,7 +313,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
        xid = get_xid();
        rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls,
                &num_referrals, &referrals,
-               cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+               cifs_remap(cifs_sb));
        free_xid(xid);
 
        cifs_put_tlink(tlink);
index 0303c67..5a53ac6 100644 (file)
 #include "cifsglob.h"
 #include "cifs_debug.h"
 
-/*
- * cifs_utf16_bytes - how long will a string be after conversion?
- * @utf16 - pointer to input string
- * @maxbytes - don't go past this many bytes of input string
- * @codepage - destination codepage
- *
- * Walk a utf16le string and return the number of bytes that the string will
- * be after being converted to the given charset, not including any null
- * termination required. Don't walk past maxbytes in the source buffer.
- */
-int
-cifs_utf16_bytes(const __le16 *from, int maxbytes,
-               const struct nls_table *codepage)
-{
-       int i;
-       int charlen, outlen = 0;
-       int maxwords = maxbytes / 2;
-       char tmp[NLS_MAX_CHARSET_SIZE];
-       __u16 ftmp;
-
-       for (i = 0; i < maxwords; i++) {
-               ftmp = get_unaligned_le16(&from[i]);
-               if (ftmp == 0)
-                       break;
-
-               charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
-               if (charlen > 0)
-                       outlen += charlen;
-               else
-                       outlen++;
-       }
-
-       return outlen;
-}
-
 int cifs_remap(struct cifs_sb_info *cifs_sb)
 {
        int map_type;
@@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target)
  * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
  */
 static int
-cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
+cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
             int maptype)
 {
        int len = 1;
+       __u16 src_char;
+
+       src_char = *from;
 
        if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
                return len;
@@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
 
        /* if character not one of seven in special remap set */
        len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
-       if (len <= 0) {
-               *target = '?';
-               len = 1;
-       }
+       if (len <= 0)
+               goto surrogate_pair;
+
+       return len;
+
+surrogate_pair:
+       /* convert SURROGATE_PAIR and IVS */
+       if (strcmp(cp->charset, "utf8"))
+               goto unknown;
+       len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
+       if (len <= 0)
+               goto unknown;
+       return len;
+
+unknown:
+       *target = '?';
+       len = 1;
        return len;
 }
 
@@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
        int nullsize = nls_nullsize(codepage);
        int fromwords = fromlen / 2;
        char tmp[NLS_MAX_CHARSET_SIZE];
-       __u16 ftmp;
+       __u16 ftmp[3];          /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
 
        /*
         * because the chars can be of varying widths, we need to take care
@@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
        safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
 
        for (i = 0; i < fromwords; i++) {
-               ftmp = get_unaligned_le16(&from[i]);
-               if (ftmp == 0)
+               ftmp[0] = get_unaligned_le16(&from[i]);
+               if (ftmp[0] == 0)
                        break;
+               if (i + 1 < fromwords)
+                       ftmp[1] = get_unaligned_le16(&from[i + 1]);
+               else
+                       ftmp[1] = 0;
+               if (i + 2 < fromwords)
+                       ftmp[2] = get_unaligned_le16(&from[i + 2]);
+               else
+                       ftmp[2] = 0;
 
                /*
                 * check to see if converting this character might make the
@@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
                /* put converted char into 'to' buffer */
                charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
                outlen += charlen;
+
+               /* charlen (=bytes of UTF-8 for 1 character)
+                * 4bytes UTF-8(surrogate pair) is charlen=4
+                *   (4bytes UTF-16 code)
+                * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
+                *   (2 UTF-8 pairs divided to 2 UTF-16 pairs) */
+               if (charlen == 4)
+                       i++;
+               else if (charlen >= 5)
+                       /* 5-6bytes UTF-8 */
+                       i += 2;
        }
 
        /* properly null-terminate string */
@@ -296,6 +296,46 @@ success:
 }
 
 /*
+ * cifs_utf16_bytes - how long will a string be after conversion?
+ * @utf16 - pointer to input string
+ * @maxbytes - don't go past this many bytes of input string
+ * @codepage - destination codepage
+ *
+ * Walk a utf16le string and return the number of bytes that the string will
+ * be after being converted to the given charset, not including any null
+ * termination required. Don't walk past maxbytes in the source buffer.
+ */
+int
+cifs_utf16_bytes(const __le16 *from, int maxbytes,
+               const struct nls_table *codepage)
+{
+       int i;
+       int charlen, outlen = 0;
+       int maxwords = maxbytes / 2;
+       char tmp[NLS_MAX_CHARSET_SIZE];
+       __u16 ftmp[3];
+
+       for (i = 0; i < maxwords; i++) {
+               ftmp[0] = get_unaligned_le16(&from[i]);
+               if (ftmp[0] == 0)
+                       break;
+               if (i + 1 < maxwords)
+                       ftmp[1] = get_unaligned_le16(&from[i + 1]);
+               else
+                       ftmp[1] = 0;
+               if (i + 2 < maxwords)
+                       ftmp[2] = get_unaligned_le16(&from[i + 2]);
+               else
+                       ftmp[2] = 0;
+
+               charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD);
+               outlen += charlen;
+       }
+
+       return outlen;
+}
+
+/*
  * cifs_strndup_from_utf16 - copy a string from wire format to the local
  * codepage
  * @src - source string
@@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
        char src_char;
        __le16 dst_char;
        wchar_t tmp;
+       wchar_t *wchar_to;      /* UTF-16 */
+       int ret;
+       unicode_t u;
 
        if (map_chars == NO_MAP_UNI_RSVD)
                return cifs_strtoUTF16(target, source, PATH_MAX, cp);
 
+       wchar_to = kzalloc(6, GFP_KERNEL);
+
        for (i = 0; i < srclen; j++) {
                src_char = source[i];
                charlen = 1;
@@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
                         * if no match, use question mark, which at least in
                         * some cases serves as wild card
                         */
-                       if (charlen < 1) {
-                               dst_char = cpu_to_le16(0x003f);
-                               charlen = 1;
+                       if (charlen > 0)
+                               goto ctoUTF16;
+
+                       /* convert SURROGATE_PAIR */
+                       if (strcmp(cp->charset, "utf8") || !wchar_to)
+                               goto unknown;
+                       if (*(source + i) & 0x80) {
+                               charlen = utf8_to_utf32(source + i, 6, &u);
+                               if (charlen < 0)
+                                       goto unknown;
+                       } else
+                               goto unknown;
+                       ret  = utf8s_to_utf16s(source + i, charlen,
+                                              UTF16_LITTLE_ENDIAN,
+                                              wchar_to, 6);
+                       if (ret < 0)
+                               goto unknown;
+
+                       i += charlen;
+                       dst_char = cpu_to_le16(*wchar_to);
+                       if (charlen <= 3)
+                               /* 1-3bytes UTF-8 to 2bytes UTF-16 */
+                               put_unaligned(dst_char, &target[j]);
+                       else if (charlen == 4) {
+                               /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
+                                * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
+                                *   (charlen=3+4 or 4+4) */
+                               put_unaligned(dst_char, &target[j]);
+                               dst_char = cpu_to_le16(*(wchar_to + 1));
+                               j++;
+                               put_unaligned(dst_char, &target[j]);
+                       } else if (charlen >= 5) {
+                               /* 5-6bytes UTF-8 to 6bytes UTF-16 */
+                               put_unaligned(dst_char, &target[j]);
+                               dst_char = cpu_to_le16(*(wchar_to + 1));
+                               j++;
+                               put_unaligned(dst_char, &target[j]);
+                               dst_char = cpu_to_le16(*(wchar_to + 2));
+                               j++;
+                               put_unaligned(dst_char, &target[j]);
                        }
+                       continue;
+
+unknown:
+                       dst_char = cpu_to_le16(0x003f);
+                       charlen = 1;
                }
+
+ctoUTF16:
                /*
                 * character may take more than one byte in the source string,
                 * but will take exactly two bytes in the target string
@@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 
 ctoUTF16_out:
        put_unaligned(0, &target[j]); /* Null terminate target unicode string */
+       kfree(wchar_to);
        return j;
 }
 
index f5089bd..0a9fb6b 100644 (file)
@@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
                seq_puts(s, ",nouser_xattr");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
                seq_puts(s, ",mapchars");
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
+               seq_puts(s, ",mapposix");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
                seq_puts(s, ",sfu");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
index 252f5c1..a782b22 100644 (file)
@@ -120,7 +120,7 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
 #endif
 
 /* Functions related to symlinks */
-extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
+extern const char *cifs_follow_link(struct dentry *direntry, void **cookie);
 extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
                         int buflen);
 extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
index c31ce98..c63fd1d 100644 (file)
@@ -361,11 +361,11 @@ extern int CIFSUnixCreateHardLink(const unsigned int xid,
 extern int CIFSUnixCreateSymLink(const unsigned int xid,
                        struct cifs_tcon *tcon,
                        const char *fromName, const char *toName,
-                       const struct nls_table *nls_codepage);
+                       const struct nls_table *nls_codepage, int remap);
 extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
                        struct cifs_tcon *tcon,
                        const unsigned char *searchName, char **syminfo,
-                       const struct nls_table *nls_codepage);
+                       const struct nls_table *nls_codepage, int remap);
 extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
                               __u16 fid, char **symlinkinfo,
                               const struct nls_table *nls_codepage);
index 84650a5..f26ffbf 100644 (file)
@@ -2784,7 +2784,7 @@ copyRetry:
 int
 CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon,
                      const char *fromName, const char *toName,
-                     const struct nls_table *nls_codepage)
+                     const struct nls_table *nls_codepage, int remap)
 {
        TRANSACTION2_SPI_REQ *pSMB = NULL;
        TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -2804,9 +2804,9 @@ createSymLinkRetry:
 
        if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
                name_len =
-                   cifs_strtoUTF16((__le16 *) pSMB->FileName, fromName,
-                                   /* find define for this maxpathcomponent */
-                                   PATH_MAX, nls_codepage);
+                   cifsConvertToUTF16((__le16 *) pSMB->FileName, fromName,
+                               /* find define for this maxpathcomponent */
+                                       PATH_MAX, nls_codepage, remap);
                name_len++;     /* trailing null */
                name_len *= 2;
 
@@ -2828,9 +2828,9 @@ createSymLinkRetry:
        data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
        if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
                name_len_target =
-                   cifs_strtoUTF16((__le16 *) data_offset, toName, PATH_MAX
-                                   /* find define for this maxpathcomponent */
-                                   , nls_codepage);
+                   cifsConvertToUTF16((__le16 *) data_offset, toName,
+                               /* find define for this maxpathcomponent */
+                                       PATH_MAX, nls_codepage, remap);
                name_len_target++;      /* trailing null */
                name_len_target *= 2;
        } else {        /* BB improve the check for buffer overruns BB */
@@ -3034,7 +3034,7 @@ winCreateHardLinkRetry:
 int
 CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
                        const unsigned char *searchName, char **symlinkinfo,
-                       const struct nls_table *nls_codepage)
+                       const struct nls_table *nls_codepage, int remap)
 {
 /* SMB_QUERY_FILE_UNIX_LINK */
        TRANSACTION2_QPI_REQ *pSMB = NULL;
@@ -3055,8 +3055,9 @@ querySymLinkRetry:
 
        if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
                name_len =
-                       cifs_strtoUTF16((__le16 *) pSMB->FileName, searchName,
-                                       PATH_MAX, nls_codepage);
+                       cifsConvertToUTF16((__le16 *) pSMB->FileName,
+                                          searchName, PATH_MAX, nls_codepage,
+                                          remap);
                name_len++;     /* trailing null */
                name_len *= 2;
        } else {        /* BB improve the check for buffer overruns BB */
@@ -4917,7 +4918,7 @@ getDFSRetry:
                strncpy(pSMB->RequestFileName, search_name, name_len);
        }
 
-       if (ses->server && ses->server->sign)
+       if (ses->server->sign)
                pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
 
        pSMB->hdr.Uid = ses->Suid;
index f3bfe08..8383d5e 100644 (file)
@@ -386,6 +386,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
                rc = generic_ip_connect(server);
                if (rc) {
                        cifs_dbg(FYI, "reconnect error %d\n", rc);
+                       mutex_unlock(&server->srv_mutex);
                        msleep(3000);
                } else {
                        atomic_inc(&tcpSesReconnectCount);
@@ -393,8 +394,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
                        if (server->tcpStatus != CifsExiting)
                                server->tcpStatus = CifsNeedNegotiate;
                        spin_unlock(&GlobalMid_Lock);
+                       mutex_unlock(&server->srv_mutex);
                }
-               mutex_unlock(&server->srv_mutex);
        } while (server->tcpStatus == CifsNeedReconnect);
 
        return rc;
index 338d569..c3eb998 100644 (file)
@@ -620,8 +620,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
                }
                rc = CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
                                            cifs_sb->local_nls,
-                                           cifs_sb->mnt_cifs_flags &
-                                               CIFS_MOUNT_MAP_SPECIAL_CHR);
+                                           cifs_remap(cifs_sb));
                if (rc)
                        goto mknod_out;
 
index cafbf10..3f50cee 100644 (file)
@@ -140,8 +140,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
-                            cifs_sb->mnt_cifs_flags &
-                                       CIFS_MOUNT_MAP_SPECIAL_CHR);
+                            cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);
 
        if (rc)
@@ -1553,8 +1552,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
                rc = server->ops->mand_unlock_range(cfile, flock, xid);
 
 out:
-       if (flock->fl_flags & FL_POSIX)
-               posix_lock_file_wait(file, flock);
+       if (flock->fl_flags & FL_POSIX && !rc)
+               rc = posix_lock_file_wait(file, flock);
        return rc;
 }
 
index 55b5811..f621b44 100644 (file)
@@ -373,8 +373,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 
        /* could have done a find first instead but this returns more info */
        rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
-                                 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
-                                       CIFS_MOUNT_MAP_SPECIAL_CHR);
+                                 cifs_sb->local_nls, cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);
 
        if (!rc) {
@@ -402,9 +401,25 @@ int cifs_get_inode_info_unix(struct inode **pinode,
                        rc = -ENOMEM;
        } else {
                /* we already have inode, update it */
+
+               /* if uniqueid is different, return error */
+               if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+                   CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) {
+                       rc = -ESTALE;
+                       goto cgiiu_exit;
+               }
+
+               /* if filetype is different, return error */
+               if (unlikely(((*pinode)->i_mode & S_IFMT) !=
+                   (fattr.cf_mode & S_IFMT))) {
+                       rc = -ESTALE;
+                       goto cgiiu_exit;
+               }
+
                cifs_fattr_to_inode(*pinode, &fattr);
        }
 
+cgiiu_exit:
        return rc;
 }
 
@@ -839,6 +854,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
                if (!*inode)
                        rc = -ENOMEM;
        } else {
+               /* we already have inode, update it */
+
+               /* if filetype is different, return error */
+               if (unlikely(((*inode)->i_mode & S_IFMT) !=
+                   (fattr.cf_mode & S_IFMT))) {
+                       rc = -ESTALE;
+                       goto cgii_exit;
+               }
+
                cifs_fattr_to_inode(*inode, &fattr);
        }
 
@@ -2215,8 +2239,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
                pTcon = tlink_tcon(tlink);
                rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
                                    cifs_sb->local_nls,
-                                   cifs_sb->mnt_cifs_flags &
-                                       CIFS_MOUNT_MAP_SPECIAL_CHR);
+                                   cifs_remap(cifs_sb));
                cifs_put_tlink(tlink);
        }
 
index 252e672..e3548f7 100644 (file)
@@ -626,8 +626,8 @@ cifs_hl_exit:
        return rc;
 }
 
-void *
-cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
+const char *
+cifs_follow_link(struct dentry *direntry, void **cookie)
 {
        struct inode *inode = d_inode(direntry);
        int rc = -ENOMEM;
@@ -643,16 +643,18 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
 
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
-               rc = PTR_ERR(tlink);
-               tlink = NULL;
-               goto out;
+               free_xid(xid);
+               return ERR_CAST(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;
 
        full_path = build_path_from_dentry(direntry);
-       if (!full_path)
-               goto out;
+       if (!full_path) {
+               free_xid(xid);
+               cifs_put_tlink(tlink);
+               return ERR_PTR(-ENOMEM);
+       }
 
        cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, inode);
 
@@ -670,17 +672,13 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
                                                &target_path, cifs_sb);
 
        kfree(full_path);
-out:
+       free_xid(xid);
+       cifs_put_tlink(tlink);
        if (rc != 0) {
                kfree(target_path);
-               target_path = ERR_PTR(rc);
+               return ERR_PTR(rc);
        }
-
-       free_xid(xid);
-       if (tlink)
-               cifs_put_tlink(tlink);
-       nd_set_link(nd, target_path);
-       return NULL;
+       return *cookie = target_path;
 }
 
 int
@@ -717,7 +715,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
                rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname);
        else if (pTcon->unix_ext)
                rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname,
-                                          cifs_sb->local_nls);
+                                          cifs_sb->local_nls,
+                                          cifs_remap(cifs_sb));
        /* else
           rc = CIFSCreateReparseSymLink(xid, pTcon, fromName, toName,
                                        cifs_sb_target->local_nls); */
index b4a4723..b1eede3 100644 (file)
@@ -90,6 +90,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
        if (dentry) {
                inode = d_inode(dentry);
                if (inode) {
+                       if (d_mountpoint(dentry))
+                               goto out;
                        /*
                         * If we're generating inode numbers, then we don't
                         * want to clobber the existing one with the one that
index 7bfdd60..fc537c2 100644 (file)
@@ -960,7 +960,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
        /* Check for unix extensions */
        if (cap_unix(tcon->ses)) {
                rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
-                                            cifs_sb->local_nls);
+                                            cifs_sb->local_nls,
+                                            cifs_remap(cifs_sb));
                if (rc == -EREMOTE)
                        rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
                                                    target_path,
index 65cd7a8..54cbe19 100644 (file)
@@ -110,7 +110,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
 
        /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */
        /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */
-       if ((tcon->ses) &&
+       if ((tcon->ses) && (tcon->ses->server) &&
            (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
                hdr->CreditCharge = cpu_to_le16(1);
        /* else CreditCharge MBZ */
index cc9f254..ec5c832 100644 (file)
@@ -279,36 +279,27 @@ static int configfs_getlink(struct dentry *dentry, char * path)
 
 }
 
-static void *configfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *configfs_follow_link(struct dentry *dentry, void **cookie)
 {
-       int error = -ENOMEM;
        unsigned long page = get_zeroed_page(GFP_KERNEL);
+       int error;
 
-       if (page) {
-               error = configfs_getlink(dentry, (char *)page);
-               if (!error) {
-                       nd_set_link(nd, (char *)page);
-                       return (void *)page;
-               }
-       }
-
-       nd_set_link(nd, ERR_PTR(error));
-       return NULL;
-}
+       if (!page)
+               return ERR_PTR(-ENOMEM);
 
-static void configfs_put_link(struct dentry *dentry, struct nameidata *nd,
-                             void *cookie)
-{
-       if (cookie) {
-               unsigned long page = (unsigned long)cookie;
-               free_page(page);
+       error = configfs_getlink(dentry, (char *)page);
+       if (!error) {
+               return *cookie = (void *)page;
        }
+
+       free_page(page);
+       return ERR_PTR(error);
 }
 
 const struct inode_operations configfs_symlink_inode_operations = {
        .follow_link = configfs_follow_link,
        .readlink = generic_readlink,
-       .put_link = configfs_put_link,
+       .put_link = free_page_put_link,
        .setattr = configfs_setattr,
 };
 
index 656ce52..37b5afd 100644 (file)
@@ -1239,13 +1239,13 @@ ascend:
                /* might go back up the wrong parent if we have had a rename. */
                if (need_seqretry(&rename_lock, seq))
                        goto rename_retry;
-               next = child->d_child.next;
-               while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+               /* go into the first sibling still alive */
+               do {
+                       next = child->d_child.next;
                        if (next == &this_parent->d_subdirs)
                                goto ascend;
                        child = list_entry(next, struct dentry, d_child);
-                       next = next->next;
-               }
+               } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
                rcu_read_unlock();
                goto resume;
        }
index 830a7e7..284f9aa 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/pagemap.h>
-#include <linux/namei.h>
 #include <linux/debugfs.h>
 #include <linux/io.h>
 #include <linux/slab.h>
@@ -43,17 +42,6 @@ const struct file_operations debugfs_file_operations = {
        .llseek =       noop_llseek,
 };
 
-static void *debugfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       nd_set_link(nd, d_inode(dentry)->i_private);
-       return NULL;
-}
-
-const struct inode_operations debugfs_link_operations = {
-       .readlink       = generic_readlink,
-       .follow_link    = debugfs_follow_link,
-};
-
 static int debugfs_u8_set(void *data, u64 val)
 {
        *(u8 *)data = val;
index c1e7ffb..7eaec88 100644 (file)
@@ -174,7 +174,7 @@ static void debugfs_evict_inode(struct inode *inode)
        truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        if (S_ISLNK(inode->i_mode))
-               kfree(inode->i_private);
+               kfree(inode->i_link);
 }
 
 static const struct super_operations debugfs_super_operations = {
@@ -511,8 +511,8 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
                return failed_creating(dentry);
        }
        inode->i_mode = S_IFLNK | S_IRWXUGO;
-       inode->i_op = &debugfs_link_operations;
-       inode->i_private = link;
+       inode->i_op = &simple_symlink_inode_operations;
+       inode->i_link = link;
        d_instantiate(dentry, inode);
        return end_creating(dentry);
 }
index fc850b5..3c4db11 100644 (file)
@@ -170,7 +170,6 @@ out_unlock:
  * @directory_inode: inode of the new file's dentry's parent in ecryptfs
  * @ecryptfs_dentry: New file's dentry in ecryptfs
  * @mode: The mode of the new file
- * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
  *
  * Creates the underlying file and the eCryptfs inode which will link to
  * it. It will also update the eCryptfs directory inode to mimic the
@@ -384,7 +383,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
  * ecryptfs_lookup
  * @ecryptfs_dir_inode: The eCryptfs directory inode
  * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
- * @ecryptfs_nd: nameidata; may be NULL
+ * @flags: lookup flags
  *
  * Find a file on disk. If the file does not exist, then we'll add it to the
  * dentry cache and continue on to read it from the disk.
@@ -675,18 +674,16 @@ out:
        return rc ? ERR_PTR(rc) : buf;
 }
 
-static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *ecryptfs_follow_link(struct dentry *dentry, void **cookie)
 {
        size_t len;
        char *buf = ecryptfs_readlink_lower(dentry, &len);
        if (IS_ERR(buf))
-               goto out;
+               return buf;
        fsstack_copy_attr_atime(d_inode(dentry),
                                d_inode(ecryptfs_dentry_to_lower(dentry)));
        buf[len] = '\0';
-out:
-       nd_set_link(nd, buf);
-       return NULL;
+       return *cookie = buf;
 }
 
 /**
index 59fedbc..86a2121 100644 (file)
@@ -121,7 +121,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
        int len, i;
        int err = -ENOMEM;
 
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return err;
 
index 49a1c61..1977c2a 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -659,6 +659,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
        if (stack_base > STACK_SIZE_MAX)
                stack_base = STACK_SIZE_MAX;
 
+       /* Add space for stack randomization. */
+       stack_base += (STACK_RND_MASK << PAGE_SHIFT);
+
        /* Make sure we didn't let the argument array grow too large. */
        if (vma->vm_end - vma->vm_start > stack_base)
                return -ENOMEM;
index b47c7b8..a364fd0 100644 (file)
@@ -16,5 +16,5 @@
 libore-y := ore.o ore_raid.o
 obj-$(CONFIG_ORE) += libore.o
 
-exofs-y := inode.o file.o symlink.o namei.o dir.o super.o sys.o
+exofs-y := inode.o file.o namei.o dir.o super.o sys.o
 obj-$(CONFIG_EXOFS_FS) += exofs.o
index ad9cac6..2e86086 100644 (file)
@@ -207,10 +207,6 @@ extern const struct address_space_operations exofs_aops;
 extern const struct inode_operations exofs_dir_inode_operations;
 extern const struct inode_operations exofs_special_inode_operations;
 
-/* symlink.c         */
-extern const struct inode_operations exofs_symlink_inode_operations;
-extern const struct inode_operations exofs_fast_symlink_inode_operations;
-
 /* exofs_init_comps will initialize an ore_components device array
  * pointing to a single ore_comp struct, and a round-robin view
  * of the device table.
index 786e4cc..73c64da 100644 (file)
@@ -1222,10 +1222,11 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
                inode->i_fop = &exofs_dir_operations;
                inode->i_mapping->a_ops = &exofs_aops;
        } else if (S_ISLNK(inode->i_mode)) {
-               if (exofs_inode_is_fast_symlink(inode))
-                       inode->i_op = &exofs_fast_symlink_inode_operations;
-               else {
-                       inode->i_op = &exofs_symlink_inode_operations;
+               if (exofs_inode_is_fast_symlink(inode)) {
+                       inode->i_op = &simple_symlink_inode_operations;
+                       inode->i_link = (char *)oi->i_data;
+               } else {
+                       inode->i_op = &page_symlink_inode_operations;
                        inode->i_mapping->a_ops = &exofs_aops;
                }
        } else {
index 5ae25e4..09a6bb1 100644 (file)
@@ -113,7 +113,7 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry,
        oi = exofs_i(inode);
        if (l > sizeof(oi->i_data)) {
                /* slow symlink */
-               inode->i_op = &exofs_symlink_inode_operations;
+               inode->i_op = &page_symlink_inode_operations;
                inode->i_mapping->a_ops = &exofs_aops;
                memset(oi->i_data, 0, sizeof(oi->i_data));
 
@@ -122,7 +122,8 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry,
                        goto out_fail;
        } else {
                /* fast symlink */
-               inode->i_op = &exofs_fast_symlink_inode_operations;
+               inode->i_op = &simple_symlink_inode_operations;
+               inode->i_link = (char *)oi->i_data;
                memcpy(oi->i_data, symname, l);
                inode->i_size = l-1;
        }
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
deleted file mode 100644 (file)
index 6f6f3a4..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com)
- * Copyright (C) 2008, 2009
- * Boaz Harrosh <ooo@electrozaur.com>
- *
- * Copyrights for code taken from ext2:
- *     Copyright (C) 1992, 1993, 1994, 1995
- *     Remy Card (card@masi.ibp.fr)
- *     Laboratoire MASI - Institut Blaise Pascal
- *     Universite Pierre et Marie Curie (Paris VI)
- *     from
- *     linux/fs/minix/inode.c
- *     Copyright (C) 1991, 1992  Linus Torvalds
- *
- * This file is part of exofs.
- *
- * exofs is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation.  Since it is based on ext2, and the only
- * valid version of GPL for the Linux kernel is version 2, the only valid
- * version of GPL for exofs is version 2.
- *
- * exofs is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with exofs; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include <linux/namei.h>
-
-#include "exofs.h"
-
-static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct exofs_i_info *oi = exofs_i(d_inode(dentry));
-
-       nd_set_link(nd, (char *)oi->i_data);
-       return NULL;
-}
-
-const struct inode_operations exofs_symlink_inode_operations = {
-       .readlink       = generic_readlink,
-       .follow_link    = page_follow_link_light,
-       .put_link       = page_put_link,
-};
-
-const struct inode_operations exofs_fast_symlink_inode_operations = {
-       .readlink       = generic_readlink,
-       .follow_link    = exofs_follow_link,
-};
index f460ae3..5c09776 100644 (file)
@@ -1403,6 +1403,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
                        inode->i_mapping->a_ops = &ext2_aops;
        } else if (S_ISLNK(inode->i_mode)) {
                if (ext2_inode_is_fast_symlink(inode)) {
+                       inode->i_link = (char *)ei->i_data;
                        inode->i_op = &ext2_fast_symlink_inode_operations;
                        nd_terminate_link(ei->i_data, inode->i_size,
                                sizeof(ei->i_data) - 1);
index 3e074a9..13ec54a 100644 (file)
@@ -189,7 +189,8 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
        } else {
                /* fast symlink */
                inode->i_op = &ext2_fast_symlink_inode_operations;
-               memcpy((char*)(EXT2_I(inode)->i_data),symname,l);
+               inode->i_link = (char*)EXT2_I(inode)->i_data;
+               memcpy(inode->i_link, symname, l);
                inode->i_size = l-1;
        }
        mark_inode_dirty(inode);
index 20608f1..ae17179 100644 (file)
 
 #include "ext2.h"
 #include "xattr.h"
-#include <linux/namei.h>
-
-static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct ext2_inode_info *ei = EXT2_I(d_inode(dentry));
-       nd_set_link(nd, (char *)ei->i_data);
-       return NULL;
-}
 
 const struct inode_operations ext2_symlink_inode_operations = {
        .readlink       = generic_readlink,
@@ -43,7 +35,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
  
 const struct inode_operations ext2_fast_symlink_inode_operations = {
        .readlink       = generic_readlink,
-       .follow_link    = ext2_follow_link,
+       .follow_link    = simple_follow_link,
        .setattr        = ext2_setattr,
 #ifdef CONFIG_EXT2_FS_XATTR
        .setxattr       = generic_setxattr,
index 2ee2dc4..6c7e546 100644 (file)
@@ -2999,6 +2999,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
                        inode->i_op = &ext3_fast_symlink_inode_operations;
                        nd_terminate_link(ei->i_data, inode->i_size,
                                sizeof(ei->i_data) - 1);
+                       inode->i_link = (char *)ei->i_data;
                } else {
                        inode->i_op = &ext3_symlink_inode_operations;
                        ext3_set_aops(inode);
index 4264b9b..c9e767c 100644 (file)
@@ -2308,7 +2308,8 @@ retry:
                }
        } else {
                inode->i_op = &ext3_fast_symlink_inode_operations;
-               memcpy((char*)&EXT3_I(inode)->i_data,symname,l);
+               inode->i_link = (char*)&EXT3_I(inode)->i_data;
+               memcpy(inode->i_link, symname, l);
                inode->i_size = l-1;
        }
        EXT3_I(inode)->i_disksize = inode->i_size;
index ea96df3..c08c590 100644 (file)
  *  ext3 symlink handling code
  */
 
-#include <linux/namei.h>
 #include "ext3.h"
 #include "xattr.h"
 
-static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct ext3_inode_info *ei = EXT3_I(d_inode(dentry));
-       nd_set_link(nd, (char*)ei->i_data);
-       return NULL;
-}
-
 const struct inode_operations ext3_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
@@ -43,7 +35,7 @@ const struct inode_operations ext3_symlink_inode_operations = {
 
 const struct inode_operations ext3_fast_symlink_inode_operations = {
        .readlink       = generic_readlink,
-       .follow_link    = ext3_follow_link,
+       .follow_link    = simple_follow_link,
        .setattr        = ext3_setattr,
 #ifdef CONFIG_EXT3_FS_XATTR
        .setxattr       = generic_setxattr,
index 009a059..0a3b72d 100644 (file)
@@ -2847,6 +2847,7 @@ extern int ext4_mpage_readpages(struct address_space *mapping,
                                unsigned nr_pages);
 
 /* symlink.c */
+extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
 extern const struct inode_operations ext4_symlink_inode_operations;
 extern const struct inode_operations ext4_fast_symlink_inode_operations;
 
@@ -2889,7 +2890,6 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
                           struct ext4_map_blocks *map, int flags);
 extern int ext4_ext_calc_metadata_amount(struct inode *inode,
                                         ext4_lblk_t lblocks);
-extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
                                                   int num,
                                                   struct ext4_ext_path *path);
index 3445035..d418431 100644 (file)
@@ -87,6 +87,12 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
                ext4_put_nojournal(handle);
                return 0;
        }
+
+       if (!handle->h_transaction) {
+               err = jbd2_journal_stop(handle);
+               return handle->h_err ? handle->h_err : err;
+       }
+
        sb = handle->h_transaction->t_journal->j_private;
        err = handle->h_err;
        rc = jbd2_journal_stop(handle);
index d74e080..e003a1e 100644 (file)
@@ -377,7 +377,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
        ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
        ext4_lblk_t last = lblock + len - 1;
 
-       if (lblock > last)
+       if (len == 0 || lblock > last)
                return 0;
        return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
 }
@@ -5396,6 +5396,14 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        loff_t new_size, ioffset;
        int ret;
 
+       /*
+        * We need to test this early because xfstests assumes that a
+        * collapse range of (0, 1) will return EOPNOTSUPP if the file
+        * system does not support collapse range.
+        */
+       if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               return -EOPNOTSUPP;
+
        /* Collapse range works only on fs block size aligned offsets. */
        if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
            len & (EXT4_CLUSTER_SIZE(sb) - 1))
index 55b187c..5168c9b 100644 (file)
@@ -4213,8 +4213,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                inode->i_op = &ext4_dir_inode_operations;
                inode->i_fop = &ext4_dir_operations;
        } else if (S_ISLNK(inode->i_mode)) {
-               if (ext4_inode_is_fast_symlink(inode) &&
-                   !ext4_encrypted_inode(inode)) {
+               if (ext4_encrypted_inode(inode)) {
+                       inode->i_op = &ext4_encrypted_symlink_inode_operations;
+                       ext4_set_aops(inode);
+               } else if (ext4_inode_is_fast_symlink(inode)) {
+                       inode->i_link = (char *)ei->i_data;
                        inode->i_op = &ext4_fast_symlink_inode_operations;
                        nd_terminate_link(ei->i_data, inode->i_size,
                                sizeof(ei->i_data) - 1);
@@ -4345,7 +4348,7 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
        int inode_size = EXT4_INODE_SIZE(sb);
 
        oi.orig_ino = orig_ino;
-       ino = orig_ino & ~(inodes_per_block - 1);
+       ino = (orig_ino & ~(inodes_per_block - 1)) + 1;
        for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
                if (ino == orig_ino)
                        continue;
index 814f3be..5fdb9f6 100644 (file)
@@ -3206,10 +3206,12 @@ static int ext4_symlink(struct inode *dir,
                        goto err_drop_inode;
                sd->len = cpu_to_le16(ostr.len);
                disk_link.name = (char *) sd;
+               inode->i_op = &ext4_encrypted_symlink_inode_operations;
        }
 
        if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
-               inode->i_op = &ext4_symlink_inode_operations;
+               if (!encryption_required)
+                       inode->i_op = &ext4_symlink_inode_operations;
                ext4_set_aops(inode);
                /*
                 * We cannot call page_symlink() with transaction started
@@ -3249,9 +3251,10 @@ static int ext4_symlink(struct inode *dir,
        } else {
                /* clear the extent format for fast symlink */
                ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
-               inode->i_op = encryption_required ?
-                       &ext4_symlink_inode_operations :
-                       &ext4_fast_symlink_inode_operations;
+               if (!encryption_required) {
+                       inode->i_op = &ext4_fast_symlink_inode_operations;
+                       inode->i_link = (char *)&EXT4_I(inode)->i_data;
+               }
                memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
                       disk_link.len);
                inode->i_size = disk_link.len - 1;
index f06d058..ca9d4a2 100644 (file)
@@ -294,6 +294,8 @@ static void __save_error_info(struct super_block *sb, const char *func,
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+       if (bdev_read_only(sb->s_bdev))
+               return;
        es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
        es->s_last_error_time = cpu_to_le32(get_seconds());
        strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
index 187b789..ba5bd18 100644 (file)
@@ -23,7 +23,7 @@
 #include "xattr.h"
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
-static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
 {
        struct page *cpage = NULL;
        char *caddr, *paddr = NULL;
@@ -35,12 +35,9 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
        int res;
        u32 plen, max_size = inode->i_sb->s_blocksize;
 
-       if (!ext4_encrypted_inode(inode))
-               return page_follow_link_light(dentry, nd);
-
        ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
        if (IS_ERR(ctx))
-               return ctx;
+               return ERR_CAST(ctx);
 
        if (ext4_inode_is_fast_symlink(inode)) {
                caddr = (char *) EXT4_I(inode)->i_data;
@@ -49,7 +46,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
                cpage = read_mapping_page(inode->i_mapping, 0, NULL);
                if (IS_ERR(cpage)) {
                        ext4_put_fname_crypto_ctx(&ctx);
-                       return cpage;
+                       return ERR_CAST(cpage);
                }
                caddr = kmap(cpage);
                caddr[size] = 0;
@@ -80,13 +77,12 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
        /* Null-terminate the name */
        if (res <= plen)
                paddr[res] = '\0';
-       nd_set_link(nd, paddr);
        ext4_put_fname_crypto_ctx(&ctx);
        if (cpage) {
                kunmap(cpage);
                page_cache_release(cpage);
        }
-       return NULL;
+       return *cookie = paddr;
 errout:
        ext4_put_fname_crypto_ctx(&ctx);
        if (cpage) {
@@ -97,36 +93,22 @@ errout:
        return ERR_PTR(res);
 }
 
-static void ext4_put_link(struct dentry *dentry, struct nameidata *nd,
-                         void *cookie)
-{
-       struct page *page = cookie;
-
-       if (!page) {
-               kfree(nd_get_link(nd));
-       } else {
-               kunmap(page);
-               page_cache_release(page);
-       }
-}
+const struct inode_operations ext4_encrypted_symlink_inode_operations = {
+       .readlink       = generic_readlink,
+       .follow_link    = ext4_follow_link,
+       .put_link       = kfree_put_link,
+       .setattr        = ext4_setattr,
+       .setxattr       = generic_setxattr,
+       .getxattr       = generic_getxattr,
+       .listxattr      = ext4_listxattr,
+       .removexattr    = generic_removexattr,
+};
 #endif
 
-static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct ext4_inode_info *ei = EXT4_I(d_inode(dentry));
-       nd_set_link(nd, (char *) ei->i_data);
-       return NULL;
-}
-
 const struct inode_operations ext4_symlink_inode_operations = {
        .readlink       = generic_readlink,
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-       .follow_link    = ext4_follow_link,
-       .put_link       = ext4_put_link,
-#else
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
-#endif
        .setattr        = ext4_setattr,
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
@@ -136,7 +118,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
 
 const struct inode_operations ext4_fast_symlink_inode_operations = {
        .readlink       = generic_readlink,
-       .follow_link    = ext4_follow_fast_link,
+       .follow_link    = simple_follow_link,
        .setattr        = ext4_setattr,
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
index b91b0e1..1e1aae6 100644 (file)
@@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 {
        struct inode *inode = mapping->host;
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       bool locked = false;
        int ret;
        long diff;
 
@@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 
        diff = nr_pages_to_write(sbi, DATA, wbc);
 
+       if (!S_ISDIR(inode->i_mode)) {
+               mutex_lock(&sbi->writepages);
+               locked = true;
+       }
        ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
+       if (locked)
+               mutex_unlock(&sbi->writepages);
 
        f2fs_submit_merged_bio(sbi, DATA, WRITE);
 
index d8921cf..8de34ab 100644 (file)
@@ -625,6 +625,7 @@ struct f2fs_sb_info {
        struct mutex cp_mutex;                  /* checkpoint procedure lock */
        struct rw_semaphore cp_rwsem;           /* blocking FS operations */
        struct rw_semaphore node_write;         /* locking node writes */
+       struct mutex writepages;                /* mutex for writepages() */
        wait_queue_head_t cp_wait;
 
        struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
index 7e3794e..71765d0 100644 (file)
@@ -296,21 +296,15 @@ fail:
        return err;
 }
 
-static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *f2fs_follow_link(struct dentry *dentry, void **cookie)
 {
-       struct page *page;
-
-       page = page_follow_link_light(dentry, nd);
-       if (IS_ERR(page))
-               return page;
-
-       /* this is broken symlink case */
-       if (*nd_get_link(nd) == 0) {
-               kunmap(page);
-               page_cache_release(page);
-               return ERR_PTR(-ENOENT);
+       const char *link = page_follow_link_light(dentry, cookie);
+       if (!IS_ERR(link) && !*link) {
+               /* this is broken symlink case */
+               page_put_link(NULL, *cookie);
+               link = ERR_PTR(-ENOENT);
        }
-       return page;
+       return link;
 }
 
 static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
index 160b883..b2dd1b0 100644 (file)
@@ -1035,6 +1035,7 @@ try_onemore:
        sbi->raw_super = raw_super;
        sbi->raw_super_buf = raw_super_buf;
        mutex_init(&sbi->gc_mutex);
+       mutex_init(&sbi->writepages);
        mutex_init(&sbi->cp_mutex);
        init_rwsem(&sbi->node_write);
        clear_sbi_flag(sbi, SBI_POR_DOING);
index 999ff5c..d59712d 100644 (file)
@@ -195,8 +195,9 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
                goto out_err;
        }
        /* copy the full handle */
-       if (copy_from_user(handle, ufh,
-                          sizeof(struct file_handle) +
+       *handle = f_handle;
+       if (copy_from_user(&handle->f_handle,
+                          &ufh->f_handle,
                           f_handle.handle_bytes)) {
                retval = -EFAULT;
                goto out_handle;
index 881aa3d..e3dcb44 100644 (file)
@@ -50,9 +50,6 @@ extern daddr_t                        vxfs_bmap1(struct inode *, long);
 /* vxfs_fshead.c */
 extern int                     vxfs_read_fshead(struct super_block *);
 
-/* vxfs_immed.c */
-extern const struct inode_operations vxfs_immed_symlink_iops;
-
 /* vxfs_inode.c */
 extern const struct address_space_operations vxfs_immed_aops;
 extern struct kmem_cache       *vxfs_inode_cachep;
index 8b9229e..cb84f0f 100644 (file)
  */
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/namei.h>
 
 #include "vxfs.h"
 #include "vxfs_extern.h"
 #include "vxfs_inode.h"
 
 
-static void *  vxfs_immed_follow_link(struct dentry *, struct nameidata *);
-
 static int     vxfs_immed_readpage(struct file *, struct page *);
 
 /*
- * Inode operations for immed symlinks.
- *
- * Unliked all other operations we do not go through the pagecache,
- * but do all work directly on the inode.
- */
-const struct inode_operations vxfs_immed_symlink_iops = {
-       .readlink =             generic_readlink,
-       .follow_link =          vxfs_immed_follow_link,
-};
-
-/*
  * Address space operations for immed files and directories.
  */
 const struct address_space_operations vxfs_immed_aops = {
@@ -62,26 +48,6 @@ const struct address_space_operations vxfs_immed_aops = {
 };
 
 /**
- * vxfs_immed_follow_link - follow immed symlink
- * @dp:                dentry for the link
- * @np:                pathname lookup data for the current path walk
- *
- * Description:
- *   vxfs_immed_follow_link restarts the pathname lookup with
- *   the data obtained from @dp.
- *
- * Returns:
- *   Zero on success, else a negative error code.
- */
-static void *
-vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np)
-{
-       struct vxfs_inode_info          *vip = VXFS_INO(d_inode(dp));
-       nd_set_link(np, vip->vii_immed.vi_immed);
-       return NULL;
-}
-
-/**
  * vxfs_immed_readpage - read part of an immed inode into pagecache
  * @file:      file context (unused)
  * @page:      page frame to fill in.
index 363e3ae..ef73ed6 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/pagemap.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/namei.h>
 
 #include "vxfs.h"
 #include "vxfs_inode.h"
@@ -327,8 +328,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
                        ip->i_op = &page_symlink_inode_operations;
                        ip->i_mapping->a_ops = &vxfs_aops;
                } else {
-                       ip->i_op = &vxfs_immed_symlink_iops;
-                       vip->vii_immed.vi_immed[ip->i_size] = '\0';
+                       ip->i_op = &simple_symlink_inode_operations;
+                       ip->i_link = vip->vii_immed.vi_immed;
+                       nd_terminate_link(ip->i_link, ip->i_size,
+                                         sizeof(vip->vii_immed.vi_immed) - 1);
                }
        } else
                init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev));
index 0572bca..5e2e087 100644 (file)
@@ -1365,7 +1365,7 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
        return err;
 }
 
-static char *read_link(struct dentry *dentry)
+static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
 {
        struct inode *inode = d_inode(dentry);
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1389,28 +1389,12 @@ static char *read_link(struct dentry *dentry)
                link = ERR_PTR(ret);
        } else {
                link[ret] = '\0';
+               *cookie = link;
        }
        fuse_invalidate_atime(inode);
        return link;
 }
 
-static void free_link(char *link)
-{
-       if (!IS_ERR(link))
-               free_page((unsigned long) link);
-}
-
-static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       nd_set_link(nd, read_link(dentry));
-       return NULL;
-}
-
-static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
-{
-       free_link(nd_get_link(nd));
-}
-
 static int fuse_dir_open(struct inode *inode, struct file *file)
 {
        return fuse_open_common(inode, file, true);
@@ -1926,7 +1910,7 @@ static const struct inode_operations fuse_common_inode_operations = {
 static const struct inode_operations fuse_symlink_inode_operations = {
        .setattr        = fuse_setattr,
        .follow_link    = fuse_follow_link,
-       .put_link       = fuse_put_link,
+       .put_link       = free_page_put_link,
        .readlink       = generic_readlink,
        .getattr        = fuse_getattr,
        .setxattr       = fuse_setxattr,
index 1b3ca7a..3a1461d 100644 (file)
@@ -1548,7 +1548,7 @@ out:
  * Returns: 0 on success or error code
  */
 
-static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *gfs2_follow_link(struct dentry *dentry, void **cookie)
 {
        struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
        struct gfs2_holder i_gh;
@@ -1561,8 +1561,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
        error = gfs2_glock_nq(&i_gh);
        if (error) {
                gfs2_holder_uninit(&i_gh);
-               nd_set_link(nd, ERR_PTR(error));
-               return NULL;
+               return ERR_PTR(error);
        }
 
        size = (unsigned int)i_size_read(&ip->i_inode);
@@ -1586,8 +1585,9 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
        brelse(dibh);
 out:
        gfs2_glock_dq_uninit(&i_gh);
-       nd_set_link(nd, buf);
-       return NULL;
+       if (!IS_ERR(buf))
+               *cookie = buf;
+       return buf;
 }
 
 /**
index ef26317..059597b 100644 (file)
@@ -581,7 +581,7 @@ static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
        if (name == NULL)
                goto out_put;
 
-       fd = file_create(name, mode & S_IFMT);
+       fd = file_create(name, mode & 0777);
        if (fd < 0)
                error = fd;
        else
@@ -892,7 +892,7 @@ static const struct inode_operations hostfs_dir_iops = {
        .setattr        = hostfs_setattr,
 };
 
-static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *hostfs_follow_link(struct dentry *dentry, void **cookie)
 {
        char *link = __getname();
        if (link) {
@@ -906,21 +906,18 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
                }
                if (err < 0) {
                        __putname(link);
-                       link = ERR_PTR(err);
+                       return ERR_PTR(err);
                }
        } else {
-               link = ERR_PTR(-ENOMEM);
+               return ERR_PTR(-ENOMEM);
        }
 
-       nd_set_link(nd, link);
-       return NULL;
+       return *cookie = link;
 }
 
-static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+static void hostfs_put_link(struct inode *unused, void *cookie)
 {
-       char *s = nd_get_link(nd);
-       if (!IS_ERR(s))
-               __putname(s);
+       __putname(cookie);
 }
 
 static const struct inode_operations hostfs_link_iops = {
index fa2bd53..2867837 100644 (file)
@@ -642,20 +642,19 @@ static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
                                                    buflen);
 }
 
-static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *hppfs_follow_link(struct dentry *dentry, void **cookie)
 {
        struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
 
-       return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, nd);
+       return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, cookie);
 }
 
-static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
-                          void *cookie)
+static void hppfs_put_link(struct inode *inode, void *cookie)
 {
-       struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
+       struct inode *proc_inode = d_inode(HPPFS_I(inode)->proc_dentry);
 
-       if (d_inode(proc_dentry)->i_op->put_link)
-               d_inode(proc_dentry)->i_op->put_link(proc_dentry, nd, cookie);
+       if (proc_inode->i_op->put_link)
+               proc_inode->i_op->put_link(proc_inode, cookie);
 }
 
 static const struct inode_operations hppfs_dir_iops = {
index ea37cd1..e8d6268 100644 (file)
@@ -152,6 +152,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        inode->i_pipe = NULL;
        inode->i_bdev = NULL;
        inode->i_cdev = NULL;
+       inode->i_link = NULL;
        inode->i_rdev = 0;
        inode->dirtied_when = 0;
 
@@ -1584,36 +1585,47 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
  *     This function automatically handles read only file systems and media,
  *     as well as the "noatime" flag and inode specific "noatime" markers.
  */
-void touch_atime(const struct path *path)
+bool atime_needs_update(const struct path *path, struct inode *inode)
 {
        struct vfsmount *mnt = path->mnt;
-       struct inode *inode = d_inode(path->dentry);
        struct timespec now;
 
        if (inode->i_flags & S_NOATIME)
-               return;
+               return false;
        if (IS_NOATIME(inode))
-               return;
+               return false;
        if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
-               return;
+               return false;
 
        if (mnt->mnt_flags & MNT_NOATIME)
-               return;
+               return false;
        if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
-               return;
+               return false;
 
        now = current_fs_time(inode->i_sb);
 
        if (!relatime_need_update(mnt, inode, now))
-               return;
+               return false;
 
        if (timespec_equal(&inode->i_atime, &now))
+               return false;
+
+       return true;
+}
+
+void touch_atime(const struct path *path)
+{
+       struct vfsmount *mnt = path->mnt;
+       struct inode *inode = d_inode(path->dentry);
+       struct timespec now;
+
+       if (!atime_needs_update(path, inode))
                return;
 
        if (!sb_start_write_trylock(inode->i_sb))
                return;
 
-       if (__mnt_want_write(mnt))
+       if (__mnt_want_write(mnt) != 0)
                goto skip_update;
        /*
         * File systems can error out when updating inodes if they need to
@@ -1624,6 +1636,7 @@ void touch_atime(const struct path *path)
         * We may also fail on filesystems that have the ability to make parts
         * of the fs read only, e.g. subvolumes in Btrfs.
         */
+       now = current_fs_time(inode->i_sb);
        update_time(inode, &now, S_ATIME);
        __mnt_drop_write(mnt);
 skip_update:
index b5128c6..a9079d0 100644 (file)
@@ -842,15 +842,23 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 {
        jbd2_journal_revoke_header_t *header;
        int offset, max;
+       int csum_size = 0;
+       __u32 rcount;
        int record_len = 4;
 
        header = (jbd2_journal_revoke_header_t *) bh->b_data;
        offset = sizeof(jbd2_journal_revoke_header_t);
-       max = be32_to_cpu(header->r_count);
+       rcount = be32_to_cpu(header->r_count);
 
        if (!jbd2_revoke_block_csum_verify(journal, header))
                return -EINVAL;
 
+       if (jbd2_journal_has_csum_v2or3(journal))
+               csum_size = sizeof(struct jbd2_journal_revoke_tail);
+       if (rcount > journal->j_blocksize - csum_size)
+               return -EINVAL;
+       max = rcount;
+
        if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
                record_len = 8;
 
index c6cbaef..14214da 100644 (file)
@@ -577,7 +577,7 @@ static void write_one_revoke_record(journal_t *journal,
 {
        int csum_size = 0;
        struct buffer_head *descriptor;
-       int offset;
+       int sz, offset;
        journal_header_t *header;
 
        /* If we are already aborting, this all becomes a noop.  We
@@ -594,9 +594,14 @@ static void write_one_revoke_record(journal_t *journal,
        if (jbd2_journal_has_csum_v2or3(journal))
                csum_size = sizeof(struct jbd2_journal_revoke_tail);
 
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
+               sz = 8;
+       else
+               sz = 4;
+
        /* Make sure we have a descriptor with space left for the record */
        if (descriptor) {
-               if (offset >= journal->j_blocksize - csum_size) {
+               if (offset + sz > journal->j_blocksize - csum_size) {
                        flush_descriptor(journal, descriptor, offset, write_op);
                        descriptor = NULL;
                }
@@ -619,16 +624,13 @@ static void write_one_revoke_record(journal_t *journal,
                *descriptorp = descriptor;
        }
 
-       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) {
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
                * ((__be64 *)(&descriptor->b_data[offset])) =
                        cpu_to_be64(record->blocknr);
-               offset += 8;
-
-       } else {
+       else
                * ((__be32 *)(&descriptor->b_data[offset])) =
                        cpu_to_be32(record->blocknr);
-               offset += 4;
-       }
+       offset += sz;
 
        *offsetp = offset;
 }
index 5f09370..ff2f2e6 100644 (file)
@@ -551,7 +551,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
        int result;
        int wanted;
 
-       WARN_ON(!transaction);
        if (is_handle_aborted(handle))
                return -EROFS;
        journal = transaction->t_journal;
@@ -627,7 +626,6 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
        tid_t           tid;
        int             need_to_start, ret;
 
-       WARN_ON(!transaction);
        /* If we've had an abort of any type, don't even think about
         * actually doing the restart! */
        if (is_handle_aborted(handle))
@@ -785,7 +783,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
        int need_copy = 0;
        unsigned long start_lock, time_lock;
 
-       WARN_ON(!transaction);
        if (is_handle_aborted(handle))
                return -EROFS;
        journal = transaction->t_journal;
@@ -1051,7 +1048,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
        int err;
 
        jbd_debug(5, "journal_head %p\n", jh);
-       WARN_ON(!transaction);
        err = -EROFS;
        if (is_handle_aborted(handle))
                goto out;
@@ -1266,7 +1262,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
        struct journal_head *jh;
        int ret = 0;
 
-       WARN_ON(!transaction);
        if (is_handle_aborted(handle))
                return -EROFS;
        journal = transaction->t_journal;
@@ -1397,7 +1392,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
        int err = 0;
        int was_modified = 0;
 
-       WARN_ON(!transaction);
        if (is_handle_aborted(handle))
                return -EROFS;
        journal = transaction->t_journal;
@@ -1530,8 +1524,22 @@ int jbd2_journal_stop(handle_t *handle)
        tid_t tid;
        pid_t pid;
 
-       if (!transaction)
-               goto free_and_exit;
+       if (!transaction) {
+               /*
+                * Handle is already detached from the transaction so
+                * there is nothing to do other than decrease a refcount,
+                * or free the handle if refcount drops to zero
+                */
+               if (--handle->h_ref > 0) {
+                       jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
+                                                        handle->h_ref);
+                       return err;
+               } else {
+                       if (handle->h_rsv_handle)
+                               jbd2_free_handle(handle->h_rsv_handle);
+                       goto free_and_exit;
+               }
+       }
        journal = transaction->t_journal;
 
        J_ASSERT(journal_current_handle() == handle);
@@ -2373,7 +2381,6 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal;
 
-       WARN_ON(!transaction);
        if (is_handle_aborted(handle))
                return -EROFS;
        journal = transaction->t_journal;
index 1ba5c97..8118002 100644 (file)
@@ -354,6 +354,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
                ret = -ENOMEM;
                goto fail;
        }
+       inode->i_link = f->target;
 
        jffs2_dbg(1, "%s(): symlink's target '%s' cached\n",
                  __func__, (char *)f->target);
index fe5ea08..60d86e8 100644 (file)
@@ -294,6 +294,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
 
        case S_IFLNK:
                inode->i_op = &jffs2_symlink_inode_operations;
+               inode->i_link = f->target;
                break;
 
        case S_IFDIR:
index 1fefa25..8ce2f24 100644 (file)
@@ -9,58 +9,15 @@
  *
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/namei.h>
 #include "nodelist.h"
 
-static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
-
 const struct inode_operations jffs2_symlink_inode_operations =
 {
        .readlink =     generic_readlink,
-       .follow_link =  jffs2_follow_link,
+       .follow_link =  simple_follow_link,
        .setattr =      jffs2_setattr,
        .setxattr =     jffs2_setxattr,
        .getxattr =     jffs2_getxattr,
        .listxattr =    jffs2_listxattr,
        .removexattr =  jffs2_removexattr
 };
-
-static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry));
-       char *p = (char *)f->target;
-
-       /*
-        * We don't acquire the f->sem mutex here since the only data we
-        * use is f->target.
-        *
-        * 1. If we are here the inode has already built and f->target has
-        * to point to the target path.
-        * 2. Nobody uses f->target (if the inode is symlink's inode). The
-        * exception is inode freeing function which frees f->target. But
-        * it can't be called while we are here and before VFS has
-        * stopped using our f->target string which we provide by means of
-        * nd_set_link() call.
-        */
-
-       if (!p) {
-               pr_err("%s(): can't find symlink target\n", __func__);
-               p = ERR_PTR(-EIO);
-       }
-       jffs2_dbg(1, "%s(): target path is '%s'\n",
-                 __func__, (char *)f->target);
-
-       nd_set_link(nd, p);
-
-       /*
-        * We will unlock the f->sem mutex but VFS will use the f->target string. This is safe
-        * since the only way that may cause f->target to be changed is iput() operation.
-        * But VFS will not use f->target after iput() has been called.
-        */
-       return NULL;
-}
-
index 070dc4b..6f1cb2b 100644 (file)
@@ -63,11 +63,12 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
                        inode->i_mapping->a_ops = &jfs_aops;
                } else {
                        inode->i_op = &jfs_fast_symlink_inode_operations;
+                       inode->i_link = JFS_IP(inode)->i_inline;
                        /*
                         * The inline data should be null-terminated, but
                         * don't let on-disk corruption crash the kernel
                         */
-                       JFS_IP(inode)->i_inline[inode->i_size] = '\0';
+                       inode->i_link[inode->i_size] = '\0';
                }
        } else {
                inode->i_op = &jfs_file_inode_operations;
index 66db7bc..e33be92 100644 (file)
@@ -880,7 +880,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
        int ssize;              /* source pathname size */
        struct btstack btstack;
        struct inode *ip = d_inode(dentry);
-       unchar *i_fastsymlink;
        s64 xlen = 0;
        int bmask = 0, xsize;
        s64 xaddr;
@@ -946,8 +945,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
        if (ssize <= IDATASIZE) {
                ip->i_op = &jfs_fast_symlink_inode_operations;
 
-               i_fastsymlink = JFS_IP(ip)->i_inline;
-               memcpy(i_fastsymlink, name, ssize);
+               ip->i_link = JFS_IP(ip)->i_inline;
+               memcpy(ip->i_link, name, ssize);
                ip->i_size = ssize - 1;
 
                /*
index 80f42bc..5929e23 100644 (file)
  */
 
 #include <linux/fs.h>
-#include <linux/namei.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_xattr.h"
 
-static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       char *s = JFS_IP(d_inode(dentry))->i_inline;
-       nd_set_link(nd, s);
-       return NULL;
-}
-
 const struct inode_operations jfs_fast_symlink_inode_operations = {
        .readlink       = generic_readlink,
-       .follow_link    = jfs_follow_link,
+       .follow_link    = simple_follow_link,
        .setattr        = jfs_setattr,
        .setxattr       = jfs_setxattr,
        .getxattr       = jfs_getxattr,
index f131fc2..fffca95 100644 (file)
@@ -518,7 +518,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
        if (!kn)
                goto err_out1;
 
-       ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
+       /*
+        * If the ino of the sysfs entry created for a kmem cache gets
+        * allocated from an ida layer, which is accounted to the memcg that
+        * owns the cache, the memcg will get pinned forever. So do not account
+        * ino ida allocations.
+        */
+       ret = ida_simple_get(&root->ino_ida, 1, 0,
+                            GFP_KERNEL | __GFP_NOACCOUNT);
        if (ret < 0)
                goto err_out2;
        kn->ino = ret;
index 8a19889..db27252 100644 (file)
@@ -112,25 +112,18 @@ static int kernfs_getlink(struct dentry *dentry, char *path)
        return error;
 }
 
-static void *kernfs_iop_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *kernfs_iop_follow_link(struct dentry *dentry, void **cookie)
 {
        int error = -ENOMEM;
        unsigned long page = get_zeroed_page(GFP_KERNEL);
-       if (page) {
-               error = kernfs_getlink(dentry, (char *) page);
-               if (error < 0)
-                       free_page((unsigned long)page);
-       }
-       nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
-       return NULL;
-}
-
-static void kernfs_iop_put_link(struct dentry *dentry, struct nameidata *nd,
-                               void *cookie)
-{
-       char *page = nd_get_link(nd);
-       if (!IS_ERR(page))
+       if (!page)
+               return ERR_PTR(-ENOMEM);
+       error = kernfs_getlink(dentry, (char *)page);
+       if (unlikely(error < 0)) {
                free_page((unsigned long)page);
+               return ERR_PTR(error);
+       }
+       return *cookie = (char *)page;
 }
 
 const struct inode_operations kernfs_symlink_iops = {
@@ -140,7 +133,7 @@ const struct inode_operations kernfs_symlink_iops = {
        .listxattr      = kernfs_iop_listxattr,
        .readlink       = generic_readlink,
        .follow_link    = kernfs_iop_follow_link,
-       .put_link       = kernfs_iop_put_link,
+       .put_link       = free_page_put_link,
        .setattr        = kernfs_iop_setattr,
        .getattr        = kernfs_iop_getattr,
        .permission     = kernfs_iop_permission,
index cb1fb4b..65e1fec 100644 (file)
@@ -1024,15 +1024,18 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 }
 EXPORT_SYMBOL(noop_fsync);
 
-void kfree_put_link(struct dentry *dentry, struct nameidata *nd,
-                               void *cookie)
+void kfree_put_link(struct inode *unused, void *cookie)
 {
-       char *s = nd_get_link(nd);
-       if (!IS_ERR(s))
-               kfree(s);
+       kfree(cookie);
 }
 EXPORT_SYMBOL(kfree_put_link);
 
+void free_page_put_link(struct inode *unused, void *cookie)
+{
+       free_page((unsigned long) cookie);
+}
+EXPORT_SYMBOL(free_page_put_link);
+
 /*
  * nop .set_page_dirty method so that people can use .page_mkwrite on
  * anon inodes.
@@ -1093,3 +1096,15 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
        return -EINVAL;
 }
 EXPORT_SYMBOL(simple_nosetlease);
+
+const char *simple_follow_link(struct dentry *dentry, void **cookie)
+{
+       return d_inode(dentry)->i_link;
+}
+EXPORT_SYMBOL(simple_follow_link);
+
+const struct inode_operations simple_symlink_inode_operations = {
+       .follow_link = simple_follow_link,
+       .readlink = generic_readlink
+};
+EXPORT_SYMBOL(simple_symlink_inode_operations);
index 4cf38f1..f9b45d4 100644 (file)
@@ -779,6 +779,7 @@ fail:
 const struct inode_operations logfs_symlink_iops = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
+       .put_link       = page_put_link,
 };
 
 const struct inode_operations logfs_dir_iops = {
index 6a61c2b..b5b8082 100644 (file)
@@ -88,6 +88,7 @@ static inline int is_mounted(struct vfsmount *mnt)
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
 extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
 
+extern int __legitimize_mnt(struct vfsmount *, unsigned);
 extern bool legitimize_mnt(struct vfsmount *, unsigned);
 
 extern void __detach_mounts(struct dentry *dentry);
index 4a8d998..2dad0ea 100644 (file)
@@ -492,6 +492,7 @@ void path_put(const struct path *path)
 }
 EXPORT_SYMBOL(path_put);
 
+#define EMBEDDED_LEVELS 2
 struct nameidata {
        struct path     path;
        struct qstr     last;
@@ -501,10 +502,139 @@ struct nameidata {
        unsigned        seq, m_seq;
        int             last_type;
        unsigned        depth;
-       struct file     *base;
-       char *saved_names[MAX_NESTED_LINKS + 1];
+       int             total_link_count;
+       struct saved {
+               struct path link;
+               void *cookie;
+               const char *name;
+               struct inode *inode;
+               unsigned seq;
+       } *stack, internal[EMBEDDED_LEVELS];
+       struct filename *name;
+       struct nameidata *saved;
+       unsigned        root_seq;
+       int             dfd;
 };
 
+static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
+{
+       struct nameidata *old = current->nameidata;
+       p->stack = p->internal;
+       p->dfd = dfd;
+       p->name = name;
+       p->total_link_count = old ? old->total_link_count : 0;
+       p->saved = old;
+       current->nameidata = p;
+}
+
+static void restore_nameidata(void)
+{
+       struct nameidata *now = current->nameidata, *old = now->saved;
+
+       current->nameidata = old;
+       if (old)
+               old->total_link_count = now->total_link_count;
+       if (now->stack != now->internal) {
+               kfree(now->stack);
+               now->stack = now->internal;
+       }
+}
+
+static int __nd_alloc_stack(struct nameidata *nd)
+{
+       struct saved *p;
+
+       if (nd->flags & LOOKUP_RCU) {
+               p= kmalloc(MAXSYMLINKS * sizeof(struct saved),
+                                 GFP_ATOMIC);
+               if (unlikely(!p))
+                       return -ECHILD;
+       } else {
+               p= kmalloc(MAXSYMLINKS * sizeof(struct saved),
+                                 GFP_KERNEL);
+               if (unlikely(!p))
+                       return -ENOMEM;
+       }
+       memcpy(p, nd->internal, sizeof(nd->internal));
+       nd->stack = p;
+       return 0;
+}
+
+static inline int nd_alloc_stack(struct nameidata *nd)
+{
+       if (likely(nd->depth != EMBEDDED_LEVELS))
+               return 0;
+       if (likely(nd->stack != nd->internal))
+               return 0;
+       return __nd_alloc_stack(nd);
+}
+
+static void drop_links(struct nameidata *nd)
+{
+       int i = nd->depth;
+       while (i--) {
+               struct saved *last = nd->stack + i;
+               struct inode *inode = last->inode;
+               if (last->cookie && inode->i_op->put_link) {
+                       inode->i_op->put_link(inode, last->cookie);
+                       last->cookie = NULL;
+               }
+       }
+}
+
+static void terminate_walk(struct nameidata *nd)
+{
+       drop_links(nd);
+       if (!(nd->flags & LOOKUP_RCU)) {
+               int i;
+               path_put(&nd->path);
+               for (i = 0; i < nd->depth; i++)
+                       path_put(&nd->stack[i].link);
+               if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+                       path_put(&nd->root);
+                       nd->root.mnt = NULL;
+               }
+       } else {
+               nd->flags &= ~LOOKUP_RCU;
+               if (!(nd->flags & LOOKUP_ROOT))
+                       nd->root.mnt = NULL;
+               rcu_read_unlock();
+       }
+       nd->depth = 0;
+}
+
+/* path_put is needed afterwards regardless of success or failure */
+static bool legitimize_path(struct nameidata *nd,
+                           struct path *path, unsigned seq)
+{
+       int res = __legitimize_mnt(path->mnt, nd->m_seq);
+       if (unlikely(res)) {
+               if (res > 0)
+                       path->mnt = NULL;
+               path->dentry = NULL;
+               return false;
+       }
+       if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) {
+               path->dentry = NULL;
+               return false;
+       }
+       return !read_seqcount_retry(&path->dentry->d_seq, seq);
+}
+
+static bool legitimize_links(struct nameidata *nd)
+{
+       int i;
+       for (i = 0; i < nd->depth; i++) {
+               struct saved *last = nd->stack + i;
+               if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
+                       drop_links(nd);
+                       nd->depth = i + 1;
+                       return false;
+               }
+       }
+       return true;
+}
+
 /*
  * Path walking has 2 modes, rcu-walk and ref-walk (see
  * Documentation/filesystems/path-lookup.txt).  In situations when we can't
@@ -520,35 +650,28 @@ struct nameidata {
  * unlazy_walk - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
  * @dentry: child of nd->path.dentry or NULL
+ * @seq: seq number to check dentry against
  * Returns: 0 on success, -ECHILD on failure
  *
  * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
  * for ref-walk mode.  @dentry must be a path found by a do_lookup call on
  * @nd or NULL.  Must be called from rcu-walk context.
+ * Nothing should touch nameidata between unlazy_walk() failure and
+ * terminate_walk().
  */
-static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
+static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq)
 {
-       struct fs_struct *fs = current->fs;
        struct dentry *parent = nd->path.dentry;
 
        BUG_ON(!(nd->flags & LOOKUP_RCU));
 
-       /*
-        * After legitimizing the bastards, terminate_walk()
-        * will do the right thing for non-RCU mode, and all our
-        * subsequent exit cases should rcu_read_unlock()
-        * before returning.  Do vfsmount first; if dentry
-        * can't be legitimized, just set nd->path.dentry to NULL
-        * and rely on dput(NULL) being a no-op.
-        */
-       if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
-               return -ECHILD;
        nd->flags &= ~LOOKUP_RCU;
-
-       if (!lockref_get_not_dead(&parent->d_lockref)) {
-               nd->path.dentry = NULL; 
-               goto out;
-       }
+       if (unlikely(!legitimize_links(nd)))
+               goto out2;
+       if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
+               goto out2;
+       if (unlikely(!lockref_get_not_dead(&parent->d_lockref)))
+               goto out1;
 
        /*
         * For a negative lookup, the lookup sequence point is the parents
@@ -568,7 +691,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
        } else {
                if (!lockref_get_not_dead(&dentry->d_lockref))
                        goto out;
-               if (read_seqcount_retry(&dentry->d_seq, nd->seq))
+               if (read_seqcount_retry(&dentry->d_seq, seq))
                        goto drop_dentry;
        }
 
@@ -577,22 +700,24 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
         * still valid and get it if required.
         */
        if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
-               spin_lock(&fs->lock);
-               if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
-                       goto unlock_and_drop_dentry;
-               path_get(&nd->root);
-               spin_unlock(&fs->lock);
+               if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) {
+                       rcu_read_unlock();
+                       dput(dentry);
+                       return -ECHILD;
+               }
        }
 
        rcu_read_unlock();
        return 0;
 
-unlock_and_drop_dentry:
-       spin_unlock(&fs->lock);
 drop_dentry:
        rcu_read_unlock();
        dput(dentry);
        goto drop_root_mnt;
+out2:
+       nd->path.mnt = NULL;
+out1:
+       nd->path.dentry = NULL;
 out:
        rcu_read_unlock();
 drop_root_mnt:
@@ -601,6 +726,24 @@ drop_root_mnt:
        return -ECHILD;
 }
 
+static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq)
+{
+       if (unlikely(!legitimize_path(nd, link, seq))) {
+               drop_links(nd);
+               nd->depth = 0;
+               nd->flags &= ~LOOKUP_RCU;
+               nd->path.mnt = NULL;
+               nd->path.dentry = NULL;
+               if (!(nd->flags & LOOKUP_ROOT))
+                       nd->root.mnt = NULL;
+               rcu_read_unlock();
+       } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) {
+               return 0;
+       }
+       path_put(link);
+       return -ECHILD;
+}
+
 static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
 {
        return dentry->d_op->d_revalidate(dentry, flags);
@@ -622,26 +765,10 @@ static int complete_walk(struct nameidata *nd)
        int status;
 
        if (nd->flags & LOOKUP_RCU) {
-               nd->flags &= ~LOOKUP_RCU;
                if (!(nd->flags & LOOKUP_ROOT))
                        nd->root.mnt = NULL;
-
-               if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
-                       rcu_read_unlock();
+               if (unlikely(unlazy_walk(nd, NULL, 0)))
                        return -ECHILD;
-               }
-               if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
-                       rcu_read_unlock();
-                       mntput(nd->path.mnt);
-                       return -ECHILD;
-               }
-               if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
-                       rcu_read_unlock();
-                       dput(dentry);
-                       mntput(nd->path.mnt);
-                       return -ECHILD;
-               }
-               rcu_read_unlock();
        }
 
        if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -657,28 +784,25 @@ static int complete_walk(struct nameidata *nd)
        if (!status)
                status = -ESTALE;
 
-       path_put(&nd->path);
        return status;
 }
 
-static __always_inline void set_root(struct nameidata *nd)
+static void set_root(struct nameidata *nd)
 {
        get_fs_root(current->fs, &nd->root);
 }
 
-static int link_path_walk(const char *, struct nameidata *);
-
-static __always_inline unsigned set_root_rcu(struct nameidata *nd)
+static unsigned set_root_rcu(struct nameidata *nd)
 {
        struct fs_struct *fs = current->fs;
-       unsigned seq, res;
+       unsigned seq;
 
        do {
                seq = read_seqcount_begin(&fs->seq);
                nd->root = fs->root;
-               res = __read_seqcount_begin(&nd->root.dentry->d_seq);
+               nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
        } while (read_seqcount_retry(&fs->seq, seq));
-       return res;
+       return nd->root_seq;
 }
 
 static void path_put_conditional(struct path *path, struct nameidata *nd)
@@ -704,8 +828,9 @@ static inline void path_to_nameidata(const struct path *path,
  * Helper to directly jump to a known parsed path from ->follow_link,
  * caller must have taken a reference to path beforehand.
  */
-void nd_jump_link(struct nameidata *nd, struct path *path)
+void nd_jump_link(struct path *path)
 {
+       struct nameidata *nd = current->nameidata;
        path_put(&nd->path);
 
        nd->path = *path;
@@ -713,24 +838,14 @@ void nd_jump_link(struct nameidata *nd, struct path *path)
        nd->flags |= LOOKUP_JUMPED;
 }
 
-void nd_set_link(struct nameidata *nd, char *path)
+static inline void put_link(struct nameidata *nd)
 {
-       nd->saved_names[nd->depth] = path;
-}
-EXPORT_SYMBOL(nd_set_link);
-
-char *nd_get_link(struct nameidata *nd)
-{
-       return nd->saved_names[nd->depth];
-}
-EXPORT_SYMBOL(nd_get_link);
-
-static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
-{
-       struct inode *inode = link->dentry->d_inode;
-       if (inode->i_op->put_link)
-               inode->i_op->put_link(link->dentry, nd, cookie);
-       path_put(link);
+       struct saved *last = nd->stack + --nd->depth;
+       struct inode *inode = last->inode;
+       if (last->cookie && inode->i_op->put_link)
+               inode->i_op->put_link(inode, last->cookie);
+       if (!(nd->flags & LOOKUP_RCU))
+               path_put(&last->link);
 }
 
 int sysctl_protected_symlinks __read_mostly = 0;
@@ -738,7 +853,6 @@ int sysctl_protected_hardlinks __read_mostly = 0;
 
 /**
  * may_follow_link - Check symlink following for unsafe situations
- * @link: The path of the symlink
  * @nd: nameidata pathwalk data
  *
  * In the case of the sysctl_protected_symlinks sysctl being enabled,
@@ -752,7 +866,7 @@ int sysctl_protected_hardlinks __read_mostly = 0;
  *
  * Returns 0 if following the symlink is allowed, -ve on error.
  */
-static inline int may_follow_link(struct path *link, struct nameidata *nd)
+static inline int may_follow_link(struct nameidata *nd)
 {
        const struct inode *inode;
        const struct inode *parent;
@@ -761,7 +875,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
                return 0;
 
        /* Allowed if owner and follower match. */
-       inode = link->dentry->d_inode;
+       inode = nd->stack[0].inode;
        if (uid_eq(current_cred()->fsuid, inode->i_uid))
                return 0;
 
@@ -774,9 +888,10 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
        if (uid_eq(parent->i_uid, inode->i_uid))
                return 0;
 
-       audit_log_link_denied("follow_link", link);
-       path_put_conditional(link, nd);
-       path_put(&nd->path);
+       if (nd->flags & LOOKUP_RCU)
+               return -ECHILD;
+
+       audit_log_link_denied("follow_link", &nd->stack[0].link);
        return -EACCES;
 }
 
@@ -849,82 +964,68 @@ static int may_linkat(struct path *link)
        return -EPERM;
 }
 
-static __always_inline int
-follow_link(struct path *link, struct nameidata *nd, void **p)
+static __always_inline
+const char *get_link(struct nameidata *nd)
 {
-       struct dentry *dentry = link->dentry;
+       struct saved *last = nd->stack + nd->depth - 1;
+       struct dentry *dentry = last->link.dentry;
+       struct inode *inode = last->inode;
        int error;
-       char *s;
-
-       BUG_ON(nd->flags & LOOKUP_RCU);
-
-       if (link->mnt == nd->path.mnt)
-               mntget(link->mnt);
+       const char *res;
 
-       error = -ELOOP;
-       if (unlikely(current->total_link_count >= 40))
-               goto out_put_nd_path;
-
-       cond_resched();
-       current->total_link_count++;
-
-       touch_atime(link);
-       nd_set_link(nd, NULL);
+       if (!(nd->flags & LOOKUP_RCU)) {
+               touch_atime(&last->link);
+               cond_resched();
+       } else if (atime_needs_update(&last->link, inode)) {
+               if (unlikely(unlazy_walk(nd, NULL, 0)))
+                       return ERR_PTR(-ECHILD);
+               touch_atime(&last->link);
+       }
 
-       error = security_inode_follow_link(link->dentry, nd);
-       if (error)
-               goto out_put_nd_path;
+       error = security_inode_follow_link(dentry, inode,
+                                          nd->flags & LOOKUP_RCU);
+       if (unlikely(error))
+               return ERR_PTR(error);
 
        nd->last_type = LAST_BIND;
-       *p = dentry->d_inode->i_op->follow_link(dentry, nd);
-       error = PTR_ERR(*p);
-       if (IS_ERR(*p))
-               goto out_put_nd_path;
-
-       error = 0;
-       s = nd_get_link(nd);
-       if (s) {
-               if (unlikely(IS_ERR(s))) {
-                       path_put(&nd->path);
-                       put_link(nd, link, *p);
-                       return PTR_ERR(s);
+       res = inode->i_link;
+       if (!res) {
+               if (nd->flags & LOOKUP_RCU) {
+                       if (unlikely(unlazy_walk(nd, NULL, 0)))
+                               return ERR_PTR(-ECHILD);
                }
-               if (*s == '/') {
+               res = inode->i_op->follow_link(dentry, &last->cookie);
+               if (IS_ERR_OR_NULL(res)) {
+                       last->cookie = NULL;
+                       return res;
+               }
+       }
+       if (*res == '/') {
+               if (nd->flags & LOOKUP_RCU) {
+                       struct dentry *d;
+                       if (!nd->root.mnt)
+                               set_root_rcu(nd);
+                       nd->path = nd->root;
+                       d = nd->path.dentry;
+                       nd->inode = d->d_inode;
+                       nd->seq = nd->root_seq;
+                       if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
+                               return ERR_PTR(-ECHILD);
+               } else {
                        if (!nd->root.mnt)
                                set_root(nd);
                        path_put(&nd->path);
                        nd->path = nd->root;
                        path_get(&nd->root);
-                       nd->flags |= LOOKUP_JUMPED;
+                       nd->inode = nd->path.dentry->d_inode;
                }
-               nd->inode = nd->path.dentry->d_inode;
-               error = link_path_walk(s, nd);
-               if (unlikely(error))
-                       put_link(nd, link, *p);
+               nd->flags |= LOOKUP_JUMPED;
+               while (unlikely(*++res == '/'))
+                       ;
        }
-
-       return error;
-
-out_put_nd_path:
-       *p = NULL;
-       path_put(&nd->path);
-       path_put(link);
-       return error;
-}
-
-static int follow_up_rcu(struct path *path)
-{
-       struct mount *mnt = real_mount(path->mnt);
-       struct mount *parent;
-       struct dentry *mountpoint;
-
-       parent = mnt->mnt_parent;
-       if (&parent->mnt == path->mnt)
-               return 0;
-       mountpoint = mnt->mnt_mountpoint;
-       path->dentry = mountpoint;
-       path->mnt = &parent->mnt;
-       return 1;
+       if (!*res)
+               res = NULL;
+       return res;
 }
 
 /*
@@ -965,7 +1066,7 @@ EXPORT_SYMBOL(follow_up);
  * - return -EISDIR to tell follow_managed() to stop and return the path we
  *   were called with.
  */
-static int follow_automount(struct path *path, unsigned flags,
+static int follow_automount(struct path *path, struct nameidata *nd,
                            bool *need_mntput)
 {
        struct vfsmount *mnt;
@@ -985,13 +1086,13 @@ static int follow_automount(struct path *path, unsigned flags,
         * as being automount points.  These will need the attentions
         * of the daemon to instantiate them before they can be used.
         */
-       if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
-                    LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
+       if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
+                          LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
            path->dentry->d_inode)
                return -EISDIR;
 
-       current->total_link_count++;
-       if (current->total_link_count >= 40)
+       nd->total_link_count++;
+       if (nd->total_link_count >= 40)
                return -ELOOP;
 
        mnt = path->dentry->d_op->d_automount(path);
@@ -1005,7 +1106,7 @@ static int follow_automount(struct path *path, unsigned flags,
                 * the path being looked up; if it wasn't then the remainder of
                 * the path is inaccessible and we should say so.
                 */
-               if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT))
+               if (PTR_ERR(mnt) == -EISDIR && (nd->flags & LOOKUP_PARENT))
                        return -EREMOTE;
                return PTR_ERR(mnt);
        }
@@ -1045,7 +1146,7 @@ static int follow_automount(struct path *path, unsigned flags,
  *
  * Serialization is taken care of in namespace.c
  */
-static int follow_managed(struct path *path, unsigned flags)
+static int follow_managed(struct path *path, struct nameidata *nd)
 {
        struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
        unsigned managed;
@@ -1089,7 +1190,7 @@ static int follow_managed(struct path *path, unsigned flags)
 
                /* Handle an automount point */
                if (managed & DCACHE_NEED_AUTOMOUNT) {
-                       ret = follow_automount(path, flags, &need_mntput);
+                       ret = follow_automount(path, nd, &need_mntput);
                        if (ret < 0)
                                break;
                        continue;
@@ -1103,7 +1204,11 @@ static int follow_managed(struct path *path, unsigned flags)
                mntput(path->mnt);
        if (ret == -EISDIR)
                ret = 0;
-       return ret < 0 ? ret : need_mntput;
+       if (need_mntput)
+               nd->flags |= LOOKUP_JUMPED;
+       if (unlikely(ret < 0))
+               path_put_conditional(path, nd);
+       return ret;
 }
 
 int follow_down_one(struct path *path)
@@ -1133,7 +1238,7 @@ static inline int managed_dentry_rcu(struct dentry *dentry)
  * we meet a managed dentry that would need blocking.
  */
 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
-                              struct inode **inode)
+                              struct inode **inode, unsigned *seqp)
 {
        for (;;) {
                struct mount *mounted;
@@ -1160,7 +1265,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
                path->mnt = &mounted->mnt;
                path->dentry = mounted->mnt.mnt_root;
                nd->flags |= LOOKUP_JUMPED;
-               nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+               *seqp = read_seqcount_begin(&path->dentry->d_seq);
                /*
                 * Update the inode too. We don't need to re-check the
                 * dentry sequence number here after this d_inode read,
@@ -1179,10 +1284,8 @@ static int follow_dotdot_rcu(struct nameidata *nd)
                set_root_rcu(nd);
 
        while (1) {
-               if (nd->path.dentry == nd->root.dentry &&
-                   nd->path.mnt == nd->root.mnt) {
+               if (path_equal(&nd->path, &nd->root))
                        break;
-               }
                if (nd->path.dentry != nd->path.mnt->mnt_root) {
                        struct dentry *old = nd->path.dentry;
                        struct dentry *parent = old->d_parent;
@@ -1190,38 +1293,42 @@ static int follow_dotdot_rcu(struct nameidata *nd)
 
                        inode = parent->d_inode;
                        seq = read_seqcount_begin(&parent->d_seq);
-                       if (read_seqcount_retry(&old->d_seq, nd->seq))
-                               goto failed;
+                       if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
+                               return -ECHILD;
                        nd->path.dentry = parent;
                        nd->seq = seq;
                        break;
+               } else {
+                       struct mount *mnt = real_mount(nd->path.mnt);
+                       struct mount *mparent = mnt->mnt_parent;
+                       struct dentry *mountpoint = mnt->mnt_mountpoint;
+                       struct inode *inode2 = mountpoint->d_inode;
+                       unsigned seq = read_seqcount_begin(&mountpoint->d_seq);
+                       if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+                               return -ECHILD;
+                       if (&mparent->mnt == nd->path.mnt)
+                               break;
+                       /* we know that mountpoint was pinned */
+                       nd->path.dentry = mountpoint;
+                       nd->path.mnt = &mparent->mnt;
+                       inode = inode2;
+                       nd->seq = seq;
                }
-               if (!follow_up_rcu(&nd->path))
-                       break;
-               inode = nd->path.dentry->d_inode;
-               nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
        }
-       while (d_mountpoint(nd->path.dentry)) {
+       while (unlikely(d_mountpoint(nd->path.dentry))) {
                struct mount *mounted;
                mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
+               if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+                       return -ECHILD;
                if (!mounted)
                        break;
                nd->path.mnt = &mounted->mnt;
                nd->path.dentry = mounted->mnt.mnt_root;
                inode = nd->path.dentry->d_inode;
                nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
-               if (read_seqretry(&mount_lock, nd->m_seq))
-                       goto failed;
        }
        nd->inode = inode;
        return 0;
-
-failed:
-       nd->flags &= ~LOOKUP_RCU;
-       if (!(nd->flags & LOOKUP_ROOT))
-               nd->root.mnt = NULL;
-       rcu_read_unlock();
-       return -ECHILD;
 }
 
 /*
@@ -1400,7 +1507,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
  *  It _is_ time-critical.
  */
 static int lookup_fast(struct nameidata *nd,
-                      struct path *path, struct inode **inode)
+                      struct path *path, struct inode **inode,
+                      unsigned *seqp)
 {
        struct vfsmount *mnt = nd->path.mnt;
        struct dentry *dentry, *parent = nd->path.dentry;
@@ -1415,6 +1523,7 @@ static int lookup_fast(struct nameidata *nd,
         */
        if (nd->flags & LOOKUP_RCU) {
                unsigned seq;
+               bool negative;
                dentry = __d_lookup_rcu(parent, &nd->last, &seq);
                if (!dentry)
                        goto unlazy;
@@ -1423,9 +1532,12 @@ static int lookup_fast(struct nameidata *nd,
                 * This sequence count validates that the inode matches
                 * the dentry name information from lookup.
                 */
-               *inode = dentry->d_inode;
+               *inode = d_backing_inode(dentry);
+               negative = d_is_negative(dentry);
                if (read_seqcount_retry(&dentry->d_seq, seq))
                        return -ECHILD;
+               if (negative)
+                       return -ENOENT;
 
                /*
                 * This sequence count validates that the parent had no
@@ -1436,8 +1548,8 @@ static int lookup_fast(struct nameidata *nd,
                 */
                if (__read_seqcount_retry(&parent->d_seq, nd->seq))
                        return -ECHILD;
-               nd->seq = seq;
 
+               *seqp = seq;
                if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
                        status = d_revalidate(dentry, nd->flags);
                        if (unlikely(status <= 0)) {
@@ -1448,10 +1560,10 @@ static int lookup_fast(struct nameidata *nd,
                }
                path->mnt = mnt;
                path->dentry = dentry;
-               if (likely(__follow_mount_rcu(nd, path, inode)))
+               if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
                        return 0;
 unlazy:
-               if (unlazy_walk(nd, dentry))
+               if (unlazy_walk(nd, dentry, seq))
                        return -ECHILD;
        } else {
                dentry = __d_lookup(parent, &nd->last);
@@ -1472,17 +1584,16 @@ unlazy:
                goto need_lookup;
        }
 
+       if (unlikely(d_is_negative(dentry))) {
+               dput(dentry);
+               return -ENOENT;
+       }
        path->mnt = mnt;
        path->dentry = dentry;
-       err = follow_managed(path, nd->flags);
-       if (unlikely(err < 0)) {
-               path_put_conditional(path, nd);
-               return err;
-       }
-       if (err)
-               nd->flags |= LOOKUP_JUMPED;
-       *inode = path->dentry->d_inode;
-       return 0;
+       err = follow_managed(path, nd);
+       if (likely(!err))
+               *inode = d_backing_inode(path->dentry);
+       return err;
 
 need_lookup:
        return 1;
@@ -1492,7 +1603,6 @@ need_lookup:
 static int lookup_slow(struct nameidata *nd, struct path *path)
 {
        struct dentry *dentry, *parent;
-       int err;
 
        parent = nd->path.dentry;
        BUG_ON(nd->inode != parent->d_inode);
@@ -1504,14 +1614,7 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
                return PTR_ERR(dentry);
        path->mnt = nd->path.mnt;
        path->dentry = dentry;
-       err = follow_managed(path, nd->flags);
-       if (unlikely(err < 0)) {
-               path_put_conditional(path, nd);
-               return err;
-       }
-       if (err)
-               nd->flags |= LOOKUP_JUMPED;
-       return 0;
+       return follow_managed(path, nd);
 }
 
 static inline int may_lookup(struct nameidata *nd)
@@ -1520,7 +1623,7 @@ static inline int may_lookup(struct nameidata *nd)
                int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
                if (err != -ECHILD)
                        return err;
-               if (unlazy_walk(nd, NULL))
+               if (unlazy_walk(nd, NULL, 0))
                        return -ECHILD;
        }
        return inode_permission(nd->inode, MAY_EXEC);
@@ -1530,24 +1633,45 @@ static inline int handle_dots(struct nameidata *nd, int type)
 {
        if (type == LAST_DOTDOT) {
                if (nd->flags & LOOKUP_RCU) {
-                       if (follow_dotdot_rcu(nd))
-                               return -ECHILD;
+                       return follow_dotdot_rcu(nd);
                } else
                        follow_dotdot(nd);
        }
        return 0;
 }
 
-static void terminate_walk(struct nameidata *nd)
+static int pick_link(struct nameidata *nd, struct path *link,
+                    struct inode *inode, unsigned seq)
 {
+       int error;
+       struct saved *last;
+       if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) {
+               path_to_nameidata(link, nd);
+               return -ELOOP;
+       }
        if (!(nd->flags & LOOKUP_RCU)) {
-               path_put(&nd->path);
-       } else {
-               nd->flags &= ~LOOKUP_RCU;
-               if (!(nd->flags & LOOKUP_ROOT))
-                       nd->root.mnt = NULL;
-               rcu_read_unlock();
+               if (link->mnt == nd->path.mnt)
+                       mntget(link->mnt);
+       }
+       error = nd_alloc_stack(nd);
+       if (unlikely(error)) {
+               if (error == -ECHILD) {
+                       if (unlikely(unlazy_link(nd, link, seq)))
+                               return -ECHILD;
+                       error = nd_alloc_stack(nd);
+               }
+               if (error) {
+                       path_put(link);
+                       return error;
+               }
        }
+
+       last = nd->stack + nd->depth++;
+       last->link = *link;
+       last->cookie = NULL;
+       last->inode = inode;
+       last->seq = seq;
+       return 1;
 }
 
 /*
@@ -1556,98 +1680,68 @@ static void terminate_walk(struct nameidata *nd)
  * so we keep a cache of "no, this doesn't need follow_link"
  * for the common case.
  */
-static inline int should_follow_link(struct dentry *dentry, int follow)
+static inline int should_follow_link(struct nameidata *nd, struct path *link,
+                                    int follow,
+                                    struct inode *inode, unsigned seq)
 {
-       return unlikely(d_is_symlink(dentry)) ? follow : 0;
+       if (likely(!d_is_symlink(link->dentry)))
+               return 0;
+       if (!follow)
+               return 0;
+       return pick_link(nd, link, inode, seq);
 }
 
-static inline int walk_component(struct nameidata *nd, struct path *path,
-               int follow)
+enum {WALK_GET = 1, WALK_PUT = 2};
+
+static int walk_component(struct nameidata *nd, int flags)
 {
+       struct path path;
        struct inode *inode;
+       unsigned seq;
        int err;
        /*
         * "." and ".." are special - ".." especially so because it has
         * to be able to know about the current root directory and
         * parent relationships.
         */
-       if (unlikely(nd->last_type != LAST_NORM))
-               return handle_dots(nd, nd->last_type);
-       err = lookup_fast(nd, path, &inode);
+       if (unlikely(nd->last_type != LAST_NORM)) {
+               err = handle_dots(nd, nd->last_type);
+               if (flags & WALK_PUT)
+                       put_link(nd);
+               return err;
+       }
+       err = lookup_fast(nd, &path, &inode, &seq);
        if (unlikely(err)) {
                if (err < 0)
-                       goto out_err;
+                       return err;
 
-               err = lookup_slow(nd, path);
+               err = lookup_slow(nd, &path);
                if (err < 0)
-                       goto out_err;
+                       return err;
 
-               inode = path->dentry->d_inode;
+               inode = d_backing_inode(path.dentry);
+               seq = 0;        /* we are already out of RCU mode */
+               err = -ENOENT;
+               if (d_is_negative(path.dentry))
+                       goto out_path_put;
        }
-       err = -ENOENT;
-       if (d_is_negative(path->dentry))
-               goto out_path_put;
 
-       if (should_follow_link(path->dentry, follow)) {
-               if (nd->flags & LOOKUP_RCU) {
-                       if (unlikely(nd->path.mnt != path->mnt ||
-                                    unlazy_walk(nd, path->dentry))) {
-                               err = -ECHILD;
-                               goto out_err;
-                       }
-               }
-               BUG_ON(inode != path->dentry->d_inode);
-               return 1;
-       }
-       path_to_nameidata(path, nd);
+       if (flags & WALK_PUT)
+               put_link(nd);
+       err = should_follow_link(nd, &path, flags & WALK_GET, inode, seq);
+       if (unlikely(err))
+               return err;
+       path_to_nameidata(&path, nd);
        nd->inode = inode;
+       nd->seq = seq;
        return 0;
 
 out_path_put:
-       path_to_nameidata(path, nd);
-out_err:
-       terminate_walk(nd);
+       path_to_nameidata(&path, nd);
        return err;
 }
 
 /*
- * This limits recursive symlink follows to 8, while
- * limiting consecutive symlinks to 40.
- *
- * Without that kind of total limit, nasty chains of consecutive
- * symlinks can cause almost arbitrarily long lookups.
- */
-static inline int nested_symlink(struct path *path, struct nameidata *nd)
-{
-       int res;
-
-       if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
-               path_put_conditional(path, nd);
-               path_put(&nd->path);
-               return -ELOOP;
-       }
-       BUG_ON(nd->depth >= MAX_NESTED_LINKS);
-
-       nd->depth++;
-       current->link_count++;
-
-       do {
-               struct path link = *path;
-               void *cookie;
-
-               res = follow_link(&link, nd, &cookie);
-               if (res)
-                       break;
-               res = walk_component(nd, path, LOOKUP_FOLLOW);
-               put_link(nd, &link, cookie);
-       } while (res > 0);
-
-       current->link_count--;
-       nd->depth--;
-       return res;
-}
-
-/*
  * We can do the critical dentry name comparison and hashing
  * operations one word at a time, but we are limited to:
  *
@@ -1773,9 +1867,8 @@ static inline u64 hash_name(const char *name)
  */
 static int link_path_walk(const char *name, struct nameidata *nd)
 {
-       struct path next;
        int err;
-       
+
        while (*name=='/')
                name++;
        if (!*name)
@@ -1788,7 +1881,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 
                err = may_lookup(nd);
                if (err)
-                       break;
+                       return err;
 
                hash_len = hash_name(name);
 
@@ -1810,7 +1903,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
                                struct qstr this = { { .hash_len = hash_len }, .name = name };
                                err = parent->d_op->d_hash(parent, &this);
                                if (err < 0)
-                                       break;
+                                       return err;
                                hash_len = this.hash_len;
                                name = this.name;
                        }
@@ -1822,7 +1915,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 
                name += hashlen_len(hash_len);
                if (!*name)
-                       return 0;
+                       goto OK;
                /*
                 * If it wasn't NUL, we know it was '/'. Skip that
                 * slash, and continue until no more slashes.
@@ -1830,57 +1923,73 @@ static int link_path_walk(const char *name, struct nameidata *nd)
                do {
                        name++;
                } while (unlikely(*name == '/'));
-               if (!*name)
-                       return 0;
-
-               err = walk_component(nd, &next, LOOKUP_FOLLOW);
+               if (unlikely(!*name)) {
+OK:
+                       /* pathname body, done */
+                       if (!nd->depth)
+                               return 0;
+                       name = nd->stack[nd->depth - 1].name;
+                       /* trailing symlink, done */
+                       if (!name)
+                               return 0;
+                       /* last component of nested symlink */
+                       err = walk_component(nd, WALK_GET | WALK_PUT);
+               } else {
+                       err = walk_component(nd, WALK_GET);
+               }
                if (err < 0)
                        return err;
 
                if (err) {
-                       err = nested_symlink(&next, nd);
-                       if (err)
-                               return err;
-               }
-               if (!d_can_lookup(nd->path.dentry)) {
-                       err = -ENOTDIR; 
-                       break;
+                       const char *s = get_link(nd);
+
+                       if (unlikely(IS_ERR(s)))
+                               return PTR_ERR(s);
+                       err = 0;
+                       if (unlikely(!s)) {
+                               /* jumped */
+                               put_link(nd);
+                       } else {
+                               nd->stack[nd->depth - 1].name = name;
+                               name = s;
+                               continue;
+                       }
                }
+               if (unlikely(!d_can_lookup(nd->path.dentry)))
+                       return -ENOTDIR;
        }
-       terminate_walk(nd);
-       return err;
 }
 
-static int path_init(int dfd, const struct filename *name, unsigned int flags,
-                    struct nameidata *nd)
+static const char *path_init(struct nameidata *nd, unsigned flags)
 {
        int retval = 0;
-       const char *s = name->name;
+       const char *s = nd->name->name;
 
        nd->last_type = LAST_ROOT; /* if there are only slashes... */
        nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
        nd->depth = 0;
-       nd->base = NULL;
+       nd->total_link_count = 0;
        if (flags & LOOKUP_ROOT) {
                struct dentry *root = nd->root.dentry;
                struct inode *inode = root->d_inode;
                if (*s) {
                        if (!d_can_lookup(root))
-                               return -ENOTDIR;
+                               return ERR_PTR(-ENOTDIR);
                        retval = inode_permission(inode, MAY_EXEC);
                        if (retval)
-                               return retval;
+                               return ERR_PTR(retval);
                }
                nd->path = nd->root;
                nd->inode = inode;
                if (flags & LOOKUP_RCU) {
                        rcu_read_lock();
                        nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+                       nd->root_seq = nd->seq;
                        nd->m_seq = read_seqbegin(&mount_lock);
                } else {
                        path_get(&nd->path);
                }
-               goto done;
+               return s;
        }
 
        nd->root.mnt = NULL;
@@ -1895,7 +2004,7 @@ static int path_init(int dfd, const struct filename *name, unsigned int flags,
                        path_get(&nd->root);
                }
                nd->path = nd->root;
-       } else if (dfd == AT_FDCWD) {
+       } else if (nd->dfd == AT_FDCWD) {
                if (flags & LOOKUP_RCU) {
                        struct fs_struct *fs = current->fs;
                        unsigned seq;
@@ -1912,180 +2021,205 @@ static int path_init(int dfd, const struct filename *name, unsigned int flags,
                }
        } else {
                /* Caller must check execute permissions on the starting path component */
-               struct fd f = fdget_raw(dfd);
+               struct fd f = fdget_raw(nd->dfd);
                struct dentry *dentry;
 
                if (!f.file)
-                       return -EBADF;
+                       return ERR_PTR(-EBADF);
 
                dentry = f.file->f_path.dentry;
 
                if (*s) {
                        if (!d_can_lookup(dentry)) {
                                fdput(f);
-                               return -ENOTDIR;
+                               return ERR_PTR(-ENOTDIR);
                        }
                }
 
                nd->path = f.file->f_path;
                if (flags & LOOKUP_RCU) {
-                       if (f.flags & FDPUT_FPUT)
-                               nd->base = f.file;
-                       nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
                        rcu_read_lock();
+                       nd->inode = nd->path.dentry->d_inode;
+                       nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
                } else {
                        path_get(&nd->path);
-                       fdput(f);
+                       nd->inode = nd->path.dentry->d_inode;
                }
+               fdput(f);
+               return s;
        }
 
        nd->inode = nd->path.dentry->d_inode;
        if (!(flags & LOOKUP_RCU))
-               goto done;
+               return s;
        if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
-               goto done;
+               return s;
        if (!(nd->flags & LOOKUP_ROOT))
                nd->root.mnt = NULL;
        rcu_read_unlock();
-       return -ECHILD;
-done:
-       current->total_link_count = 0;
-       return link_path_walk(s, nd);
+       return ERR_PTR(-ECHILD);
 }
 
-static void path_cleanup(struct nameidata *nd)
+static const char *trailing_symlink(struct nameidata *nd)
 {
-       if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
-               path_put(&nd->root);
-               nd->root.mnt = NULL;
-       }
-       if (unlikely(nd->base))
-               fput(nd->base);
+       const char *s;
+       int error = may_follow_link(nd);
+       if (unlikely(error))
+               return ERR_PTR(error);
+       nd->flags |= LOOKUP_PARENT;
+       nd->stack[0].name = NULL;
+       s = get_link(nd);
+       return s ? s : "";
 }
 
-static inline int lookup_last(struct nameidata *nd, struct path *path)
+static inline int lookup_last(struct nameidata *nd)
 {
        if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
                nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 
        nd->flags &= ~LOOKUP_PARENT;
-       return walk_component(nd, path, nd->flags & LOOKUP_FOLLOW);
+       return walk_component(nd,
+                       nd->flags & LOOKUP_FOLLOW
+                               ? nd->depth
+                                       ? WALK_PUT | WALK_GET
+                                       : WALK_GET
+                               : 0);
 }
 
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int path_lookupat(int dfd, const struct filename *name,
-                               unsigned int flags, struct nameidata *nd)
+static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
 {
-       struct path path;
+       const char *s = path_init(nd, flags);
        int err;
 
-       /*
-        * Path walking is largely split up into 2 different synchronisation
-        * schemes, rcu-walk and ref-walk (explained in
-        * Documentation/filesystems/path-lookup.txt). These share much of the
-        * path walk code, but some things particularly setup, cleanup, and
-        * following mounts are sufficiently divergent that functions are
-        * duplicated. Typically there is a function foo(), and its RCU
-        * analogue, foo_rcu().
-        *
-        * -ECHILD is the error number of choice (just to avoid clashes) that
-        * is returned if some aspect of an rcu-walk fails. Such an error must
-        * be handled by restarting a traditional ref-walk (which will always
-        * be able to complete).
-        */
-       err = path_init(dfd, name, flags, nd);
-       if (!err && !(flags & LOOKUP_PARENT)) {
-               err = lookup_last(nd, &path);
-               while (err > 0) {
-                       void *cookie;
-                       struct path link = path;
-                       err = may_follow_link(&link, nd);
-                       if (unlikely(err))
-                               break;
-                       nd->flags |= LOOKUP_PARENT;
-                       err = follow_link(&link, nd, &cookie);
-                       if (err)
-                               break;
-                       err = lookup_last(nd, &path);
-                       put_link(nd, &link, cookie);
+       if (IS_ERR(s))
+               return PTR_ERR(s);
+       while (!(err = link_path_walk(s, nd))
+               && ((err = lookup_last(nd)) > 0)) {
+               s = trailing_symlink(nd);
+               if (IS_ERR(s)) {
+                       err = PTR_ERR(s);
+                       break;
                }
        }
-
        if (!err)
                err = complete_walk(nd);
 
-       if (!err && nd->flags & LOOKUP_DIRECTORY) {
-               if (!d_can_lookup(nd->path.dentry)) {
-                       path_put(&nd->path);
+       if (!err && nd->flags & LOOKUP_DIRECTORY)
+               if (!d_can_lookup(nd->path.dentry))
                        err = -ENOTDIR;
-               }
+       if (!err) {
+               *path = nd->path;
+               nd->path.mnt = NULL;
+               nd->path.dentry = NULL;
        }
-
-       path_cleanup(nd);
+       terminate_walk(nd);
        return err;
 }
 
-static int filename_lookup(int dfd, struct filename *name,
-                               unsigned int flags, struct nameidata *nd)
+static int filename_lookup(int dfd, struct filename *name, unsigned flags,
+                          struct path *path, struct path *root)
 {
-       int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
+       int retval;
+       struct nameidata nd;
+       if (IS_ERR(name))
+               return PTR_ERR(name);
+       if (unlikely(root)) {
+               nd.root = *root;
+               flags |= LOOKUP_ROOT;
+       }
+       set_nameidata(&nd, dfd, name);
+       retval = path_lookupat(&nd, flags | LOOKUP_RCU, path);
        if (unlikely(retval == -ECHILD))
-               retval = path_lookupat(dfd, name, flags, nd);
+               retval = path_lookupat(&nd, flags, path);
        if (unlikely(retval == -ESTALE))
-               retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
+               retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path);
 
        if (likely(!retval))
-               audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
+               audit_inode(name, path->dentry, flags & LOOKUP_PARENT);
+       restore_nameidata();
+       putname(name);
        return retval;
 }
 
+/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+static int path_parentat(struct nameidata *nd, unsigned flags,
+                               struct path *parent)
+{
+       const char *s = path_init(nd, flags);
+       int err;
+       if (IS_ERR(s))
+               return PTR_ERR(s);
+       err = link_path_walk(s, nd);
+       if (!err)
+               err = complete_walk(nd);
+       if (!err) {
+               *parent = nd->path;
+               nd->path.mnt = NULL;
+               nd->path.dentry = NULL;
+       }
+       terminate_walk(nd);
+       return err;
+}
+
+static struct filename *filename_parentat(int dfd, struct filename *name,
+                               unsigned int flags, struct path *parent,
+                               struct qstr *last, int *type)
+{
+       int retval;
+       struct nameidata nd;
+
+       if (IS_ERR(name))
+               return name;
+       set_nameidata(&nd, dfd, name);
+       retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
+       if (unlikely(retval == -ECHILD))
+               retval = path_parentat(&nd, flags, parent);
+       if (unlikely(retval == -ESTALE))
+               retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent);
+       if (likely(!retval)) {
+               *last = nd.last;
+               *type = nd.last_type;
+               audit_inode(name, parent->dentry, LOOKUP_PARENT);
+       } else {
+               putname(name);
+               name = ERR_PTR(retval);
+       }
+       restore_nameidata();
+       return name;
+}
+
 /* does lookup, returns the object with parent locked */
 struct dentry *kern_path_locked(const char *name, struct path *path)
 {
-       struct filename *filename = getname_kernel(name);
-       struct nameidata nd;
+       struct filename *filename;
        struct dentry *d;
-       int err;
+       struct qstr last;
+       int type;
 
+       filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
+                                   &last, &type);
        if (IS_ERR(filename))
                return ERR_CAST(filename);
-
-       err = filename_lookup(AT_FDCWD, filename, LOOKUP_PARENT, &nd);
-       if (err) {
-               d = ERR_PTR(err);
-               goto out;
-       }
-       if (nd.last_type != LAST_NORM) {
-               path_put(&nd.path);
-               d = ERR_PTR(-EINVAL);
-               goto out;
+       if (unlikely(type != LAST_NORM)) {
+               path_put(path);
+               putname(filename);
+               return ERR_PTR(-EINVAL);
        }
-       mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-       d = __lookup_hash(&nd.last, nd.path.dentry, 0);
+       mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       d = __lookup_hash(&last, path->dentry, 0);
        if (IS_ERR(d)) {
-               mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-               path_put(&nd.path);
-               goto out;
+               mutex_unlock(&path->dentry->d_inode->i_mutex);
+               path_put(path);
        }
-       *path = nd.path;
-out:
        putname(filename);
        return d;
 }
 
 int kern_path(const char *name, unsigned int flags, struct path *path)
 {
-       struct nameidata nd;
-       struct filename *filename = getname_kernel(name);
-       int res = PTR_ERR(filename);
-
-       if (!IS_ERR(filename)) {
-               res = filename_lookup(AT_FDCWD, filename, flags, &nd);
-               putname(filename);
-               if (!res)
-                       *path = nd.path;
-       }
-       return res;
+       return filename_lookup(AT_FDCWD, getname_kernel(name),
+                              flags, path, NULL);
 }
 EXPORT_SYMBOL(kern_path);
 
@@ -2101,36 +2235,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
                    const char *name, unsigned int flags,
                    struct path *path)
 {
-       struct filename *filename = getname_kernel(name);
-       int err = PTR_ERR(filename);
-
-       BUG_ON(flags & LOOKUP_PARENT);
-
-       /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
-       if (!IS_ERR(filename)) {
-               struct nameidata nd;
-               nd.root.dentry = dentry;
-               nd.root.mnt = mnt;
-               err = filename_lookup(AT_FDCWD, filename,
-                                     flags | LOOKUP_ROOT, &nd);
-               if (!err)
-                       *path = nd.path;
-               putname(filename);
-       }
-       return err;
+       struct path root = {.mnt = mnt, .dentry = dentry};
+       /* the first argument of filename_lookup() is ignored with root */
+       return filename_lookup(AT_FDCWD, getname_kernel(name),
+                              flags , path, &root);
 }
 EXPORT_SYMBOL(vfs_path_lookup);
 
-/*
- * Restricted form of lookup. Doesn't follow links, single-component only,
- * needs parent already locked. Doesn't follow mounts.
- * SMP-safe.
- */
-static struct dentry *lookup_hash(struct nameidata *nd)
-{
-       return __lookup_hash(&nd->last, nd->path.dentry, nd->flags);
-}
-
 /**
  * lookup_one_len - filesystem helper to lookup single pathname component
  * @name:      pathname component to lookup
@@ -2185,27 +2296,10 @@ EXPORT_SYMBOL(lookup_one_len);
 int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
                 struct path *path, int *empty)
 {
-       struct nameidata nd;
-       struct filename *tmp = getname_flags(name, flags, empty);
-       int err = PTR_ERR(tmp);
-       if (!IS_ERR(tmp)) {
-
-               BUG_ON(flags & LOOKUP_PARENT);
-
-               err = filename_lookup(dfd, tmp, flags, &nd);
-               putname(tmp);
-               if (!err)
-                       *path = nd.path;
-       }
-       return err;
-}
-
-int user_path_at(int dfd, const char __user *name, unsigned flags,
-                struct path *path)
-{
-       return user_path_at_empty(dfd, name, flags, path, NULL);
+       return filename_lookup(dfd, getname_flags(name, flags, empty),
+                              flags, path, NULL);
 }
-EXPORT_SYMBOL(user_path_at);
+EXPORT_SYMBOL(user_path_at_empty);
 
 /*
  * NB: most callers don't do anything directly with the reference to the
@@ -2213,26 +2307,16 @@ EXPORT_SYMBOL(user_path_at);
  *     allocated by getname. So we must hold the reference to it until all
  *     path-walking is complete.
  */
-static struct filename *
-user_path_parent(int dfd, const char __user *path, struct nameidata *nd,
+static inline struct filename *
+user_path_parent(int dfd, const char __user *path,
+                struct path *parent,
+                struct qstr *last,
+                int *type,
                 unsigned int flags)
 {
-       struct filename *s = getname(path);
-       int error;
-
        /* only LOOKUP_REVAL is allowed in extra flags */
-       flags &= LOOKUP_REVAL;
-
-       if (IS_ERR(s))
-               return s;
-
-       error = filename_lookup(dfd, s, flags | LOOKUP_PARENT, nd);
-       if (error) {
-               putname(s);
-               return ERR_PTR(error);
-       }
-
-       return s;
+       return filename_parentat(dfd, getname(path), flags & LOOKUP_REVAL,
+                                parent, last, type);
 }
 
 /**
@@ -2271,10 +2355,8 @@ mountpoint_last(struct nameidata *nd, struct path *path)
 
        /* If we're in rcuwalk, drop out of it to handle last component */
        if (nd->flags & LOOKUP_RCU) {
-               if (unlazy_walk(nd, NULL)) {
-                       error = -ECHILD;
-                       goto out;
-               }
+               if (unlazy_walk(nd, NULL, 0))
+                       return -ECHILD;
        }
 
        nd->flags &= ~LOOKUP_PARENT;
@@ -2282,7 +2364,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
        if (unlikely(nd->last_type != LAST_NORM)) {
                error = handle_dots(nd, nd->last_type);
                if (error)
-                       goto out;
+                       return error;
                dentry = dget(nd->path.dentry);
                goto done;
        }
@@ -2297,74 +2379,60 @@ mountpoint_last(struct nameidata *nd, struct path *path)
                 */
                dentry = d_alloc(dir, &nd->last);
                if (!dentry) {
-                       error = -ENOMEM;
                        mutex_unlock(&dir->d_inode->i_mutex);
-                       goto out;
+                       return -ENOMEM;
                }
                dentry = lookup_real(dir->d_inode, dentry, nd->flags);
-               error = PTR_ERR(dentry);
                if (IS_ERR(dentry)) {
                        mutex_unlock(&dir->d_inode->i_mutex);
-                       goto out;
+                       return PTR_ERR(dentry);
                }
        }
        mutex_unlock(&dir->d_inode->i_mutex);
 
 done:
        if (d_is_negative(dentry)) {
-               error = -ENOENT;
                dput(dentry);
-               goto out;
+               return -ENOENT;
        }
+       if (nd->depth)
+               put_link(nd);
        path->dentry = dentry;
        path->mnt = nd->path.mnt;
-       if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
-               return 1;
+       error = should_follow_link(nd, path, nd->flags & LOOKUP_FOLLOW,
+                                  d_backing_inode(dentry), 0);
+       if (unlikely(error))
+               return error;
        mntget(path->mnt);
        follow_mount(path);
-       error = 0;
-out:
-       terminate_walk(nd);
-       return error;
+       return 0;
 }
 
 /**
  * path_mountpoint - look up a path to be umounted
- * @dfd:       directory file descriptor to start walk from
- * @name:      full pathname to walk
- * @path:      pointer to container for result
+ * @nameidata: lookup context
  * @flags:     lookup flags
+ * @path:      pointer to container for result
  *
  * Look up the given name, but don't attempt to revalidate the last component.
  * Returns 0 and "path" will be valid on success; Returns error otherwise.
  */
 static int
-path_mountpoint(int dfd, const struct filename *name, struct path *path,
-               unsigned int flags)
+path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
 {
-       struct nameidata nd;
+       const char *s = path_init(nd, flags);
        int err;
-
-       err = path_init(dfd, name, flags, &nd);
-       if (unlikely(err))
-               goto out;
-
-       err = mountpoint_last(&nd, path);
-       while (err > 0) {
-               void *cookie;
-               struct path link = *path;
-               err = may_follow_link(&link, &nd);
-               if (unlikely(err))
-                       break;
-               nd.flags |= LOOKUP_PARENT;
-               err = follow_link(&link, &nd, &cookie);
-               if (err)
+       if (IS_ERR(s))
+               return PTR_ERR(s);
+       while (!(err = link_path_walk(s, nd)) &&
+               (err = mountpoint_last(nd, path)) > 0) {
+               s = trailing_symlink(nd);
+               if (IS_ERR(s)) {
+                       err = PTR_ERR(s);
                        break;
-               err = mountpoint_last(&nd, path);
-               put_link(&nd, &link, cookie);
+               }
        }
-out:
-       path_cleanup(&nd);
+       terminate_walk(nd);
        return err;
 }
 
@@ -2372,16 +2440,19 @@ static int
 filename_mountpoint(int dfd, struct filename *name, struct path *path,
                        unsigned int flags)
 {
+       struct nameidata nd;
        int error;
        if (IS_ERR(name))
                return PTR_ERR(name);
-       error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
+       set_nameidata(&nd, dfd, name);
+       error = path_mountpoint(&nd, flags | LOOKUP_RCU, path);
        if (unlikely(error == -ECHILD))
-               error = path_mountpoint(dfd, name, path, flags);
+               error = path_mountpoint(&nd, flags, path);
        if (unlikely(error == -ESTALE))
-               error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
+               error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path);
        if (likely(!error))
                audit_inode(name, path->dentry, 0);
+       restore_nameidata();
        putname(name);
        return error;
 }
@@ -2448,7 +2519,7 @@ EXPORT_SYMBOL(__check_sticky);
  */
 static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
 {
-       struct inode *inode = victim->d_inode;
+       struct inode *inode = d_backing_inode(victim);
        int error;
 
        if (d_is_negative(victim))
@@ -2914,18 +2985,19 @@ out_dput:
 /*
  * Handle the last step of open()
  */
-static int do_last(struct nameidata *nd, struct path *path,
+static int do_last(struct nameidata *nd,
                   struct file *file, const struct open_flags *op,
-                  int *opened, struct filename *name)
+                  int *opened)
 {
        struct dentry *dir = nd->path.dentry;
        int open_flag = op->open_flag;
        bool will_truncate = (open_flag & O_TRUNC) != 0;
        bool got_write = false;
        int acc_mode = op->acc_mode;
+       unsigned seq;
        struct inode *inode;
-       bool symlink_ok = false;
        struct path save_parent = { .dentry = NULL, .mnt = NULL };
+       struct path path;
        bool retried = false;
        int error;
 
@@ -2934,7 +3006,7 @@ static int do_last(struct nameidata *nd, struct path *path,
 
        if (nd->last_type != LAST_NORM) {
                error = handle_dots(nd, nd->last_type);
-               if (error)
+               if (unlikely(error))
                        return error;
                goto finish_open;
        }
@@ -2942,15 +3014,13 @@ static int do_last(struct nameidata *nd, struct path *path,
        if (!(open_flag & O_CREAT)) {
                if (nd->last.name[nd->last.len])
                        nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
-               if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
-                       symlink_ok = true;
                /* we _can_ be in RCU mode here */
-               error = lookup_fast(nd, path, &inode);
+               error = lookup_fast(nd, &path, &inode, &seq);
                if (likely(!error))
                        goto finish_lookup;
 
                if (error < 0)
-                       goto out;
+                       return error;
 
                BUG_ON(nd->inode != dir->d_inode);
        } else {
@@ -2964,11 +3034,10 @@ static int do_last(struct nameidata *nd, struct path *path,
                if (error)
                        return error;
 
-               audit_inode(name, dir, LOOKUP_PARENT);
-               error = -EISDIR;
+               audit_inode(nd->name, dir, LOOKUP_PARENT);
                /* trailing slashes? */
-               if (nd->last.name[nd->last.len])
-                       goto out;
+               if (unlikely(nd->last.name[nd->last.len]))
+                       return -EISDIR;
        }
 
 retry_lookup:
@@ -2983,7 +3052,7 @@ retry_lookup:
                 */
        }
        mutex_lock(&dir->d_inode->i_mutex);
-       error = lookup_open(nd, path, file, op, got_write, opened);
+       error = lookup_open(nd, &path, file, op, got_write, opened);
        mutex_unlock(&dir->d_inode->i_mutex);
 
        if (error <= 0) {
@@ -2994,7 +3063,7 @@ retry_lookup:
                    !S_ISREG(file_inode(file)->i_mode))
                        will_truncate = false;
 
-               audit_inode(name, file->f_path.dentry, 0);
+               audit_inode(nd->name, file->f_path.dentry, 0);
                goto opened;
        }
 
@@ -3003,15 +3072,15 @@ retry_lookup:
                open_flag &= ~O_TRUNC;
                will_truncate = false;
                acc_mode = MAY_OPEN;
-               path_to_nameidata(path, nd);
+               path_to_nameidata(&path, nd);
                goto finish_open_created;
        }
 
        /*
         * create/update audit record if it already exists.
         */
-       if (d_is_positive(path->dentry))
-               audit_inode(name, path->dentry, 0);
+       if (d_is_positive(path.dentry))
+               audit_inode(nd->name, path.dentry, 0);
 
        /*
         * If atomic_open() acquired write access it is dropped now due to
@@ -3023,48 +3092,45 @@ retry_lookup:
                got_write = false;
        }
 
-       error = -EEXIST;
-       if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
-               goto exit_dput;
-
-       error = follow_managed(path, nd->flags);
-       if (error < 0)
-               goto exit_dput;
+       if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) {
+               path_to_nameidata(&path, nd);
+               return -EEXIST;
+       }
 
-       if (error)
-               nd->flags |= LOOKUP_JUMPED;
+       error = follow_managed(&path, nd);
+       if (unlikely(error < 0))
+               return error;
 
        BUG_ON(nd->flags & LOOKUP_RCU);
-       inode = path->dentry->d_inode;
-finish_lookup:
-       /* we _can_ be in RCU mode here */
-       error = -ENOENT;
-       if (d_is_negative(path->dentry)) {
-               path_to_nameidata(path, nd);
-               goto out;
+       inode = d_backing_inode(path.dentry);
+       seq = 0;        /* out of RCU mode, so the value doesn't matter */
+       if (unlikely(d_is_negative(path.dentry))) {
+               path_to_nameidata(&path, nd);
+               return -ENOENT;
        }
+finish_lookup:
+       if (nd->depth)
+               put_link(nd);
+       error = should_follow_link(nd, &path, nd->flags & LOOKUP_FOLLOW,
+                                  inode, seq);
+       if (unlikely(error))
+               return error;
 
-       if (should_follow_link(path->dentry, !symlink_ok)) {
-               if (nd->flags & LOOKUP_RCU) {
-                       if (unlikely(nd->path.mnt != path->mnt ||
-                                    unlazy_walk(nd, path->dentry))) {
-                               error = -ECHILD;
-                               goto out;
-                       }
-               }
-               BUG_ON(inode != path->dentry->d_inode);
-               return 1;
+       if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
+               path_to_nameidata(&path, nd);
+               return -ELOOP;
        }
 
-       if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
-               path_to_nameidata(path, nd);
+       if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
+               path_to_nameidata(&path, nd);
        } else {
                save_parent.dentry = nd->path.dentry;
-               save_parent.mnt = mntget(path->mnt);
-               nd->path.dentry = path->dentry;
+               save_parent.mnt = mntget(path.mnt);
+               nd->path.dentry = path.dentry;
 
        }
        nd->inode = inode;
+       nd->seq = seq;
        /* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
 finish_open:
        error = complete_walk(nd);
@@ -3072,7 +3138,7 @@ finish_open:
                path_put(&save_parent);
                return error;
        }
-       audit_inode(name, nd->path.dentry, 0);
+       audit_inode(nd->name, nd->path.dentry, 0);
        error = -EISDIR;
        if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
                goto out;
@@ -3119,12 +3185,8 @@ out:
        if (got_write)
                mnt_drop_write(nd->path.mnt);
        path_put(&save_parent);
-       terminate_walk(nd);
        return error;
 
-exit_dput:
-       path_put_conditional(path, nd);
-       goto out;
 exit_fput:
        fput(file);
        goto out;
@@ -3148,50 +3210,46 @@ stale_open:
        goto retry_lookup;
 }
 
-static int do_tmpfile(int dfd, struct filename *pathname,
-               struct nameidata *nd, int flags,
+static int do_tmpfile(struct nameidata *nd, unsigned flags,
                const struct open_flags *op,
                struct file *file, int *opened)
 {
        static const struct qstr name = QSTR_INIT("/", 1);
-       struct dentry *dentry, *child;
+       struct dentry *child;
        struct inode *dir;
-       int error = path_lookupat(dfd, pathname,
-                                 flags | LOOKUP_DIRECTORY, nd);
+       struct path path;
+       int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
        if (unlikely(error))
                return error;
-       error = mnt_want_write(nd->path.mnt);
+       error = mnt_want_write(path.mnt);
        if (unlikely(error))
                goto out;
+       dir = path.dentry->d_inode;
        /* we want directory to be writable */
-       error = inode_permission(nd->inode, MAY_WRITE | MAY_EXEC);
+       error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
        if (error)
                goto out2;
-       dentry = nd->path.dentry;
-       dir = dentry->d_inode;
        if (!dir->i_op->tmpfile) {
                error = -EOPNOTSUPP;
                goto out2;
        }
-       child = d_alloc(dentry, &name);
+       child = d_alloc(path.dentry, &name);
        if (unlikely(!child)) {
                error = -ENOMEM;
                goto out2;
        }
-       nd->flags &= ~LOOKUP_DIRECTORY;
-       nd->flags |= op->intent;
-       dput(nd->path.dentry);
-       nd->path.dentry = child;
-       error = dir->i_op->tmpfile(dir, nd->path.dentry, op->mode);
+       dput(path.dentry);
+       path.dentry = child;
+       error = dir->i_op->tmpfile(dir, child, op->mode);
        if (error)
                goto out2;
-       audit_inode(pathname, nd->path.dentry, 0);
+       audit_inode(nd->name, child, 0);
        /* Don't check for other permissions, the inode was just created */
-       error = may_open(&nd->path, MAY_OPEN, op->open_flag);
+       error = may_open(&path, MAY_OPEN, op->open_flag);
        if (error)
                goto out2;
-       file->f_path.mnt = nd->path.mnt;
-       error = finish_open(file, nd->path.dentry, NULL, opened);
+       file->f_path.mnt = path.mnt;
+       error = finish_open(file, child, NULL, opened);
        if (error)
                goto out2;
        error = open_check_o_direct(file);
@@ -3204,17 +3262,17 @@ static int do_tmpfile(int dfd, struct filename *pathname,
                spin_unlock(&inode->i_lock);
        }
 out2:
-       mnt_drop_write(nd->path.mnt);
+       mnt_drop_write(path.mnt);
 out:
-       path_put(&nd->path);
+       path_put(&path);
        return error;
 }
 
-static struct file *path_openat(int dfd, struct filename *pathname,
-               struct nameidata *nd, const struct open_flags *op, int flags)
+static struct file *path_openat(struct nameidata *nd,
+                       const struct open_flags *op, unsigned flags)
 {
+       const char *s;
        struct file *file;
-       struct path path;
        int opened = 0;
        int error;
 
@@ -3225,37 +3283,26 @@ static struct file *path_openat(int dfd, struct filename *pathname,
        file->f_flags = op->open_flag;
 
        if (unlikely(file->f_flags & __O_TMPFILE)) {
-               error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);
-               goto out;
+               error = do_tmpfile(nd, flags, op, file, &opened);
+               goto out2;
        }
 
-       error = path_init(dfd, pathname, flags, nd);
-       if (unlikely(error))
-               goto out;
-
-       error = do_last(nd, &path, file, op, &opened, pathname);
-       while (unlikely(error > 0)) { /* trailing symlink */
-               struct path link = path;
-               void *cookie;
-               if (!(nd->flags & LOOKUP_FOLLOW)) {
-                       path_put_conditional(&path, nd);
-                       path_put(&nd->path);
-                       error = -ELOOP;
-                       break;
-               }
-               error = may_follow_link(&link, nd);
-               if (unlikely(error))
-                       break;
-               nd->flags |= LOOKUP_PARENT;
+       s = path_init(nd, flags);
+       if (IS_ERR(s)) {
+               put_filp(file);
+               return ERR_CAST(s);
+       }
+       while (!(error = link_path_walk(s, nd)) &&
+               (error = do_last(nd, file, op, &opened)) > 0) {
                nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
-               error = follow_link(&link, nd, &cookie);
-               if (unlikely(error))
+               s = trailing_symlink(nd);
+               if (IS_ERR(s)) {
+                       error = PTR_ERR(s);
                        break;
-               error = do_last(nd, &path, file, op, &opened, pathname);
-               put_link(nd, &link, cookie);
+               }
        }
-out:
-       path_cleanup(nd);
+       terminate_walk(nd);
+out2:
        if (!(opened & FILE_OPENED)) {
                BUG_ON(!error);
                put_filp(file);
@@ -3279,11 +3326,13 @@ struct file *do_filp_open(int dfd, struct filename *pathname,
        int flags = op->lookup_flags;
        struct file *filp;
 
-       filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
+       set_nameidata(&nd, dfd, pathname);
+       filp = path_openat(&nd, op, flags | LOOKUP_RCU);
        if (unlikely(filp == ERR_PTR(-ECHILD)))
-               filp = path_openat(dfd, pathname, &nd, op, flags);
+               filp = path_openat(&nd, op, flags);
        if (unlikely(filp == ERR_PTR(-ESTALE)))
-               filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
+               filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
+       restore_nameidata();
        return filp;
 }
 
@@ -3305,11 +3354,13 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
        if (unlikely(IS_ERR(filename)))
                return ERR_CAST(filename);
 
-       file = path_openat(-1, filename, &nd, op, flags | LOOKUP_RCU);
+       set_nameidata(&nd, -1, filename);
+       file = path_openat(&nd, op, flags | LOOKUP_RCU);
        if (unlikely(file == ERR_PTR(-ECHILD)))
-               file = path_openat(-1, filename, &nd, op, flags);
+               file = path_openat(&nd, op, flags);
        if (unlikely(file == ERR_PTR(-ESTALE)))
-               file = path_openat(-1, filename, &nd, op, flags | LOOKUP_REVAL);
+               file = path_openat(&nd, op, flags | LOOKUP_REVAL);
+       restore_nameidata();
        putname(filename);
        return file;
 }
@@ -3318,7 +3369,8 @@ static struct dentry *filename_create(int dfd, struct filename *name,
                                struct path *path, unsigned int lookup_flags)
 {
        struct dentry *dentry = ERR_PTR(-EEXIST);
-       struct nameidata nd;
+       struct qstr last;
+       int type;
        int err2;
        int error;
        bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
@@ -3329,26 +3381,25 @@ static struct dentry *filename_create(int dfd, struct filename *name,
         */
        lookup_flags &= LOOKUP_REVAL;
 
-       error = filename_lookup(dfd, name, LOOKUP_PARENT|lookup_flags, &nd);
-       if (error)
-               return ERR_PTR(error);
+       name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+       if (IS_ERR(name))
+               return ERR_CAST(name);
 
        /*
         * Yucky last component or no last component at all?
         * (foo/., foo/.., /////)
         */
-       if (nd.last_type != LAST_NORM)
+       if (unlikely(type != LAST_NORM))
                goto out;
-       nd.flags &= ~LOOKUP_PARENT;
-       nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
 
        /* don't fail immediately if it's r/o, at least try to report other errors */
-       err2 = mnt_want_write(nd.path.mnt);
+       err2 = mnt_want_write(path->mnt);
        /*
         * Do the final lookup.
         */
-       mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-       dentry = lookup_hash(&nd);
+       lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
+       mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       dentry = __lookup_hash(&last, path->dentry, lookup_flags);
        if (IS_ERR(dentry))
                goto unlock;
 
@@ -3362,7 +3413,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
         * all is fine. Let's be bastards - you had / on the end, you've
         * been asking for (non-existent) directory. -ENOENT for you.
         */
-       if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
+       if (unlikely(!is_dir && last.name[last.len])) {
                error = -ENOENT;
                goto fail;
        }
@@ -3370,31 +3421,26 @@ static struct dentry *filename_create(int dfd, struct filename *name,
                error = err2;
                goto fail;
        }
-       *path = nd.path;
+       putname(name);
        return dentry;
 fail:
        dput(dentry);
        dentry = ERR_PTR(error);
 unlock:
-       mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+       mutex_unlock(&path->dentry->d_inode->i_mutex);
        if (!err2)
-               mnt_drop_write(nd.path.mnt);
+               mnt_drop_write(path->mnt);
 out:
-       path_put(&nd.path);
+       path_put(path);
+       putname(name);
        return dentry;
 }
 
 struct dentry *kern_path_create(int dfd, const char *pathname,
                                struct path *path, unsigned int lookup_flags)
 {
-       struct filename *filename = getname_kernel(pathname);
-       struct dentry *res;
-
-       if (IS_ERR(filename))
-               return ERR_CAST(filename);
-       res = filename_create(dfd, filename, path, lookup_flags);
-       putname(filename);
-       return res;
+       return filename_create(dfd, getname_kernel(pathname),
+                               path, lookup_flags);
 }
 EXPORT_SYMBOL(kern_path_create);
 
@@ -3407,16 +3453,10 @@ void done_path_create(struct path *path, struct dentry *dentry)
 }
 EXPORT_SYMBOL(done_path_create);
 
-struct dentry *user_path_create(int dfd, const char __user *pathname,
+inline struct dentry *user_path_create(int dfd, const char __user *pathname,
                                struct path *path, unsigned int lookup_flags)
 {
-       struct filename *tmp = getname(pathname);
-       struct dentry *res;
-       if (IS_ERR(tmp))
-               return ERR_CAST(tmp);
-       res = filename_create(dfd, tmp, path, lookup_flags);
-       putname(tmp);
-       return res;
+       return filename_create(dfd, getname(pathname), path, lookup_flags);
 }
 EXPORT_SYMBOL(user_path_create);
 
@@ -3637,14 +3677,17 @@ static long do_rmdir(int dfd, const char __user *pathname)
        int error = 0;
        struct filename *name;
        struct dentry *dentry;
-       struct nameidata nd;
+       struct path path;
+       struct qstr last;
+       int type;
        unsigned int lookup_flags = 0;
 retry:
-       name = user_path_parent(dfd, pathname, &nd, lookup_flags);
+       name = user_path_parent(dfd, pathname,
+                               &path, &last, &type, lookup_flags);
        if (IS_ERR(name))
                return PTR_ERR(name);
 
-       switch(nd.last_type) {
+       switch (type) {
        case LAST_DOTDOT:
                error = -ENOTEMPTY;
                goto exit1;
@@ -3656,13 +3699,12 @@ retry:
                goto exit1;
        }
 
-       nd.flags &= ~LOOKUP_PARENT;
-       error = mnt_want_write(nd.path.mnt);
+       error = mnt_want_write(path.mnt);
        if (error)
                goto exit1;
 
-       mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-       dentry = lookup_hash(&nd);
+       mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       dentry = __lookup_hash(&last, path.dentry, lookup_flags);
        error = PTR_ERR(dentry);
        if (IS_ERR(dentry))
                goto exit2;
@@ -3670,17 +3712,17 @@ retry:
                error = -ENOENT;
                goto exit3;
        }
-       error = security_path_rmdir(&nd.path, dentry);
+       error = security_path_rmdir(&path, dentry);
        if (error)
                goto exit3;
-       error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+       error = vfs_rmdir(path.dentry->d_inode, dentry);
 exit3:
        dput(dentry);
 exit2:
-       mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-       mnt_drop_write(nd.path.mnt);
+       mutex_unlock(&path.dentry->d_inode->i_mutex);
+       mnt_drop_write(path.mnt);
 exit1:
-       path_put(&nd.path);
+       path_put(&path);
        putname(name);
        if (retry_estale(error, lookup_flags)) {
                lookup_flags |= LOOKUP_REVAL;
@@ -3763,43 +3805,45 @@ static long do_unlinkat(int dfd, const char __user *pathname)
        int error;
        struct filename *name;
        struct dentry *dentry;
-       struct nameidata nd;
+       struct path path;
+       struct qstr last;
+       int type;
        struct inode *inode = NULL;
        struct inode *delegated_inode = NULL;
        unsigned int lookup_flags = 0;
 retry:
-       name = user_path_parent(dfd, pathname, &nd, lookup_flags);
+       name = user_path_parent(dfd, pathname,
+                               &path, &last, &type, lookup_flags);
        if (IS_ERR(name))
                return PTR_ERR(name);
 
        error = -EISDIR;
-       if (nd.last_type != LAST_NORM)
+       if (type != LAST_NORM)
                goto exit1;
 
-       nd.flags &= ~LOOKUP_PARENT;
-       error = mnt_want_write(nd.path.mnt);
+       error = mnt_want_write(path.mnt);
        if (error)
                goto exit1;
 retry_deleg:
-       mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-       dentry = lookup_hash(&nd);
+       mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       dentry = __lookup_hash(&last, path.dentry, lookup_flags);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
-               if (nd.last.name[nd.last.len])
+               if (last.name[last.len])
                        goto slashes;
                inode = dentry->d_inode;
                if (d_is_negative(dentry))
                        goto slashes;
                ihold(inode);
-               error = security_path_unlink(&nd.path, dentry);
+               error = security_path_unlink(&path, dentry);
                if (error)
                        goto exit2;
-               error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
+               error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
 exit2:
                dput(dentry);
        }
-       mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+       mutex_unlock(&path.dentry->d_inode->i_mutex);
        if (inode)
                iput(inode);    /* truncate the inode here */
        inode = NULL;
@@ -3808,9 +3852,9 @@ exit2:
                if (!error)
                        goto retry_deleg;
        }
-       mnt_drop_write(nd.path.mnt);
+       mnt_drop_write(path.mnt);
 exit1:
-       path_put(&nd.path);
+       path_put(&path);
        putname(name);
        if (retry_estale(error, lookup_flags)) {
                lookup_flags |= LOOKUP_REVAL;
@@ -4240,14 +4284,15 @@ EXPORT_SYMBOL(vfs_rename);
 SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
                int, newdfd, const char __user *, newname, unsigned int, flags)
 {
-       struct dentry *old_dir, *new_dir;
        struct dentry *old_dentry, *new_dentry;
        struct dentry *trap;
-       struct nameidata oldnd, newnd;
+       struct path old_path, new_path;
+       struct qstr old_last, new_last;
+       int old_type, new_type;
        struct inode *delegated_inode = NULL;
        struct filename *from;
        struct filename *to;
-       unsigned int lookup_flags = 0;
+       unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
        bool should_retry = false;
        int error;
 
@@ -4261,47 +4306,45 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
        if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
                return -EPERM;
 
+       if (flags & RENAME_EXCHANGE)
+               target_flags = 0;
+
 retry:
-       from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
+       from = user_path_parent(olddfd, oldname,
+                               &old_path, &old_last, &old_type, lookup_flags);
        if (IS_ERR(from)) {
                error = PTR_ERR(from);
                goto exit;
        }
 
-       to = user_path_parent(newdfd, newname, &newnd, lookup_flags);
+       to = user_path_parent(newdfd, newname,
+                               &new_path, &new_last, &new_type, lookup_flags);
        if (IS_ERR(to)) {
                error = PTR_ERR(to);
                goto exit1;
        }
 
        error = -EXDEV;
-       if (oldnd.path.mnt != newnd.path.mnt)
+       if (old_path.mnt != new_path.mnt)
                goto exit2;
 
-       old_dir = oldnd.path.dentry;
        error = -EBUSY;
-       if (oldnd.last_type != LAST_NORM)
+       if (old_type != LAST_NORM)
                goto exit2;
 
-       new_dir = newnd.path.dentry;
        if (flags & RENAME_NOREPLACE)
                error = -EEXIST;
-       if (newnd.last_type != LAST_NORM)
+       if (new_type != LAST_NORM)
                goto exit2;
 
-       error = mnt_want_write(oldnd.path.mnt);
+       error = mnt_want_write(old_path.mnt);
        if (error)
                goto exit2;
 
-       oldnd.flags &= ~LOOKUP_PARENT;
-       newnd.flags &= ~LOOKUP_PARENT;
-       if (!(flags & RENAME_EXCHANGE))
-               newnd.flags |= LOOKUP_RENAME_TARGET;
-
 retry_deleg:
-       trap = lock_rename(new_dir, old_dir);
+       trap = lock_rename(new_path.dentry, old_path.dentry);
 
-       old_dentry = lookup_hash(&oldnd);
+       old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags);
        error = PTR_ERR(old_dentry);
        if (IS_ERR(old_dentry))
                goto exit3;
@@ -4309,7 +4352,7 @@ retry_deleg:
        error = -ENOENT;
        if (d_is_negative(old_dentry))
                goto exit4;
-       new_dentry = lookup_hash(&newnd);
+       new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags | target_flags);
        error = PTR_ERR(new_dentry);
        if (IS_ERR(new_dentry))
                goto exit4;
@@ -4323,16 +4366,16 @@ retry_deleg:
 
                if (!d_is_dir(new_dentry)) {
                        error = -ENOTDIR;
-                       if (newnd.last.name[newnd.last.len])
+                       if (new_last.name[new_last.len])
                                goto exit5;
                }
        }
        /* unless the source is a directory trailing slashes give -ENOTDIR */
        if (!d_is_dir(old_dentry)) {
                error = -ENOTDIR;
-               if (oldnd.last.name[oldnd.last.len])
+               if (old_last.name[old_last.len])
                        goto exit5;
-               if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len])
+               if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
                        goto exit5;
        }
        /* source should not be ancestor of target */
@@ -4345,32 +4388,32 @@ retry_deleg:
        if (new_dentry == trap)
                goto exit5;
 
-       error = security_path_rename(&oldnd.path, old_dentry,
-                                    &newnd.path, new_dentry, flags);
+       error = security_path_rename(&old_path, old_dentry,
+                                    &new_path, new_dentry, flags);
        if (error)
                goto exit5;
-       error = vfs_rename(old_dir->d_inode, old_dentry,
-                          new_dir->d_inode, new_dentry,
+       error = vfs_rename(old_path.dentry->d_inode, old_dentry,
+                          new_path.dentry->d_inode, new_dentry,
                           &delegated_inode, flags);
 exit5:
        dput(new_dentry);
 exit4:
        dput(old_dentry);
 exit3:
-       unlock_rename(new_dir, old_dir);
+       unlock_rename(new_path.dentry, old_path.dentry);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
                if (!error)
                        goto retry_deleg;
        }
-       mnt_drop_write(oldnd.path.mnt);
+       mnt_drop_write(old_path.mnt);
 exit2:
        if (retry_estale(error, lookup_flags))
                should_retry = true;
-       path_put(&newnd.path);
+       path_put(&new_path);
        putname(to);
 exit1:
-       path_put(&oldnd.path);
+       path_put(&old_path);
        putname(from);
        if (should_retry) {
                should_retry = false;
@@ -4429,18 +4472,19 @@ EXPORT_SYMBOL(readlink_copy);
  */
 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
-       struct nameidata nd;
        void *cookie;
+       struct inode *inode = d_inode(dentry);
+       const char *link = inode->i_link;
        int res;
 
-       nd.depth = 0;
-       cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
-       if (IS_ERR(cookie))
-               return PTR_ERR(cookie);
-
-       res = readlink_copy(buffer, buflen, nd_get_link(&nd));
-       if (dentry->d_inode->i_op->put_link)
-               dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
+       if (!link) {
+               link = inode->i_op->follow_link(dentry, &cookie);
+               if (IS_ERR(link))
+                       return PTR_ERR(link);
+       }
+       res = readlink_copy(buffer, buflen, link);
+       if (inode->i_op->put_link)
+               inode->i_op->put_link(inode, cookie);
        return res;
 }
 EXPORT_SYMBOL(generic_readlink);
@@ -4472,22 +4516,21 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 }
 EXPORT_SYMBOL(page_readlink);
 
-void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
+const char *page_follow_link_light(struct dentry *dentry, void **cookie)
 {
        struct page *page = NULL;
-       nd_set_link(nd, page_getlink(dentry, &page));
-       return page;
+       char *res = page_getlink(dentry, &page);
+       if (!IS_ERR(res))
+               *cookie = page;
+       return res;
 }
 EXPORT_SYMBOL(page_follow_link_light);
 
-void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+void page_put_link(struct inode *unused, void *cookie)
 {
        struct page *page = cookie;
-
-       if (page) {
-               kunmap(page);
-               page_cache_release(page);
-       }
+       kunmap(page);
+       page_cache_release(page);
 }
 EXPORT_SYMBOL(page_put_link);
 
index 1f4f9da..9c1c43d 100644 (file)
@@ -590,24 +590,35 @@ static void delayed_free_vfsmnt(struct rcu_head *head)
 }
 
 /* call under rcu_read_lock */
-bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
 {
        struct mount *mnt;
        if (read_seqretry(&mount_lock, seq))
-               return false;
+               return 1;
        if (bastard == NULL)
-               return true;
+               return 0;
        mnt = real_mount(bastard);
        mnt_add_count(mnt, 1);
        if (likely(!read_seqretry(&mount_lock, seq)))
-               return true;
+               return 0;
        if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
                mnt_add_count(mnt, -1);
-               return false;
+               return 1;
+       }
+       return -1;
+}
+
+/* call under rcu_read_lock */
+bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+{
+       int res = __legitimize_mnt(bastard, seq);
+       if (likely(!res))
+               return true;
+       if (unlikely(res < 0)) {
+               rcu_read_unlock();
+               mntput(bastard);
+               rcu_read_lock();
        }
-       rcu_read_unlock();
-       mntput(bastard);
-       rcu_read_lock();
        return false;
 }
 
@@ -3179,6 +3190,12 @@ bool fs_fully_visible(struct file_system_type *type)
                if (mnt->mnt.mnt_sb->s_type != type)
                        continue;
 
+               /* This mount is not fully visible if it's root directory
+                * is not the root directory of the filesystem.
+                */
+               if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
+                       continue;
+
                /* This mount is not fully visible if there are any child mounts
                 * that cover anything except for empty directories.
                 */
index 45b35b9..55e1e3a 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
+#include <linux/file.h>
 #include <linux/string.h>
 #include <linux/ratelimit.h>
 #include <linux/printk.h>
@@ -5604,6 +5605,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
        p->server = server;
        atomic_inc(&lsp->ls_count);
        p->ctx = get_nfs_open_context(ctx);
+       get_file(fl->fl_file);
        memcpy(&p->fl, fl, sizeof(p->fl));
        return p;
 out_free_seqid:
@@ -5716,6 +5718,7 @@ static void nfs4_lock_release(void *calldata)
                nfs_free_seqid(data->arg.lock_seqid);
        nfs4_put_lock_state(data->lsp);
        put_nfs_open_context(data->ctx);
+       fput(data->fl.fl_file);
        kfree(data);
        dprintk("%s: done!\n", __func__);
 }
index 2d56200..b6de433 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/stat.h>
 #include <linux/mm.h>
 #include <linux/string.h>
-#include <linux/namei.h>
 
 /* Symlink caching in the page cache is even more simplistic
  * and straight-forward than readdir caching.
@@ -43,7 +42,7 @@ error:
        return -EIO;
 }
 
-static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
 {
        struct inode *inode = d_inode(dentry);
        struct page *page;
@@ -51,19 +50,13 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 
        err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
        if (err)
-               goto read_failed;
+               return err;
        page = read_cache_page(&inode->i_data, 0,
                                (filler_t *)nfs_symlink_filler, inode);
-       if (IS_ERR(page)) {
-               err = page;
-               goto read_failed;
-       }
-       nd_set_link(nd, kmap(page));
-       return page;
-
-read_failed:
-       nd_set_link(nd, err);
-       return NULL;
+       if (IS_ERR(page))
+               return ERR_CAST(page);
+       *cookie = page;
+       return kmap(page);
 }
 
 /*
index d12a4be..dfc19f1 100644 (file)
@@ -1845,12 +1845,15 @@ int nfs_wb_all(struct inode *inode)
        trace_nfs_writeback_inode_enter(inode);
 
        ret = filemap_write_and_wait(inode->i_mapping);
-       if (!ret) {
-               ret = nfs_commit_inode(inode, FLUSH_SYNC);
-               if (!ret)
-                       pnfs_sync_inode(inode, true);
-       }
+       if (ret)
+               goto out;
+       ret = nfs_commit_inode(inode, FLUSH_SYNC);
+       if (ret < 0)
+               goto out;
+       pnfs_sync_inode(inode, true);
+       ret = 0;
 
+out:
        trace_nfs_writeback_inode_exit(inode, ret);
        return ret;
 }
index 03d647b..cdefaa3 100644 (file)
@@ -181,6 +181,17 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
 }
 
 const struct nfsd4_layout_ops bl_layout_ops = {
+       /*
+        * Pretend that we send notification to the client.  This is a blatant
+        * lie to force recent Linux clients to cache our device IDs.
+        * We rarely ever change the device ID, so the harm of leaking deviceids
+        * for a while isn't too bad.  Unfortunately RFC5661 is a complete mess
+        * in this regard, but I filed errata 4119 for this a while ago, and
+        * hopefully the Linux client will eventually start caching deviceids
+        * without this again.
+        */
+       .notify_types           =
+                       NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
        .proc_getdeviceinfo     = nfsd4_block_proc_getdeviceinfo,
        .encode_getdeviceinfo   = nfsd4_block_encode_getdeviceinfo,
        .proc_layoutget         = nfsd4_block_proc_layoutget,
index 5827785..5694cfb 100644 (file)
@@ -224,7 +224,7 @@ static int nfs_cb_stat_to_errno(int status)
 }
 
 static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
-                              enum nfsstat4 *status)
+                              int *status)
 {
        __be32 *p;
        u32 op;
@@ -235,7 +235,7 @@ static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
        op = be32_to_cpup(p++);
        if (unlikely(op != expected))
                goto out_unexpected;
-       *status = be32_to_cpup(p);
+       *status = nfs_cb_stat_to_errno(be32_to_cpup(p));
        return 0;
 out_overflow:
        print_overflow_msg(__func__, xdr);
@@ -446,22 +446,16 @@ out_overflow:
 static int decode_cb_sequence4res(struct xdr_stream *xdr,
                                  struct nfsd4_callback *cb)
 {
-       enum nfsstat4 nfserr;
        int status;
 
        if (cb->cb_minorversion == 0)
                return 0;
 
-       status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
-       if (unlikely(status))
-               goto out;
-       if (unlikely(nfserr != NFS4_OK))
-               goto out_default;
-       status = decode_cb_sequence4resok(xdr, cb);
-out:
-       return status;
-out_default:
-       return nfs_cb_stat_to_errno(nfserr);
+       status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status);
+       if (unlikely(status || cb->cb_status))
+               return status;
+
+       return decode_cb_sequence4resok(xdr, cb);
 }
 
 /*
@@ -524,26 +518,19 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
                                  struct nfsd4_callback *cb)
 {
        struct nfs4_cb_compound_hdr hdr;
-       enum nfsstat4 nfserr;
        int status;
 
        status = decode_cb_compound4res(xdr, &hdr);
        if (unlikely(status))
-               goto out;
+               return status;
 
        if (cb != NULL) {
                status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status))
-                       goto out;
+               if (unlikely(status || cb->cb_status))
+                       return status;
        }
 
-       status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
-       if (unlikely(status))
-               goto out;
-       if (unlikely(nfserr != NFS4_OK))
-               status = nfs_cb_stat_to_errno(nfserr);
-out:
-       return status;
+       return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }
 
 #ifdef CONFIG_NFSD_PNFS
@@ -621,24 +608,18 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
                                  struct nfsd4_callback *cb)
 {
        struct nfs4_cb_compound_hdr hdr;
-       enum nfsstat4 nfserr;
        int status;
 
        status = decode_cb_compound4res(xdr, &hdr);
        if (unlikely(status))
-               goto out;
+               return status;
+
        if (cb) {
                status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status))
-                       goto out;
+               if (unlikely(status || cb->cb_status))
+                       return status;
        }
-       status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr);
-       if (unlikely(status))
-               goto out;
-       if (unlikely(nfserr != NFS4_OK))
-               status = nfs_cb_stat_to_errno(nfserr);
-out:
-       return status;
+       return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
 }
 #endif /* CONFIG_NFSD_PNFS */
 
@@ -898,13 +879,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
                if (!nfsd41_cb_get_slot(clp, task))
                        return;
        }
-       spin_lock(&clp->cl_lock);
-       if (list_empty(&cb->cb_per_client)) {
-               /* This is the first call, not a restart */
-               cb->cb_done = false;
-               list_add(&cb->cb_per_client, &clp->cl_callbacks);
-       }
-       spin_unlock(&clp->cl_lock);
        rpc_call_start(task);
 }
 
@@ -918,22 +892,33 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 
        if (clp->cl_minorversion) {
                /* No need for lock, access serialized in nfsd4_cb_prepare */
-               ++clp->cl_cb_session->se_cb_seq_nr;
+               if (!task->tk_status)
+                       ++clp->cl_cb_session->se_cb_seq_nr;
                clear_bit(0, &clp->cl_cb_slot_busy);
                rpc_wake_up_next(&clp->cl_cb_waitq);
                dprintk("%s: freed slot, new seqid=%d\n", __func__,
                        clp->cl_cb_session->se_cb_seq_nr);
        }
 
-       if (clp->cl_cb_client != task->tk_client) {
-               /* We're shutting down or changing cl_cb_client; leave
-                * it to nfsd4_process_cb_update to restart the call if
-                * necessary. */
+       /*
+        * If the backchannel connection was shut down while this
+        * task was queued, we need to resubmit it after setting up
+        * a new backchannel connection.
+        *
+        * Note that if we lost our callback connection permanently
+        * the submission code will error out, so we don't need to
+        * handle that case here.
+        */
+       if (task->tk_flags & RPC_TASK_KILLED) {
+               task->tk_status = 0;
+               cb->cb_need_restart = true;
                return;
        }
 
-       if (cb->cb_done)
-               return;
+       if (cb->cb_status) {
+               WARN_ON_ONCE(task->tk_status);
+               task->tk_status = cb->cb_status;
+       }
 
        switch (cb->cb_ops->done(cb, task)) {
        case 0:
@@ -949,21 +934,17 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
        default:
                BUG();
        }
-       cb->cb_done = true;
 }
 
 static void nfsd4_cb_release(void *calldata)
 {
        struct nfsd4_callback *cb = calldata;
-       struct nfs4_client *clp = cb->cb_clp;
-
-       if (cb->cb_done) {
-               spin_lock(&clp->cl_lock);
-               list_del(&cb->cb_per_client);
-               spin_unlock(&clp->cl_lock);
 
+       if (cb->cb_need_restart)
+               nfsd4_run_cb(cb);
+       else
                cb->cb_ops->release(cb);
-       }
+
 }
 
 static const struct rpc_call_ops nfsd4_cb_ops = {
@@ -1058,9 +1039,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
                nfsd4_mark_cb_down(clp, err);
                return;
        }
-       /* Yay, the callback channel's back! Restart any callbacks: */
-       list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
-               queue_work(callback_wq, &cb->cb_work);
 }
 
 static void
@@ -1071,8 +1049,12 @@ nfsd4_run_cb_work(struct work_struct *work)
        struct nfs4_client *clp = cb->cb_clp;
        struct rpc_clnt *clnt;
 
-       if (cb->cb_ops && cb->cb_ops->prepare)
-               cb->cb_ops->prepare(cb);
+       if (cb->cb_need_restart) {
+               cb->cb_need_restart = false;
+       } else {
+               if (cb->cb_ops && cb->cb_ops->prepare)
+                       cb->cb_ops->prepare(cb);
+       }
 
        if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
                nfsd4_process_cb_update(cb);
@@ -1084,6 +1066,15 @@ nfsd4_run_cb_work(struct work_struct *work)
                        cb->cb_ops->release(cb);
                return;
        }
+
+       /*
+        * Don't send probe messages for 4.1 or later.
+        */
+       if (!cb->cb_ops && clp->cl_minorversion) {
+               clp->cl_cb_state = NFSD4_CB_UP;
+               return;
+       }
+
        cb->cb_msg.rpc_cred = clp->cl_cb_cred;
        rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
                        cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
@@ -1098,8 +1089,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
        cb->cb_msg.rpc_resp = cb;
        cb->cb_ops = ops;
        INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
-       INIT_LIST_HEAD(&cb->cb_per_client);
-       cb->cb_done = true;
+       cb->cb_status = 0;
+       cb->cb_need_restart = false;
 }
 
 void nfsd4_run_cb(struct nfsd4_callback *cb)
index 38f2d7a..039f9c8 100644 (file)
@@ -94,6 +94,7 @@ static struct kmem_cache *lockowner_slab;
 static struct kmem_cache *file_slab;
 static struct kmem_cache *stateid_slab;
 static struct kmem_cache *deleg_slab;
+static struct kmem_cache *odstate_slab;
 
 static void free_session(struct nfsd4_session *);
 
@@ -281,6 +282,7 @@ put_nfs4_file(struct nfs4_file *fi)
        if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
                hlist_del_rcu(&fi->fi_hash);
                spin_unlock(&state_lock);
+               WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
                WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
                call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
        }
@@ -471,6 +473,86 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
                __nfs4_file_put_access(fp, O_RDONLY);
 }
 
+/*
+ * Allocate a new open/delegation state counter. This is needed for
+ * pNFS for proper return on close semantics.
+ *
+ * Note that we only allocate it for pNFS-enabled exports, otherwise
+ * all pointers to struct nfs4_clnt_odstate are always NULL.
+ */
+static struct nfs4_clnt_odstate *
+alloc_clnt_odstate(struct nfs4_client *clp)
+{
+       struct nfs4_clnt_odstate *co;
+
+       co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
+       if (co) {
+               co->co_client = clp;
+               atomic_set(&co->co_odcount, 1);
+       }
+       return co;
+}
+
+static void
+hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co)
+{
+       struct nfs4_file *fp = co->co_file;
+
+       lockdep_assert_held(&fp->fi_lock);
+       list_add(&co->co_perfile, &fp->fi_clnt_odstate);
+}
+
+static inline void
+get_clnt_odstate(struct nfs4_clnt_odstate *co)
+{
+       if (co)
+               atomic_inc(&co->co_odcount);
+}
+
+static void
+put_clnt_odstate(struct nfs4_clnt_odstate *co)
+{
+       struct nfs4_file *fp;
+
+       if (!co)
+               return;
+
+       fp = co->co_file;
+       if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
+               list_del(&co->co_perfile);
+               spin_unlock(&fp->fi_lock);
+
+               nfsd4_return_all_file_layouts(co->co_client, fp);
+               kmem_cache_free(odstate_slab, co);
+       }
+}
+
+static struct nfs4_clnt_odstate *
+find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new)
+{
+       struct nfs4_clnt_odstate *co;
+       struct nfs4_client *cl;
+
+       if (!new)
+               return NULL;
+
+       cl = new->co_client;
+
+       spin_lock(&fp->fi_lock);
+       list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
+               if (co->co_client == cl) {
+                       get_clnt_odstate(co);
+                       goto out;
+               }
+       }
+       co = new;
+       co->co_file = fp;
+       hash_clnt_odstate_locked(new);
+out:
+       spin_unlock(&fp->fi_lock);
+       return co;
+}
+
 struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
                                         struct kmem_cache *slab)
 {
@@ -606,7 +688,8 @@ static void block_delegations(struct knfsd_fh *fh)
 }
 
 static struct nfs4_delegation *
-alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
+alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
+                struct nfs4_clnt_odstate *odstate)
 {
        struct nfs4_delegation *dp;
        long n;
@@ -631,6 +714,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
        INIT_LIST_HEAD(&dp->dl_perfile);
        INIT_LIST_HEAD(&dp->dl_perclnt);
        INIT_LIST_HEAD(&dp->dl_recall_lru);
+       dp->dl_clnt_odstate = odstate;
+       get_clnt_odstate(odstate);
        dp->dl_type = NFS4_OPEN_DELEGATE_READ;
        dp->dl_retries = 1;
        nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -714,6 +799,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
        spin_lock(&state_lock);
        unhash_delegation_locked(dp);
        spin_unlock(&state_lock);
+       put_clnt_odstate(dp->dl_clnt_odstate);
        nfs4_put_deleg_lease(dp->dl_stid.sc_file);
        nfs4_put_stid(&dp->dl_stid);
 }
@@ -724,6 +810,7 @@ static void revoke_delegation(struct nfs4_delegation *dp)
 
        WARN_ON(!list_empty(&dp->dl_recall_lru));
 
+       put_clnt_odstate(dp->dl_clnt_odstate);
        nfs4_put_deleg_lease(dp->dl_stid.sc_file);
 
        if (clp->cl_minorversion == 0)
@@ -933,6 +1020,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
 {
        struct nfs4_ol_stateid *stp = openlockstateid(stid);
 
+       put_clnt_odstate(stp->st_clnt_odstate);
        release_all_access(stp);
        if (stp->st_stateowner)
                nfs4_put_stateowner(stp->st_stateowner);
@@ -1538,7 +1626,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
        INIT_LIST_HEAD(&clp->cl_openowners);
        INIT_LIST_HEAD(&clp->cl_delegations);
        INIT_LIST_HEAD(&clp->cl_lru);
-       INIT_LIST_HEAD(&clp->cl_callbacks);
        INIT_LIST_HEAD(&clp->cl_revoked);
 #ifdef CONFIG_NFSD_PNFS
        INIT_LIST_HEAD(&clp->cl_lo_states);
@@ -1634,6 +1721,7 @@ __destroy_client(struct nfs4_client *clp)
        while (!list_empty(&reaplist)) {
                dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
                list_del_init(&dp->dl_recall_lru);
+               put_clnt_odstate(dp->dl_clnt_odstate);
                nfs4_put_deleg_lease(dp->dl_stid.sc_file);
                nfs4_put_stid(&dp->dl_stid);
        }
@@ -3057,6 +3145,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
        spin_lock_init(&fp->fi_lock);
        INIT_LIST_HEAD(&fp->fi_stateids);
        INIT_LIST_HEAD(&fp->fi_delegations);
+       INIT_LIST_HEAD(&fp->fi_clnt_odstate);
        fh_copy_shallow(&fp->fi_fhandle, fh);
        fp->fi_deleg_file = NULL;
        fp->fi_had_conflict = false;
@@ -3073,6 +3162,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
 void
 nfsd4_free_slabs(void)
 {
+       kmem_cache_destroy(odstate_slab);
        kmem_cache_destroy(openowner_slab);
        kmem_cache_destroy(lockowner_slab);
        kmem_cache_destroy(file_slab);
@@ -3103,8 +3193,14 @@ nfsd4_init_slabs(void)
                        sizeof(struct nfs4_delegation), 0, 0, NULL);
        if (deleg_slab == NULL)
                goto out_free_stateid_slab;
+       odstate_slab = kmem_cache_create("nfsd4_odstate",
+                       sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
+       if (odstate_slab == NULL)
+               goto out_free_deleg_slab;
        return 0;
 
+out_free_deleg_slab:
+       kmem_cache_destroy(deleg_slab);
 out_free_stateid_slab:
        kmem_cache_destroy(stateid_slab);
 out_free_file_slab:
@@ -3581,6 +3677,14 @@ alloc_stateid:
        open->op_stp = nfs4_alloc_open_stateid(clp);
        if (!open->op_stp)
                return nfserr_jukebox;
+
+       if (nfsd4_has_session(cstate) &&
+           (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) {
+               open->op_odstate = alloc_clnt_odstate(clp);
+               if (!open->op_odstate)
+                       return nfserr_jukebox;
+       }
+
        return nfs_ok;
 }
 
@@ -3869,7 +3973,7 @@ out_fput:
 
 static struct nfs4_delegation *
 nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
-                   struct nfs4_file *fp)
+                   struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
 {
        int status;
        struct nfs4_delegation *dp;
@@ -3877,7 +3981,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (fp->fi_had_conflict)
                return ERR_PTR(-EAGAIN);
 
-       dp = alloc_init_deleg(clp, fh);
+       dp = alloc_init_deleg(clp, fh, odstate);
        if (!dp)
                return ERR_PTR(-ENOMEM);
 
@@ -3903,6 +4007,7 @@ out_unlock:
        spin_unlock(&state_lock);
 out:
        if (status) {
+               put_clnt_odstate(dp->dl_clnt_odstate);
                nfs4_put_stid(&dp->dl_stid);
                return ERR_PTR(status);
        }
@@ -3980,7 +4085,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
                default:
                        goto out_no_deleg;
        }
-       dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file);
+       dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
        if (IS_ERR(dp))
                goto out_no_deleg;
 
@@ -4069,6 +4174,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
                        release_open_stateid(stp);
                        goto out;
                }
+
+               stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp,
+                                                       open->op_odstate);
+               if (stp->st_clnt_odstate == open->op_odstate)
+                       open->op_odstate = NULL;
        }
        update_stateid(&stp->st_stid.sc_stateid);
        memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -4129,6 +4239,8 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
                kmem_cache_free(file_slab, open->op_file);
        if (open->op_stp)
                nfs4_put_stid(&open->op_stp->st_stid);
+       if (open->op_odstate)
+               kmem_cache_free(odstate_slab, open->op_odstate);
 }
 
 __be32
@@ -4385,10 +4497,17 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s
        return nfserr_old_stateid;
 }
 
+static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols)
+{
+       if (ols->st_stateowner->so_is_open_owner &&
+           !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
+               return nfserr_bad_stateid;
+       return nfs_ok;
+}
+
 static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
 {
        struct nfs4_stid *s;
-       struct nfs4_ol_stateid *ols;
        __be32 status = nfserr_bad_stateid;
 
        if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
@@ -4418,13 +4537,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
                break;
        case NFS4_OPEN_STID:
        case NFS4_LOCK_STID:
-               ols = openlockstateid(s);
-               if (ols->st_stateowner->so_is_open_owner
-                               && !(openowner(ols->st_stateowner)->oo_flags
-                                               & NFS4_OO_CONFIRMED))
-                       status = nfserr_bad_stateid;
-               else
-                       status = nfs_ok;
+               status = nfsd4_check_openowner_confirmed(openlockstateid(s));
                break;
        default:
                printk("unknown stateid type %x\n", s->sc_type);
@@ -4516,8 +4629,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
                status = nfs4_check_fh(current_fh, stp);
                if (status)
                        goto out;
-               if (stp->st_stateowner->so_is_open_owner
-                   && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
+               status = nfsd4_check_openowner_confirmed(stp);
+               if (status)
                        goto out;
                status = nfs4_check_openmode(stp, flags);
                if (status)
@@ -4852,9 +4965,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        update_stateid(&stp->st_stid.sc_stateid);
        memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 
-       nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
-                                     stp->st_stid.sc_file);
-
        nfsd4_close_open_stateid(stp);
 
        /* put reference from nfs4_preprocess_seqid_op */
@@ -6488,6 +6598,7 @@ nfs4_state_shutdown_net(struct net *net)
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
                list_del_init(&dp->dl_recall_lru);
+               put_clnt_odstate(dp->dl_clnt_odstate);
                nfs4_put_deleg_lease(dp->dl_stid.sc_file);
                nfs4_put_stid(&dp->dl_stid);
        }
index 4f3bfeb..dbc4f85 100644 (file)
@@ -63,12 +63,12 @@ typedef struct {
 
 struct nfsd4_callback {
        struct nfs4_client *cb_clp;
-       struct list_head cb_per_client;
        u32 cb_minorversion;
        struct rpc_message cb_msg;
        struct nfsd4_callback_ops *cb_ops;
        struct work_struct cb_work;
-       bool cb_done;
+       int cb_status;
+       bool cb_need_restart;
 };
 
 struct nfsd4_callback_ops {
@@ -126,6 +126,7 @@ struct nfs4_delegation {
        struct list_head        dl_perfile;
        struct list_head        dl_perclnt;
        struct list_head        dl_recall_lru;  /* delegation recalled */
+       struct nfs4_clnt_odstate *dl_clnt_odstate;
        u32                     dl_type;
        time_t                  dl_time;
 /* For recall: */
@@ -332,7 +333,6 @@ struct nfs4_client {
        int                     cl_cb_state;
        struct nfsd4_callback   cl_cb_null;
        struct nfsd4_session    *cl_cb_session;
-       struct list_head        cl_callbacks; /* list of in-progress callbacks */
 
        /* for all client information that callback code might need: */
        spinlock_t              cl_lock;
@@ -465,6 +465,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
 }
 
 /*
+ * Per-client state indicating no. of opens and outstanding delegations
+ * on a file from a particular client.'od' stands for 'open & delegation'
+ */
+struct nfs4_clnt_odstate {
+       struct nfs4_client      *co_client;
+       struct nfs4_file        *co_file;
+       struct list_head        co_perfile;
+       atomic_t                co_odcount;
+};
+
+/*
  * nfs4_file: a file opened by some number of (open) nfs4_stateowners.
  *
  * These objects are global. nfsd keeps one instance of a nfs4_file per
@@ -485,6 +496,7 @@ struct nfs4_file {
                struct list_head        fi_delegations;
                struct rcu_head         fi_rcu;
        };
+       struct list_head        fi_clnt_odstate;
        /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
        struct file *           fi_fds[3];
        /*
@@ -526,6 +538,7 @@ struct nfs4_ol_stateid {
        struct list_head              st_perstateowner;
        struct list_head              st_locks;
        struct nfs4_stateowner      * st_stateowner;
+       struct nfs4_clnt_odstate    * st_clnt_odstate;
        unsigned char                 st_access_bmap;
        unsigned char                 st_deny_bmap;
        struct nfs4_ol_stateid         * st_openstp;
index f982ae8..2f8c092 100644 (file)
@@ -247,6 +247,7 @@ struct nfsd4_open {
        struct nfs4_openowner *op_openowner; /* used during processing */
        struct nfs4_file *op_file;          /* used during processing */
        struct nfs4_ol_stateid *op_stp;     /* used during processing */
+       struct nfs4_clnt_odstate *op_odstate; /* used during processing */
        struct nfs4_acl *op_acl;
        struct xdr_netobj op_label;
 };
index 0f35b80..443abec 100644 (file)
@@ -35,7 +35,7 @@
  * ntfs_lookup - find the inode represented by a dentry in a directory inode
  * @dir_ino:   directory inode in which to look for the inode
  * @dent:      dentry representing the inode to look for
- * @nd:                lookup nameidata
+ * @flags:     lookup flags
  *
  * In short, ntfs_lookup() looks for the inode represented by the dentry @dent
  * in the directory inode @dir_ino and if found attaches the inode to the
index 0822345..83f4e76 100644 (file)
@@ -159,7 +159,7 @@ int omfs_allocate_range(struct super_block *sb,
        goto out;
 
 found:
-       *return_block = i * bits_per_entry + bit;
+       *return_block = (u64) i * bits_per_entry + bit;
        *return_size = run;
        ret = set_run(sb, i, bits_per_entry, bit, run, 1);
 
index 138321b..3d935c8 100644 (file)
@@ -306,7 +306,8 @@ static const struct super_operations omfs_sops = {
  */
 static int omfs_get_imap(struct super_block *sb)
 {
-       unsigned int bitmap_size, count, array_size;
+       unsigned int bitmap_size, array_size;
+       int count;
        struct omfs_sb_info *sbi = OMFS_SB(sb);
        struct buffer_head *bh;
        unsigned long **ptr;
@@ -359,7 +360,7 @@ nomem:
 }
 
 enum {
-       Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask
+       Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err
 };
 
 static const match_table_t tokens = {
@@ -368,6 +369,7 @@ static const match_table_t tokens = {
        {Opt_umask, "umask=%o"},
        {Opt_dmask, "dmask=%o"},
        {Opt_fmask, "fmask=%o"},
+       {Opt_err, NULL},
 };
 
 static int parse_options(char *options, struct omfs_sb_info *sbi)
@@ -548,8 +550,10 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
        }
 
        sb->s_root = d_make_root(root);
-       if (!sb->s_root)
+       if (!sb->s_root) {
+               ret = -ENOMEM;
                goto out_brelse_bh2;
+       }
        printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
 
        ret = 0;
index 98e5a52..e0250bd 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -367,7 +367,7 @@ retry:
        if (res)
                goto out;
 
-       inode = path.dentry->d_inode;
+       inode = d_backing_inode(path.dentry);
 
        if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
                /*
index 24f6404..84d693d 100644 (file)
@@ -299,6 +299,9 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
        struct cred *override_cred;
        char *link = NULL;
 
+       if (WARN_ON(!workdir))
+               return -EROFS;
+
        ovl_path_upper(parent, &parentpath);
        upperdir = parentpath.dentry;
 
index d139405..692ceda 100644 (file)
@@ -222,6 +222,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
        struct kstat stat;
        int err;
 
+       if (WARN_ON(!workdir))
+               return ERR_PTR(-EROFS);
+
        err = ovl_lock_rename_workdir(workdir, upperdir);
        if (err)
                goto out;
@@ -322,6 +325,9 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
        struct dentry *newdentry;
        int err;
 
+       if (WARN_ON(!workdir))
+               return -EROFS;
+
        err = ovl_lock_rename_workdir(workdir, upperdir);
        if (err)
                goto out;
@@ -506,11 +512,28 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
        struct dentry *opaquedir = NULL;
        int err;
 
-       if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
-               opaquedir = ovl_check_empty_and_clear(dentry);
-               err = PTR_ERR(opaquedir);
-               if (IS_ERR(opaquedir))
-                       goto out;
+       if (WARN_ON(!workdir))
+               return -EROFS;
+
+       if (is_dir) {
+               if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
+                       opaquedir = ovl_check_empty_and_clear(dentry);
+                       err = PTR_ERR(opaquedir);
+                       if (IS_ERR(opaquedir))
+                               goto out;
+               } else {
+                       LIST_HEAD(list);
+
+                       /*
+                        * When removing an empty opaque directory, then it
+                        * makes no sense to replace it with an exact replica of
+                        * itself.  But emptiness still needs to be checked.
+                        */
+                       err = ovl_check_empty_dir(dentry, &list);
+                       ovl_cache_free(&list);
+                       if (err)
+                               goto out;
+               }
        }
 
        err = ovl_lock_rename_workdir(workdir, upperdir);
index 04f1248..308379b 100644 (file)
@@ -140,11 +140,12 @@ struct ovl_link_data {
        void *cookie;
 };
 
-static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *ovl_follow_link(struct dentry *dentry, void **cookie)
 {
-       void *ret;
        struct dentry *realdentry;
        struct inode *realinode;
+       struct ovl_link_data *data = NULL;
+       const char *ret;
 
        realdentry = ovl_dentry_real(dentry);
        realinode = realdentry->d_inode;
@@ -152,28 +153,28 @@ static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
        if (WARN_ON(!realinode->i_op->follow_link))
                return ERR_PTR(-EPERM);
 
-       ret = realinode->i_op->follow_link(realdentry, nd);
-       if (IS_ERR(ret))
-               return ret;
-
        if (realinode->i_op->put_link) {
-               struct ovl_link_data *data;
-
                data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
-               if (!data) {
-                       realinode->i_op->put_link(realdentry, nd, ret);
+               if (!data)
                        return ERR_PTR(-ENOMEM);
-               }
                data->realdentry = realdentry;
-               data->cookie = ret;
+       }
 
-               return data;
-       } else {
-               return NULL;
+       ret = realinode->i_op->follow_link(realdentry, cookie);
+       if (IS_ERR_OR_NULL(ret)) {
+               kfree(data);
+               return ret;
        }
+
+       if (data)
+               data->cookie = *cookie;
+
+       *cookie = data;
+
+       return ret;
 }
 
-static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
+static void ovl_put_link(struct inode *unused, void *c)
 {
        struct inode *realinode;
        struct ovl_link_data *data = c;
@@ -182,7 +183,7 @@ static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
                return;
 
        realinode = data->realdentry->d_inode;
-       realinode->i_op->put_link(data->realdentry, nd, data->cookie);
+       realinode->i_op->put_link(realinode, data->cookie);
        kfree(data);
 }
 
index 5f0d199..bf8537c 100644 (file)
@@ -529,7 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 {
        struct ovl_fs *ufs = sb->s_fs_info;
 
-       if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
+       if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
                return -EROFS;
 
        return 0;
@@ -925,9 +925,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
                ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
                err = PTR_ERR(ufs->workdir);
                if (IS_ERR(ufs->workdir)) {
-                       pr_err("overlayfs: failed to create directory %s/%s\n",
-                              ufs->config.workdir, OVL_WORKDIR_NAME);
-                       goto out_put_upper_mnt;
+                       pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+                               ufs->config.workdir, OVL_WORKDIR_NAME, -err);
+                       sb->s_flags |= MS_RDONLY;
+                       ufs->workdir = NULL;
                }
        }
 
@@ -997,7 +998,6 @@ out_put_lower_mnt:
        kfree(ufs->lower_mnt);
 out_put_workdir:
        dput(ufs->workdir);
-out_put_upper_mnt:
        mntput(ufs->upper_mnt);
 out_put_lowerpath:
        for (i = 0; i < numlower; i++)
index 093ca14..286a422 100644 (file)
@@ -1380,7 +1380,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
                return -ENOENT;
 }
 
-static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie)
 {
        struct inode *inode = d_inode(dentry);
        struct path path;
@@ -1394,7 +1394,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
        if (error)
                goto out;
 
-       nd_jump_link(nd, &path);
+       nd_jump_link(&path);
        return NULL;
 out:
        return ERR_PTR(error);
index 8272aab..afe232b 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/magic.h>
-#include <linux/namei.h>
 
 #include <asm/uaccess.h>
 
@@ -394,16 +393,16 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
 };
 #endif
 
-static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_follow_link(struct dentry *dentry, void **cookie)
 {
        struct proc_dir_entry *pde = PDE(d_inode(dentry));
        if (unlikely(!use_pde(pde)))
                return ERR_PTR(-EINVAL);
-       nd_set_link(nd, pde->data);
-       return pde;
+       *cookie = pde;
+       return pde->data;
 }
 
-static void proc_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+static void proc_put_link(struct inode *unused, void *p)
 {
        unuse_pde(p);
 }
index e512642..f6e8354 100644 (file)
@@ -30,7 +30,7 @@ static const struct proc_ns_operations *ns_entries[] = {
        &mntns_operations,
 };
 
-static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie)
 {
        struct inode *inode = d_inode(dentry);
        const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
@@ -45,7 +45,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
        if (ptrace_may_access(task, PTRACE_MODE_READ)) {
                error = ns_get_path(&ns_path, task, ns_ops);
                if (!error)
-                       nd_jump_link(nd, &ns_path);
+                       nd_jump_link(&ns_path);
        }
        put_task_struct(task);
        return error;
index 6195b4a..113b8d0 100644 (file)
@@ -1,5 +1,4 @@
 #include <linux/sched.h>
-#include <linux/namei.h>
 #include <linux/slab.h>
 #include <linux/pid_namespace.h>
 #include "internal.h"
@@ -19,21 +18,20 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
        return readlink_copy(buffer, buflen, tmp);
 }
 
-static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_self_follow_link(struct dentry *dentry, void **cookie)
 {
        struct pid_namespace *ns = dentry->d_sb->s_fs_info;
        pid_t tgid = task_tgid_nr_ns(current, ns);
-       char *name = ERR_PTR(-ENOENT);
-       if (tgid) {
-               /* 11 for max length of signed int in decimal + NULL term */
-               name = kmalloc(12, GFP_KERNEL);
-               if (!name)
-                       name = ERR_PTR(-ENOMEM);
-               else
-                       sprintf(name, "%d", tgid);
-       }
-       nd_set_link(nd, name);
-       return NULL;
+       char *name;
+
+       if (!tgid)
+               return ERR_PTR(-ENOENT);
+       /* 11 for max length of signed int in decimal + NULL term */
+       name = kmalloc(12, GFP_KERNEL);
+       if (!name)
+               return ERR_PTR(-ENOMEM);
+       sprintf(name, "%d", tgid);
+       return *cookie = name;
 }
 
 static const struct inode_operations proc_self_inode_operations = {
index a837199..947b0f4 100644 (file)
@@ -1,5 +1,4 @@
 #include <linux/sched.h>
-#include <linux/namei.h>
 #include <linux/slab.h>
 #include <linux/pid_namespace.h>
 #include "internal.h"
@@ -20,21 +19,20 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
        return readlink_copy(buffer, buflen, tmp);
 }
 
-static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie)
 {
        struct pid_namespace *ns = dentry->d_sb->s_fs_info;
        pid_t tgid = task_tgid_nr_ns(current, ns);
        pid_t pid = task_pid_nr_ns(current, ns);
-       char *name = ERR_PTR(-ENOENT);
-       if (pid) {
-               name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
-               if (!name)
-                       name = ERR_PTR(-ENOMEM);
-               else
-                       sprintf(name, "%d/task/%d", tgid, pid);
-       }
-       nd_set_link(nd, name);
-       return NULL;
+       char *name;
+
+       if (!pid)
+               return ERR_PTR(-ENOENT);
+       name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
+       if (!name)
+               return ERR_PTR(-ENOMEM);
+       sprintf(name, "%d/task/%d", tgid, pid);
+       return *cookie = name;
 }
 
 static const struct inode_operations proc_thread_self_inode_operations = {
index f684c75..0155473 100644 (file)
@@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
         * doesn't imply write barrier and the users expect write
         * barrier semantics on wakeup functions.  The following
         * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
-        * and is paired with set_mb() in poll_schedule_timeout.
+        * and is paired with smp_store_mb() in poll_schedule_timeout.
         */
        smp_wmb();
        pwq->triggered = 1;
@@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
        /*
         * Prepare for the next iteration.
         *
-        * The following set_mb() serves two purposes.  First, it's
+        * The following smp_store_mb() serves two purposes.  First, it's
         * the counterpart rmb of the wmb in pollwake() such that data
         * written before wake up is always visible after wake up.
         * Second, the full barrier guarantees that triggered clearing
@@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
         * this problem doesn't exist for the first iteration as
         * add_wait_queue() has full barrier semantics.
         */
-       set_mb(pwq->triggered, 0);
+       smp_store_mb(pwq->triggered, 0);
 
        return rc;
 }
index 476024b..bfe62ae 100644 (file)
@@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
        long ret, bytes;
        umode_t i_mode;
        size_t len;
-       int i, flags;
+       int i, flags, more;
 
        /*
         * We require the input being a regular file, as we don't want to
@@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
         * Don't block on output, we have to drain the direct pipe.
         */
        sd->flags &= ~SPLICE_F_NONBLOCK;
+       more = sd->flags & SPLICE_F_MORE;
 
        while (len) {
                size_t read_len;
@@ -1217,6 +1218,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
                sd->total_len = read_len;
 
                /*
+                * If more data is pending, set SPLICE_F_MORE
+                * If this is the last data and SPLICE_F_MORE was not set
+                * initially, clears it.
+                */
+               if (read_len < len)
+                       sd->flags |= SPLICE_F_MORE;
+               else if (!more)
+                       sd->flags &= ~SPLICE_F_MORE;
+               /*
                 * NOTE: nonblocking mode only applies to the input. We
                 * must not do the output in nonblocking mode as then we
                 * could get stuck data in the internal pipe:
index 3591f9d..7a75e70 100644 (file)
@@ -5,4 +5,4 @@
 obj-$(CONFIG_SYSV_FS) += sysv.o
 
 sysv-objs := ialloc.o balloc.o inode.o itree.o file.o dir.o \
-            namei.o super.o symlink.o
+            namei.o super.o
index 8895630..590ad92 100644 (file)
@@ -166,8 +166,9 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
                        inode->i_op = &sysv_symlink_inode_operations;
                        inode->i_mapping->a_ops = &sysv_aops;
                } else {
-                       inode->i_op = &sysv_fast_symlink_inode_operations;
-                       nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size,
+                       inode->i_op = &simple_symlink_inode_operations;
+                       inode->i_link = (char *)SYSV_I(inode)->i_data;
+                       nd_terminate_link(inode->i_link, inode->i_size,
                                sizeof(SYSV_I(inode)->i_data) - 1);
                }
        } else
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c
deleted file mode 100644 (file)
index d3fa0d7..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- *  linux/fs/sysv/symlink.c
- *
- *  Handling of System V filesystem fast symlinks extensions.
- *  Aug 2001, Christoph Hellwig (hch@infradead.org)
- */
-
-#include "sysv.h"
-#include <linux/namei.h>
-
-static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       nd_set_link(nd, (char *)SYSV_I(d_inode(dentry))->i_data);
-       return NULL;
-}
-
-const struct inode_operations sysv_fast_symlink_inode_operations = {
-       .readlink       = generic_readlink,
-       .follow_link    = sysv_follow_link,
-};
index 69d4889..2c13525 100644 (file)
@@ -161,7 +161,6 @@ extern ino_t sysv_inode_by_name(struct dentry *);
 
 extern const struct inode_operations sysv_file_inode_operations;
 extern const struct inode_operations sysv_dir_inode_operations;
-extern const struct inode_operations sysv_fast_symlink_inode_operations;
 extern const struct file_operations sysv_file_operations;
 extern const struct file_operations sysv_dir_operations;
 extern const struct address_space_operations sysv_aops;
index 27060fc..5c27c66 100644 (file)
@@ -889,6 +889,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
 
        memcpy(ui->data, symname, len);
        ((char *)ui->data)[len] = '\0';
+       inode->i_link = ui->data;
        /*
         * The terminating zero byte is not written to the flash media and it
         * is put just to make later in-memory string processing simpler. Thus,
index 35efc10..a3dfe2a 100644 (file)
@@ -51,7 +51,6 @@
 
 #include "ubifs.h"
 #include <linux/mount.h>
-#include <linux/namei.h>
 #include <linux/slab.h>
 
 static int read_block(struct inode *inode, void *addr, unsigned int block,
@@ -1300,14 +1299,6 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset,
        ClearPageChecked(page);
 }
 
-static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct ubifs_inode *ui = ubifs_inode(d_inode(dentry));
-
-       nd_set_link(nd, ui->data);
-       return NULL;
-}
-
 int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
        struct inode *inode = file->f_mapping->host;
@@ -1570,7 +1561,7 @@ const struct inode_operations ubifs_file_inode_operations = {
 
 const struct inode_operations ubifs_symlink_inode_operations = {
        .readlink    = generic_readlink,
-       .follow_link = ubifs_follow_link,
+       .follow_link = simple_follow_link,
        .setattr     = ubifs_setattr,
        .getattr     = ubifs_getattr,
        .setxattr    = ubifs_setxattr,
index 75e6f04..20f5dbd 100644 (file)
@@ -195,6 +195,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
                }
                memcpy(ui->data, ino->data, ui->data_len);
                ((char *)ui->data)[ui->data_len] = '\0';
+               inode->i_link = ui->data;
                break;
        case S_IFBLK:
        case S_IFCHR:
index be7d42c..99aaf5c 100644 (file)
@@ -572,9 +572,10 @@ static void ufs_set_inode_ops(struct inode *inode)
                inode->i_fop = &ufs_dir_operations;
                inode->i_mapping->a_ops = &ufs_aops;
        } else if (S_ISLNK(inode->i_mode)) {
-               if (!inode->i_blocks)
+               if (!inode->i_blocks) {
                        inode->i_op = &ufs_fast_symlink_inode_operations;
-               else {
+                       inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
+               } else {
                        inode->i_op = &ufs_symlink_inode_operations;
                        inode->i_mapping->a_ops = &ufs_aops;
                }
index e491a93..f773deb 100644 (file)
@@ -144,7 +144,8 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
        } else {
                /* fast symlink */
                inode->i_op = &ufs_fast_symlink_inode_operations;
-               memcpy(UFS_I(inode)->i_u1.i_symlink, symname, l);
+               inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
+               memcpy(inode->i_link, symname, l);
                inode->i_size = l-1;
        }
        mark_inode_dirty(inode);
index 5b537e2..874480b 100644 (file)
  *  ext2 symlink handling code
  */
 
-#include <linux/fs.h>
-#include <linux/namei.h>
-
 #include "ufs_fs.h"
 #include "ufs.h"
 
-
-static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct ufs_inode_info *p = UFS_I(d_inode(dentry));
-       nd_set_link(nd, (char*)p->i_u1.i_symlink);
-       return NULL;
-}
-
 const struct inode_operations ufs_fast_symlink_inode_operations = {
        .readlink       = generic_readlink,
-       .follow_link    = ufs_follow_link,
+       .follow_link    = simple_follow_link,
        .setattr        = ufs_setattr,
 };
 
index 04e79d5..e9d401c 100644 (file)
@@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
  * After the last attribute is removed revert to original inode format,
  * making all literal area available to the data fork once more.
  */
-STATIC void
-xfs_attr_fork_reset(
+void
+xfs_attr_fork_remove(
        struct xfs_inode        *ip,
        struct xfs_trans        *tp)
 {
@@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
            (mp->m_flags & XFS_MOUNT_ATTR2) &&
            (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
            !(args->op_flags & XFS_DA_OP_ADDNAME)) {
-               xfs_attr_fork_reset(dp, args->trans);
+               xfs_attr_fork_remove(dp, args->trans);
        } else {
                xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
                dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
@@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform(
        if (forkoff == -1) {
                ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
                ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
-               xfs_attr_fork_reset(dp, args->trans);
+               xfs_attr_fork_remove(dp, args->trans);
                goto out;
        }
 
index 025c4b8..882c8d3 100644 (file)
@@ -53,7 +53,7 @@ int   xfs_attr_shortform_remove(struct xfs_da_args *args);
 int    xfs_attr_shortform_list(struct xfs_attr_list_context *context);
 int    xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int    xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
-
+void   xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
 
 /*
  * Internal routines when attribute fork size == XFS_LBSIZE(mp).
index aeffeaa..f1026e8 100644 (file)
@@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align(
                align_alen += temp;
                align_off -= temp;
        }
+
+       /* Same adjustment for the end of the requested area. */
+       temp = (align_alen % extsz);
+       if (temp)
+               align_alen += extsz - temp;
+
        /*
-        * Same adjustment for the end of the requested area.
+        * For large extent hint sizes, the aligned extent might be larger than
+        * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
+        * the length back under MAXEXTLEN. The outer allocation loops handle
+        * short allocation just fine, so it is safe to do this. We only want to
+        * do it when we are forced to, though, because it means more allocation
+        * operations are required.
         */
-       if ((temp = (align_alen % extsz))) {
-               align_alen += extsz - temp;
-       }
+       while (align_alen > MAXEXTLEN)
+               align_alen -= extsz;
+       ASSERT(align_alen <= MAXEXTLEN);
+
        /*
         * If the previous block overlaps with this proposed allocation
         * then move the start forward without adjusting the length.
@@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align(
                        return -EINVAL;
        } else {
                ASSERT(orig_off >= align_off);
-               ASSERT(orig_end <= align_off + align_alen);
+               /* see MAXEXTLEN handling above */
+               ASSERT(orig_end <= align_off + align_alen ||
+                      align_alen + extsz > MAXEXTLEN);
        }
 
 #ifdef DEBUG
@@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc(
        /* Figure out the extent size, adjust alen */
        extsz = xfs_get_extsz_hint(ip);
        if (extsz) {
-               /*
-                * Make sure we don't exceed a single extent length when we
-                * align the extent by reducing length we are going to
-                * allocate by the maximum amount extent size aligment may
-                * require.
-                */
-               alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
                error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
                                               1, 0, &aoff, &alen);
                ASSERT(!error);
index 07349a1..1c9e755 100644 (file)
@@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc(
         */
        newlen = args.mp->m_ialloc_inos;
        if (args.mp->m_maxicount &&
-           percpu_counter_read(&args.mp->m_icount) + newlen >
+           percpu_counter_read_positive(&args.mp->m_icount) + newlen >
                                                        args.mp->m_maxicount)
                return -ENOSPC;
        args.minlen = args.maxlen = args.mp->m_ialloc_blks;
@@ -1339,10 +1339,13 @@ xfs_dialloc(
         * If we have already hit the ceiling of inode blocks then clear
         * okalloc so we scan all available agi structures for a free
         * inode.
+        *
+        * Read rough value of mp->m_icount by percpu_counter_read_positive,
+        * which will sacrifice the preciseness but improve the performance.
         */
        if (mp->m_maxicount &&
-           percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
-                                                       mp->m_maxicount) {
+           percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
+                                                       mp->m_maxicount) {
                noroom = 1;
                okalloc = 0;
        }
index f9c1c64..3fbf167 100644 (file)
@@ -380,23 +380,31 @@ xfs_attr3_root_inactive(
        return error;
 }
 
+/*
+ * xfs_attr_inactive kills all traces of an attribute fork on an inode. It
+ * removes both the on-disk and in-memory inode fork. Note that this also has to
+ * handle the condition of inodes without attributes but with an attribute fork
+ * configured, so we can't use xfs_inode_hasattr() here.
+ *
+ * The in-memory attribute fork is removed even on error.
+ */
 int
-xfs_attr_inactive(xfs_inode_t *dp)
+xfs_attr_inactive(
+       struct xfs_inode        *dp)
 {
-       xfs_trans_t *trans;
-       xfs_mount_t *mp;
-       int error;
+       struct xfs_trans        *trans;
+       struct xfs_mount        *mp;
+       int                     cancel_flags = 0;
+       int                     lock_mode = XFS_ILOCK_SHARED;
+       int                     error = 0;
 
        mp = dp->i_mount;
        ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
 
-       xfs_ilock(dp, XFS_ILOCK_SHARED);
-       if (!xfs_inode_hasattr(dp) ||
-           dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-               xfs_iunlock(dp, XFS_ILOCK_SHARED);
-               return 0;
-       }
-       xfs_iunlock(dp, XFS_ILOCK_SHARED);
+       xfs_ilock(dp, lock_mode);
+       if (!XFS_IFORK_Q(dp))
+               goto out_destroy_fork;
+       xfs_iunlock(dp, lock_mode);
 
        /*
         * Start our first transaction of the day.
@@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp)
         * the inode in every transaction to let it float upward through
         * the log.
         */
+       lock_mode = 0;
        trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
        error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
-       if (error) {
-               xfs_trans_cancel(trans, 0);
-               return error;
-       }
-       xfs_ilock(dp, XFS_ILOCK_EXCL);
+       if (error)
+               goto out_cancel;
+
+       lock_mode = XFS_ILOCK_EXCL;
+       cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
+       xfs_ilock(dp, lock_mode);
+
+       if (!XFS_IFORK_Q(dp))
+               goto out_cancel;
 
        /*
         * No need to make quota reservations here. We expect to release some
@@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp)
         */
        xfs_trans_ijoin(trans, dp, 0);
 
-       /*
-        * Decide on what work routines to call based on the inode size.
-        */
-       if (!xfs_inode_hasattr(dp) ||
-           dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-               error = 0;
-               goto out;
+       /* invalidate and truncate the attribute fork extents */
+       if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+               error = xfs_attr3_root_inactive(&trans, dp);
+               if (error)
+                       goto out_cancel;
+
+               error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
+               if (error)
+                       goto out_cancel;
        }
-       error = xfs_attr3_root_inactive(&trans, dp);
-       if (error)
-               goto out;
 
-       error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
-       if (error)
-               goto out;
+       /* Reset the attribute fork - this also destroys the in-core fork */
+       xfs_attr_fork_remove(dp, trans);
 
        error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
-       xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
+       xfs_iunlock(dp, lock_mode);
        return error;
 
-out:
-       xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-       xfs_iunlock(dp, XFS_ILOCK_EXCL);
+out_cancel:
+       xfs_trans_cancel(trans, cancel_flags);
+out_destroy_fork:
+       /* kill the in-core attr fork before we drop the inode lock */
+       if (dp->i_afp)
+               xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+       if (lock_mode)
+               xfs_iunlock(dp, lock_mode);
        return error;
 }
index 8121e75..3b75912 100644 (file)
@@ -124,7 +124,7 @@ xfs_iozero(
                status = 0;
        } while (count);
 
-       return (-status);
+       return status;
 }
 
 int
index d6ebc85..539a85f 100644 (file)
@@ -1946,21 +1946,17 @@ xfs_inactive(
        /*
         * If there are attributes associated with the file then blow them away
         * now.  The code calls a routine that recursively deconstructs the
-        * attribute fork.  We need to just commit the current transaction
-        * because we can't use it for xfs_attr_inactive().
+        * attribute fork. If also blows away the in-core attribute fork.
         */
-       if (ip->i_d.di_anextents > 0) {
-               ASSERT(ip->i_d.di_forkoff != 0);
-
+       if (XFS_IFORK_Q(ip)) {
                error = xfs_attr_inactive(ip);
                if (error)
                        return;
        }
 
-       if (ip->i_afp)
-               xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
+       ASSERT(!ip->i_afp);
        ASSERT(ip->i_d.di_anextents == 0);
+       ASSERT(ip->i_d.di_forkoff == 0);
 
        /*
         * Free the inode.
@@ -2883,7 +2879,13 @@ xfs_rename_alloc_whiteout(
        if (error)
                return error;
 
-       /* Satisfy xfs_bumplink that this is a real tmpfile */
+       /*
+        * Prepare the tmpfile inode as if it were created through the VFS.
+        * Otherwise, the link increment paths will complain about nlink 0->1.
+        * Drop the link count as done by d_tmpfile(), complete the inode setup
+        * and flag it as linkable.
+        */
+       drop_nlink(VFS_I(tmpfile));
        xfs_finish_inode_setup(tmpfile);
        VFS_I(tmpfile)->i_state |= I_LINKABLE;
 
@@ -3151,7 +3153,7 @@ xfs_rename(
         * intermediate state on disk.
         */
        if (wip) {
-               ASSERT(wip->i_d.di_nlink == 0);
+               ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
                error = xfs_bumplink(tp, wip);
                if (error)
                        goto out_trans_abort;
index f4cd720..7f51f39 100644 (file)
@@ -41,7 +41,6 @@
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
-#include <linux/namei.h>
 #include <linux/posix_acl.h>
 #include <linux/security.h>
 #include <linux/fiemap.h>
@@ -414,10 +413,10 @@ xfs_vn_rename(
  * we need to be very careful about how much stack we use.
  * uio is kmalloced for this reason...
  */
-STATIC void *
+STATIC const char *
 xfs_vn_follow_link(
        struct dentry           *dentry,
-       struct nameidata        *nd)
+       void                    **cookie)
 {
        char                    *link;
        int                     error = -ENOMEM;
@@ -430,14 +429,12 @@ xfs_vn_follow_link(
        if (unlikely(error))
                goto out_kfree;
 
-       nd_set_link(nd, link);
-       return NULL;
+       return *cookie = link;
 
  out_kfree:
        kfree(link);
  out_err:
-       nd_set_link(nd, ERR_PTR(error));
-       return NULL;
+       return ERR_PTR(error);
 }
 
 STATIC int
index 2ce7ee3..6f23fbd 100644 (file)
@@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp)
        return xfs_sync_sb(mp, true);
 }
 
+/*
+ * Deltas for the inode count are +/-64, hence we use a large batch size
+ * of 128 so we don't need to take the counter lock on every update.
+ */
+#define XFS_ICOUNT_BATCH       128
 int
 xfs_mod_icount(
        struct xfs_mount        *mp,
        int64_t                 delta)
 {
-       /* deltas are +/-64, hence the large batch size of 128. */
-       __percpu_counter_add(&mp->m_icount, delta, 128);
-       if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
+       __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+       if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
                ASSERT(0);
                percpu_counter_add(&mp->m_icount, -delta);
                return -EINVAL;
@@ -1113,6 +1117,14 @@ xfs_mod_ifree(
        return 0;
 }
 
+/*
+ * Deltas for the block count can vary from 1 to very large, but lock contention
+ * only occurs on frequent small block count updates such as in the delayed
+ * allocation path for buffered writes (page a time updates). Hence we set
+ * a large batch count (1024) to minimise global counter updates except when
+ * we get near to ENOSPC and we have to be very accurate with our updates.
+ */
+#define XFS_FDBLOCKS_BATCH     1024
 int
 xfs_mod_fdblocks(
        struct xfs_mount        *mp,
@@ -1151,25 +1163,19 @@ xfs_mod_fdblocks(
         * Taking blocks away, need to be more accurate the closer we
         * are to zero.
         *
-        * batch size is set to a maximum of 1024 blocks - if we are
-        * allocating of freeing extents larger than this then we aren't
-        * going to be hammering the counter lock so a lock per update
-        * is not a problem.
-        *
         * If the counter has a value of less than 2 * max batch size,
         * then make everything serialise as we are real close to
         * ENOSPC.
         */
-#define __BATCH        1024
-       if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+       if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
+                                    XFS_FDBLOCKS_BATCH) < 0)
                batch = 1;
        else
-               batch = __BATCH;
-#undef __BATCH
+               batch = XFS_FDBLOCKS_BATCH;
 
        __percpu_counter_add(&mp->m_fdblocks, delta, batch);
-       if (percpu_counter_compare(&mp->m_fdblocks,
-                                  XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+       if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp),
+                                    XFS_FDBLOCKS_BATCH) >= 0) {
                /* we had space! */
                return 0;
        }
index f5c40b0..e6a83d7 100644 (file)
@@ -66,8 +66,8 @@
 #define smp_read_barrier_depends()     do { } while (0)
 #endif
 
-#ifndef set_mb
-#define set_mb(var, value)  do { (var) = (value); mb(); } while (0)
+#ifndef smp_store_mb
+#define smp_store_mb(var, value)  do { WRITE_ONCE(var, value); mb(); } while (0)
 #endif
 
 #ifndef smp_mb__before_atomic
index 811fb1e..3766ab3 100644 (file)
@@ -86,9 +86,6 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
 
 /*
  * Atomic compare and exchange.
- *
- * Do not define __HAVE_ARCH_CMPXCHG because we want to use it to check whether
- * a cmpxchg primitive faster than repeated local irq save/restore exists.
  */
 #include <asm-generic/cmpxchg-local.h>
 
index b59b5a5..e56272c 100644 (file)
@@ -8,8 +8,7 @@
 #ifndef CONFIG_SMP
 /*
  * The following implementation only for uniprocessor machines.
- * For UP, it's relies on the fact that pagefault_disable() also disables
- * preemption to ensure mutual exclusion.
+ * It relies on preempt_disable() ensuring mutual exclusion.
  *
  */
 
@@ -38,6 +37,7 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
        if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
                oparg = 1 << oparg;
 
+       preempt_disable();
        pagefault_disable();
 
        ret = -EFAULT;
@@ -72,6 +72,7 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 out_pagefault_enable:
        pagefault_enable();
+       preempt_enable();
 
        if (ret == 0) {
                switch (cmp) {
@@ -106,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 {
        u32 val;
 
+       preempt_disable();
        if (unlikely(get_user(val, uaddr) != 0))
                return -EFAULT;
 
@@ -113,6 +115,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                return -EFAULT;
 
        *uval = val;
+       preempt_enable();
 
        return 0;
 }
index eb6f9e6..d0a7a47 100644 (file)
@@ -79,11 +79,8 @@ static __always_inline bool should_resched(void)
 #ifdef CONFIG_PREEMPT
 extern asmlinkage void preempt_schedule(void);
 #define __preempt_schedule() preempt_schedule()
-
-#ifdef CONFIG_CONTEXT_TRACKING
-extern asmlinkage void preempt_schedule_context(void);
-#define __preempt_schedule_context() preempt_schedule_context()
-#endif
+extern asmlinkage void preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
 #endif /* CONFIG_PREEMPT */
 
 #endif /* __ASM_PREEMPT_H */
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
new file mode 100644 (file)
index 0000000..83bfb87
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * Queued spinlock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QSPINLOCK_H
+#define __ASM_GENERIC_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+
+/**
+ * queued_spin_is_locked - is the spinlock locked?
+ * @lock: Pointer to queued spinlock structure
+ * Return: 1 if it is locked, 0 otherwise
+ */
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
+{
+       return atomic_read(&lock->val);
+}
+
+/**
+ * queued_spin_value_unlocked - is the spinlock structure unlocked?
+ * @lock: queued spinlock structure
+ * Return: 1 if it is unlocked, 0 otherwise
+ *
+ * N.B. Whenever there are tasks waiting for the lock, it is considered
+ *      locked wrt the lockref code to avoid lock stealing by the lockref
+ *      code and change things underneath the lock. This also allows some
+ *      optimizations to be applied without conflict with lockref.
+ */
+static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
+{
+       return !atomic_read(&lock.val);
+}
+
+/**
+ * queued_spin_is_contended - check if the lock is contended
+ * @lock : Pointer to queued spinlock structure
+ * Return: 1 if lock contended, 0 otherwise
+ */
+static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
+{
+       return atomic_read(&lock->val) & ~_Q_LOCKED_MASK;
+}
+/**
+ * queued_spin_trylock - try to acquire the queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+{
+       if (!atomic_read(&lock->val) &&
+          (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
+               return 1;
+       return 0;
+}
+
+extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+
+/**
+ * queued_spin_lock - acquire a queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ */
+static __always_inline void queued_spin_lock(struct qspinlock *lock)
+{
+       u32 val;
+
+       val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
+       if (likely(val == 0))
+               return;
+       queued_spin_lock_slowpath(lock, val);
+}
+
+#ifndef queued_spin_unlock
+/**
+ * queued_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ */
+static __always_inline void queued_spin_unlock(struct qspinlock *lock)
+{
+       /*
+        * smp_mb__before_atomic() in order to guarantee release semantics
+        */
+       smp_mb__before_atomic_dec();
+       atomic_sub(_Q_LOCKED_VAL, &lock->val);
+}
+#endif
+
+/**
+ * queued_spin_unlock_wait - wait until current lock holder releases the lock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * There is a very slight possibility of live-lock if the lockers keep coming
+ * and the waiter is just unfortunate enough to not see any unlock state.
+ */
+static inline void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+       while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+               cpu_relax();
+}
+
+#ifndef virt_queued_spin_lock
+static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock)
+{
+       return false;
+}
+#endif
+
+/*
+ * Initializier
+ */
+#define        __ARCH_SPIN_LOCK_UNLOCKED       { ATOMIC_INIT(0) }
+
+/*
+ * Remapping spinlock architecture specific functions to the corresponding
+ * queued spinlock functions.
+ */
+#define arch_spin_is_locked(l)         queued_spin_is_locked(l)
+#define arch_spin_is_contended(l)      queued_spin_is_contended(l)
+#define arch_spin_value_unlocked(l)    queued_spin_value_unlocked(l)
+#define arch_spin_lock(l)              queued_spin_lock(l)
+#define arch_spin_trylock(l)           queued_spin_trylock(l)
+#define arch_spin_unlock(l)            queued_spin_unlock(l)
+#define arch_spin_lock_flags(l, f)     queued_spin_lock(l)
+#define arch_spin_unlock_wait(l)       queued_spin_unlock_wait(l)
+
+#endif /* __ASM_GENERIC_QSPINLOCK_H */
diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
new file mode 100644 (file)
index 0000000..85f888e
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Queued spinlock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H
+#define __ASM_GENERIC_QSPINLOCK_TYPES_H
+
+/*
+ * Including atomic.h with PARAVIRT on will cause compilation errors because
+ * of recursive header file incluson via paravirt_types.h. So don't include
+ * it if PARAVIRT is on.
+ */
+#ifndef CONFIG_PARAVIRT
+#include <linux/types.h>
+#include <linux/atomic.h>
+#endif
+
+typedef struct qspinlock {
+       atomic_t        val;
+} arch_spinlock_t;
+
+/*
+ * Bitfields in the atomic value:
+ *
+ * When NR_CPUS < 16K
+ *  0- 7: locked byte
+ *     8: pending
+ *  9-15: not used
+ * 16-17: tail index
+ * 18-31: tail cpu (+1)
+ *
+ * When NR_CPUS >= 16K
+ *  0- 7: locked byte
+ *     8: pending
+ *  9-10: tail index
+ * 11-31: tail cpu (+1)
+ */
+#define        _Q_SET_MASK(type)       (((1U << _Q_ ## type ## _BITS) - 1)\
+                                     << _Q_ ## type ## _OFFSET)
+#define _Q_LOCKED_OFFSET       0
+#define _Q_LOCKED_BITS         8
+#define _Q_LOCKED_MASK         _Q_SET_MASK(LOCKED)
+
+#define _Q_PENDING_OFFSET      (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#if CONFIG_NR_CPUS < (1U << 14)
+#define _Q_PENDING_BITS                8
+#else
+#define _Q_PENDING_BITS                1
+#endif
+#define _Q_PENDING_MASK                _Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET     (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
+#define _Q_TAIL_IDX_BITS       2
+#define _Q_TAIL_IDX_MASK       _Q_SET_MASK(TAIL_IDX)
+
+#define _Q_TAIL_CPU_OFFSET     (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
+#define _Q_TAIL_CPU_BITS       (32 - _Q_TAIL_CPU_OFFSET)
+#define _Q_TAIL_CPU_MASK       _Q_SET_MASK(TAIL_CPU)
+
+#define _Q_TAIL_OFFSET         _Q_TAIL_IDX_OFFSET
+#define _Q_TAIL_MASK           (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+#define _Q_LOCKED_VAL          (1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL         (1U << _Q_PENDING_OFFSET)
+
+#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
index 2dd405c..45c39a3 100644 (file)
        {0x1002, 0x6658, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_NEW_MEMMAP}, \
        {0x1002, 0x665c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_NEW_MEMMAP}, \
        {0x1002, 0x665d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_NEW_MEMMAP}, \
+       {0x1002, 0x665f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_NEW_MEMMAP}, \
        {0x1002, 0x6660, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
        {0x1002, 0x6663, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
        {0x1002, 0x6664, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
index aff923a..d87d8ec 100644 (file)
@@ -116,7 +116,6 @@ __printf(3, 4)
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
                const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
-void bdi_unregister(struct backing_dev_info *bdi);
 int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
                        enum wb_reason reason);
index a1b25e3..b7299fe 100644 (file)
@@ -220,7 +220,7 @@ enum rq_flag_bits {
 
 /* This mask is used for both bio and request merge checking */
 #define REQ_NOMERGE_FLAGS \
-       (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
+       (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ)
 
 #define REQ_RAHEAD             (1ULL << __REQ_RAHEAD)
 #define REQ_THROTTLED          (1ULL << __REQ_THROTTLED)
index 7f9a516..5d93a66 100644 (file)
@@ -821,8 +821,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
                         struct scsi_ioctl_command __user *);
 
-extern void blk_queue_bio(struct request_queue *q, struct bio *bio);
-
 /*
  * A queue has just exitted congestion.  Note this in the global counter of
  * congested queues, and wake up anyone who was waiting for requests to be
index 86c12c9..8fdcb78 100644 (file)
@@ -2,7 +2,6 @@
 #define _LINUX_BH_H
 
 #include <linux/preempt.h>
-#include <linux/preempt_mask.h>
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
index ae2982c..656da2a 100644 (file)
@@ -17,7 +17,7 @@
 #define PHY_ID_BCM7250                 0xae025280
 #define PHY_ID_BCM7364                 0xae025260
 #define PHY_ID_BCM7366                 0x600d8490
-#define PHY_ID_BCM7425                 0x03625e60
+#define PHY_ID_BCM7425                 0x600d86b0
 #define PHY_ID_BCM7429                 0x600d8730
 #define PHY_ID_BCM7439                 0x600d8480
 #define PHY_ID_BCM7439_2               0xae025080
index 8677225..05be235 100644 (file)
@@ -250,7 +250,23 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
        ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
 
 #define WRITE_ONCE(x, val) \
-       ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
+       ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
+/**
+ * READ_ONCE_CTRL - Read a value heading a control dependency
+ * @x: The value to be read, heading the control dependency
+ *
+ * Control dependencies are tricky.  See Documentation/memory-barriers.txt
+ * for important information on how to use them.  Note that in many cases,
+ * use of smp_load_acquire() will be much simpler.  Control dependencies
+ * should be avoided except on the hottest of hotpaths.
+ */
+#define READ_ONCE_CTRL(x) \
+({ \
+       typeof(x) __val = READ_ONCE(x); \
+       smp_read_barrier_depends(); /* Enforce control dependency. */ \
+       __val; \
+})
 
 #endif /* __KERNEL__ */
 
@@ -450,7 +466,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * with an explicit memory barrier or atomic instruction that provides the
  * required ordering.
  *
- * If possible use READ_ONCE/ASSIGN_ONCE instead.
+ * If possible use READ_ONCE()/WRITE_ONCE() instead.
  */
 #define __ACCESS_ONCE(x) ({ \
         __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
index 27e285b..59915ea 100644 (file)
@@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
        return 1;
 }
 
-static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+static inline unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-       set_bit(0, cpumask_bits(dstp));
-
        return 0;
 }
 
@@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
 
 int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
 int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
+unsigned int cpumask_local_spread(unsigned int i, int node);
 
 /**
  * for_each_cpu - iterate over every cpu in a mask
index cb25af4..420311b 100644 (file)
@@ -45,7 +45,6 @@ extern struct dentry *arch_debugfs_dir;
 
 /* declared over in file.c */
 extern const struct file_operations debugfs_file_operations;
-extern const struct inode_operations debugfs_link_operations;
 
 struct dentry *debugfs_create_file(const char *name, umode_t mode,
                                   struct dentry *parent, void *data,
index 35ec87e..b577e80 100644 (file)
@@ -38,7 +38,6 @@ struct backing_dev_info;
 struct export_operations;
 struct hd_geometry;
 struct iovec;
-struct nameidata;
 struct kiocb;
 struct kobject;
 struct pipe_inode_info;
@@ -656,6 +655,7 @@ struct inode {
                struct pipe_inode_info  *i_pipe;
                struct block_device     *i_bdev;
                struct cdev             *i_cdev;
+               char                    *i_link;
        };
 
        __u32                   i_generation;
@@ -1607,12 +1607,12 @@ struct file_operations {
 
 struct inode_operations {
        struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
-       void * (*follow_link) (struct dentry *, struct nameidata *);
+       const char * (*follow_link) (struct dentry *, void **);
        int (*permission) (struct inode *, int);
        struct posix_acl * (*get_acl)(struct inode *, int);
 
        int (*readlink) (struct dentry *, char __user *,int);
-       void (*put_link) (struct dentry *, struct nameidata *, void *);
+       void (*put_link) (struct inode *, void *);
 
        int (*create) (struct inode *,struct dentry *, umode_t, bool);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
@@ -1879,6 +1879,7 @@ enum file_time_flags {
        S_VERSION = 8,
 };
 
+extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
 static inline void file_accessed(struct file *file)
 {
@@ -2704,13 +2705,14 @@ extern const struct file_operations generic_ro_fops;
 
 extern int readlink_copy(char __user *, int, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
-extern void *page_follow_link_light(struct dentry *, struct nameidata *);
-extern void page_put_link(struct dentry *, struct nameidata *, void *);
+extern const char *page_follow_link_light(struct dentry *, void **);
+extern void page_put_link(struct inode *, void *);
 extern int __page_symlink(struct inode *inode, const char *symname, int len,
                int nofs);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
-extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
+extern void kfree_put_link(struct inode *, void *);
+extern void free_page_put_link(struct inode *, void *);
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 int vfs_getattr_nosec(struct path *path, struct kstat *stat);
@@ -2721,6 +2723,8 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
 void inode_set_bytes(struct inode *inode, loff_t bytes);
+const char *simple_follow_link(struct dentry *, void **);
+extern const struct inode_operations simple_symlink_inode_operations;
 
 extern int iterate_dir(struct file *, struct dir_context *);
 
index 46e83c2..f9ecf63 100644 (file)
@@ -46,7 +46,7 @@ const char *ftrace_print_hex_seq(struct trace_seq *p,
                                 const unsigned char *buf, int len);
 
 const char *ftrace_print_array_seq(struct trace_seq *p,
-                                  const void *buf, int buf_len,
+                                  const void *buf, int count,
                                   size_t el_size);
 
 struct trace_iterator;
index 97a9373..15928f0 100644 (file)
@@ -30,6 +30,7 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL                0x20000u
 #define ___GFP_THISNODE                0x40000u
 #define ___GFP_RECLAIMABLE     0x80000u
+#define ___GFP_NOACCOUNT       0x100000u
 #define ___GFP_NOTRACK         0x200000u
 #define ___GFP_NO_KSWAPD       0x400000u
 #define ___GFP_OTHER_NODE      0x800000u
@@ -87,6 +88,7 @@ struct vm_area_struct;
 #define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
+#define __GFP_NOACCOUNT        ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
 #define __GFP_NOTRACK  ((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
 
 #define __GFP_NO_KSWAPD        ((__force gfp_t)___GFP_NO_KSWAPD)
index f4af034..dfd59d6 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef LINUX_HARDIRQ_H
 #define LINUX_HARDIRQ_H
 
-#include <linux/preempt_mask.h>
+#include <linux/preempt.h>
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
 #include <linux/vtime.h>
index 0408421..0042bf3 100644 (file)
@@ -74,7 +74,7 @@ struct sensor_hub_pending {
  * @usage:             Usage id for this hub device instance.
  * @start_collection_index: Starting index for a phy type collection
  * @end_collection_index: Last index for a phy type collection
- * @mutex:             synchronizing mutex.
+ * @mutex_ptr:         synchronizing mutex pointer.
  * @pending:           Holds information of pending sync read request.
  */
 struct hid_sensor_hub_device {
@@ -84,7 +84,7 @@ struct hid_sensor_hub_device {
        u32 usage;
        int start_collection_index;
        int end_collection_index;
-       struct mutex mutex;
+       struct mutex *mutex_ptr;
        struct sensor_hub_pending pending;
 };
 
index 9286a46..6aefcd0 100644 (file)
@@ -65,6 +65,7 @@ static inline void kunmap(struct page *page)
 
 static inline void *kmap_atomic(struct page *page)
 {
+       preempt_disable();
        pagefault_disable();
        return page_address(page);
 }
@@ -73,6 +74,7 @@ static inline void *kmap_atomic(struct page *page)
 static inline void __kunmap_atomic(void *addr)
 {
        pagefault_enable();
+       preempt_enable();
 }
 
 #define kmap_atomic_pfn(pfn)   kmap_atomic(pfn_to_page(pfn))
index 696d223..bb9b075 100644 (file)
@@ -50,9 +50,8 @@ extern struct fs_struct init_fs;
        .cpu_timers     = INIT_CPU_TIMERS(sig.cpu_timers),              \
        .rlim           = INIT_RLIMITS,                                 \
        .cputimer       = {                                             \
-               .cputime = INIT_CPUTIME,                                \
-               .running = 0,                                           \
-               .lock = __RAW_SPIN_LOCK_UNLOCKED(sig.cputimer.lock),    \
+               .cputime_atomic = INIT_CPUTIME_ATOMIC,                  \
+               .running        = 0,                                    \
        },                                                              \
        .cred_guard_mutex =                                             \
                 __MUTEX_INITIALIZER(sig.cred_guard_mutex),             \
index 796ef96..a240e61 100644 (file)
@@ -115,13 +115,14 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
  * Extended Capability Register
  */
 
+#define ecap_pasid(e)          ((e >> 40) & 0x1)
 #define ecap_pss(e)            ((e >> 35) & 0x1f)
 #define ecap_eafs(e)           ((e >> 34) & 0x1)
 #define ecap_nwfs(e)           ((e >> 33) & 0x1)
 #define ecap_srs(e)            ((e >> 31) & 0x1)
 #define ecap_ers(e)            ((e >> 30) & 0x1)
 #define ecap_prs(e)            ((e >> 29) & 0x1)
-#define ecap_pasid(e)          ((e >> 28) & 0x1)
+/* PASID support used to be on bit 28 */
 #define ecap_dis(e)            ((e >> 27) & 0x1)
 #define ecap_nest(e)           ((e >> 26) & 0x1)
 #define ecap_mts(e)            ((e >> 25) & 0x1)
index 657fab4..c27dde7 100644 (file)
@@ -141,6 +141,7 @@ static inline void __iomem *
 io_mapping_map_atomic_wc(struct io_mapping *mapping,
                         unsigned long offset)
 {
+       preempt_disable();
        pagefault_disable();
        return ((char __force __iomem *) mapping) + offset;
 }
@@ -149,6 +150,7 @@ static inline void
 io_mapping_unmap_atomic(void __iomem *vaddr)
 {
        pagefault_enable();
+       preempt_enable();
 }
 
 /* Non-atomic map/unmap */
index 36ec4ae..9de976b 100644 (file)
@@ -95,8 +95,6 @@
 
 struct device_node;
 
-extern struct irq_chip gic_arch_extn;
-
 void gic_set_irqchip_flags(unsigned long flags);
 void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *,
                    u32 offset, struct device_node *);
index 3a5b48e..060dd7b 100644 (file)
@@ -244,7 +244,8 @@ static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
 
 #if defined(CONFIG_MMU) && \
        (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP))
-void might_fault(void);
+#define might_fault() __might_fault(__FILE__, __LINE__)
+void __might_fault(const char *file, int line);
 #else
 static inline void might_fault(void) { }
 #endif
index 5fc3d10..2b6a204 100644 (file)
@@ -166,19 +166,34 @@ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2)
 }
 
 #if BITS_PER_LONG < 64
-extern u64 __ktime_divns(const ktime_t kt, s64 div);
-static inline u64 ktime_divns(const ktime_t kt, s64 div)
+extern s64 __ktime_divns(const ktime_t kt, s64 div);
+static inline s64 ktime_divns(const ktime_t kt, s64 div)
 {
+       /*
+        * Negative divisors could cause an inf loop,
+        * so bug out here.
+        */
+       BUG_ON(div < 0);
        if (__builtin_constant_p(div) && !(div >> 32)) {
-               u64 ns = kt.tv64;
-               do_div(ns, div);
-               return ns;
+               s64 ns = kt.tv64;
+               u64 tmp = ns < 0 ? -ns : ns;
+
+               do_div(tmp, div);
+               return ns < 0 ? -tmp : tmp;
        } else {
                return __ktime_divns(kt, div);
        }
 }
 #else /* BITS_PER_LONG < 64 */
-# define ktime_divns(kt, div)          (u64)((kt).tv64 / (div))
+static inline s64 ktime_divns(const ktime_t kt, s64 div)
+{
+       /*
+        * 32-bit implementation cannot handle negative divisors,
+        * so catch them on 64bit as well.
+        */
+       WARN_ON(div < 0);
+       return kt.tv64 / div;
+}
 #endif
 
 static inline s64 ktime_to_us(const ktime_t kt)
index 0081f00..c92ebd1 100644 (file)
@@ -52,10 +52,15 @@ struct lglock {
        static struct lglock name = { .lock = &name ## _lock }
 
 void lg_lock_init(struct lglock *lg, char *name);
+
 void lg_local_lock(struct lglock *lg);
 void lg_local_unlock(struct lglock *lg);
 void lg_local_lock_cpu(struct lglock *lg, int cpu);
 void lg_local_unlock_cpu(struct lglock *lg, int cpu);
+
+void lg_double_lock(struct lglock *lg, int cpu1, int cpu2);
+void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2);
+
 void lg_global_lock(struct lglock *lg);
 void lg_global_unlock(struct lglock *lg);
 
index 8dad4a3..28aeae4 100644 (file)
@@ -205,6 +205,7 @@ enum {
        ATA_LFLAG_SW_ACTIVITY   = (1 << 7), /* keep activity stats */
        ATA_LFLAG_NO_LPM        = (1 << 8), /* disable LPM on this link */
        ATA_LFLAG_RST_ONCE      = (1 << 9), /* limit recovery to one reset */
+       ATA_LFLAG_CHANGED       = (1 << 10), /* LPM state changed on this link */
 
        /* struct ata_port flags */
        ATA_FLAG_SLAVE_POSS     = (1 << 0), /* host supports slave dev */
@@ -309,6 +310,12 @@ enum {
         */
        ATA_TMOUT_PMP_SRST_WAIT = 5000,
 
+       /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might
+        * be a spurious PHY event, so ignore the first PHY event that
+        * occurs within 10s after the policy change.
+        */
+       ATA_TMOUT_SPURIOUS_PHY  = 10000,
+
        /* ATA bus states */
        BUS_UNKNOWN             = 0,
        BUS_DMA                 = 1,
@@ -788,6 +795,8 @@ struct ata_link {
        struct ata_eh_context   eh_context;
 
        struct ata_device       device[ATA_MAX_DEVICES];
+
+       unsigned long           last_lpm_change; /* when last LPM change happened */
 };
 #define ATA_LINK_CLEAR_BEGIN           offsetof(struct ata_link, active_tag)
 #define ATA_LINK_CLEAR_END             offsetof(struct ata_link, device[0])
@@ -1201,6 +1210,7 @@ extern struct ata_device *ata_dev_pair(struct ata_device *adev);
 extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev);
 extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap);
 extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q);
+extern bool sata_lpm_ignore_phy_events(struct ata_link *link);
 
 extern int ata_cable_40wire(struct ata_port *ap);
 extern int ata_cable_80wire(struct ata_port *ap);
index 066ba41..2722111 100644 (file)
@@ -130,8 +130,8 @@ enum bounce_type {
 };
 
 struct lock_class_stats {
-       unsigned long                   contention_point[4];
-       unsigned long                   contending_point[4];
+       unsigned long                   contention_point[LOCKSTAT_POINTS];
+       unsigned long                   contending_point[LOCKSTAT_POINTS];
        struct lock_time                read_waittime;
        struct lock_time                write_waittime;
        struct lock_time                read_holdtime;
index 72dff5f..6c89181 100644 (file)
@@ -463,6 +463,8 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
        if (!memcg_kmem_enabled())
                return true;
 
+       if (gfp & __GFP_NOACCOUNT)
+               return true;
        /*
         * __GFP_NOFAIL allocations will move on even if charging is not
         * possible. Therefore we don't even try, and have this allocation
@@ -522,6 +524,8 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
        if (!memcg_kmem_enabled())
                return cachep;
+       if (gfp & __GFP_NOACCOUNT)
+               return cachep;
        if (gfp & __GFP_NOFAIL)
                return cachep;
        if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
index c899077..d8c6334 100644 (file)
@@ -1,16 +1,15 @@
 #ifndef _LINUX_NAMEI_H
 #define _LINUX_NAMEI_H
 
-#include <linux/dcache.h>
-#include <linux/errno.h>
-#include <linux/linkage.h>
+#include <linux/kernel.h>
 #include <linux/path.h>
-
-struct vfsmount;
-struct nameidata;
+#include <linux/fcntl.h>
+#include <linux/errno.h>
 
 enum { MAX_NESTED_LINKS = 8 };
 
+#define MAXSYMLINKS 40
+
 /*
  * Type of the last component on LOOKUP_PARENT
  */
@@ -45,13 +44,29 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_ROOT            0x2000
 #define LOOKUP_EMPTY           0x4000
 
-extern int user_path_at(int, const char __user *, unsigned, struct path *);
 extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
 
-#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
-#define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path)
-#define user_path_dir(name, path) \
-       user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path)
+static inline int user_path_at(int dfd, const char __user *name, unsigned flags,
+                struct path *path)
+{
+       return user_path_at_empty(dfd, name, flags, path, NULL);
+}
+
+static inline int user_path(const char __user *name, struct path *path)
+{
+       return user_path_at_empty(AT_FDCWD, name, LOOKUP_FOLLOW, path, NULL);
+}
+
+static inline int user_lpath(const char __user *name, struct path *path)
+{
+       return user_path_at_empty(AT_FDCWD, name, 0, path, NULL);
+}
+
+static inline int user_path_dir(const char __user *name, struct path *path)
+{
+       return user_path_at_empty(AT_FDCWD, name,
+                                 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path, NULL);
+}
 
 extern int kern_path(const char *, unsigned, struct path *);
 
@@ -70,9 +85,7 @@ extern int follow_up(struct path *);
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
 
-extern void nd_jump_link(struct nameidata *nd, struct path *path);
-extern void nd_set_link(struct nameidata *nd, char *path);
-extern char *nd_get_link(struct nameidata *nd);
+extern void nd_jump_link(struct path *path);
 
 static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 {
index 1899c74..05b9a69 100644 (file)
@@ -25,7 +25,6 @@
 #ifndef _LINUX_NETDEVICE_H
 #define _LINUX_NETDEVICE_H
 
-#include <linux/pm_qos.h>
 #include <linux/timer.h>
 #include <linux/bug.h>
 #include <linux/delay.h>
@@ -1499,8 +1498,6 @@ enum netdev_priv_flags {
  *
  *     @qdisc_tx_busylock:     XXX: need comments on this one
  *
- *     @pm_qos_req:    Power Management QoS object
- *
  *     FIXME: cleanup struct net_device such that network protocol info
  *     moves out.
  */
index ddeaae6..b871ff9 100644 (file)
@@ -121,6 +121,8 @@ extern struct device_node *of_stdout;
 extern raw_spinlock_t devtree_lock;
 
 #ifdef CONFIG_OF
+void of_core_init(void);
+
 static inline bool is_of_node(struct fwnode_handle *fwnode)
 {
        return fwnode && fwnode->type == FWNODE_OF;
@@ -376,6 +378,10 @@ bool of_console_check(struct device_node *dn, char *name, int index);
 
 #else /* CONFIG_OF */
 
+static inline void of_core_init(void)
+{
+}
+
 static inline bool is_of_node(struct fwnode_handle *fwnode)
 {
        return false;
index 3a6490e..703ea5c 100644 (file)
@@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock)
 extern bool osq_lock(struct optimistic_spin_queue *lock);
 extern void osq_unlock(struct optimistic_spin_queue *lock);
 
+static inline bool osq_is_locked(struct optimistic_spin_queue *lock)
+{
+       return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL;
+}
+
 #endif
index 38cff8f..2f7b9a4 100644 (file)
 
 #define PCI_VENDOR_ID_INTEL            0x8086
 #define PCI_DEVICE_ID_INTEL_EESSC      0x0008
-#define PCI_DEVICE_ID_INTEL_SNB_IMC    0x0100
-#define PCI_DEVICE_ID_INTEL_IVB_IMC    0x0154
-#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
-#define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
 #define PCI_DEVICE_ID_INTEL_PXHD_0     0x0320
 #define PCI_DEVICE_ID_INTEL_PXHD_1     0x0321
 #define PCI_DEVICE_ID_INTEL_PXH_0      0x0329
index 50e5009..84a1094 100644 (file)
@@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs);
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
+
+static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+       return __percpu_counter_compare(fbc, rhs, percpu_counter_batch);
+}
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
                return 0;
 }
 
+static inline int
+__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
+{
+       return percpu_counter_compare(fbc, rhs);
+}
+
 static inline void
 percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
index 61992cf..a204d52 100644 (file)
@@ -92,8 +92,6 @@ struct hw_perf_event_extra {
        int             idx;    /* index in shared_regs->regs[] */
 };
 
-struct event_constraint;
-
 /**
  * struct hw_perf_event - performance event hardware details:
  */
@@ -112,8 +110,6 @@ struct hw_perf_event {
 
                        struct hw_perf_event_extra extra_reg;
                        struct hw_perf_event_extra branch_reg;
-
-                       struct event_constraint *constraint;
                };
                struct { /* software */
                        struct hrtimer  hrtimer;
@@ -124,7 +120,7 @@ struct hw_perf_event {
                };
                struct { /* intel_cqm */
                        int                     cqm_state;
-                       int                     cqm_rmid;
+                       u32                     cqm_rmid;
                        struct list_head        cqm_events_entry;
                        struct list_head        cqm_groups_entry;
                        struct list_head        cqm_group_entry;
@@ -734,6 +730,22 @@ extern int perf_event_overflow(struct perf_event *event,
                                 struct perf_sample_data *data,
                                 struct pt_regs *regs);
 
+extern void perf_event_output(struct perf_event *event,
+                               struct perf_sample_data *data,
+                               struct pt_regs *regs);
+
+extern void
+perf_event_header__init_id(struct perf_event_header *header,
+                          struct perf_sample_data *data,
+                          struct perf_event *event);
+extern void
+perf_event__output_id_sample(struct perf_event *event,
+                            struct perf_output_handle *handle,
+                            struct perf_sample_data *sample);
+
+extern void
+perf_log_lost_samples(struct perf_event *event, u64 lost);
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
        return event->attr.sample_period != 0;
@@ -798,11 +810,33 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
 
 extern struct static_key_deferred perf_sched_events;
 
+static __always_inline bool
+perf_sw_migrate_enabled(void)
+{
+       if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
+               return true;
+       return false;
+}
+
+static inline void perf_event_task_migrate(struct task_struct *task)
+{
+       if (perf_sw_migrate_enabled())
+               task->sched_migrated = 1;
+}
+
 static inline void perf_event_task_sched_in(struct task_struct *prev,
                                            struct task_struct *task)
 {
        if (static_key_false(&perf_sched_events.key))
                __perf_event_task_sched_in(prev, task);
+
+       if (perf_sw_migrate_enabled() && task->sched_migrated) {
+               struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+
+               perf_fetch_caller_regs(regs);
+               ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
+               task->sched_migrated = 0;
+       }
 }
 
 static inline void perf_event_task_sched_out(struct task_struct *prev,
@@ -925,6 +959,8 @@ perf_aux_output_skip(struct perf_output_handle *handle,
 static inline void *
 perf_get_aux(struct perf_output_handle *handle)                                { return NULL; }
 static inline void
+perf_event_task_migrate(struct task_struct *task)                      { }
+static inline void
 perf_event_task_sched_in(struct task_struct *prev,
                         struct task_struct *task)                      { }
 static inline void
index a947ab8..533d980 100644 (file)
@@ -5,8 +5,6 @@
 #ifndef __LINUX_PLATFORM_DATA_SI5351_H__
 #define __LINUX_PLATFORM_DATA_SI5351_H__
 
-struct clk;
-
 /**
  * enum si5351_pll_src - Si5351 pll clock source
  * @SI5351_PLL_SRC_DEFAULT: default, do not change eeprom config
@@ -107,8 +105,6 @@ struct si5351_clkout_config {
  * @clkout: array of clkout configuration
  */
 struct si5351_platform_data {
-       struct clk *clk_xtal;
-       struct clk *clk_clkin;
        enum si5351_pll_src pll_src[2];
        struct si5351_clkout_config clkout[8];
 };
index de83b4e..0f1534a 100644 (file)
 #include <linux/list.h>
 
 /*
- * We use the MSB mostly because its available; see <linux/preempt_mask.h> for
- * the other bits -- can't include that header due to inclusion hell.
+ * We put the hardirq and softirq counter into the preemption
+ * counter. The bitmask has the following meaning:
+ *
+ * - bits 0-7 are the preemption count (max preemption depth: 256)
+ * - bits 8-15 are the softirq count (max # of softirqs: 256)
+ *
+ * The hardirq count could in theory be the same as the number of
+ * interrupts in the system, but we run all interrupt handlers with
+ * interrupts disabled, so we cannot have nesting interrupts. Though
+ * there are a few palaeontologic drivers which reenable interrupts in
+ * the handler, so we need more than one bit here.
+ *
+ *         PREEMPT_MASK:       0x000000ff
+ *         SOFTIRQ_MASK:       0x0000ff00
+ *         HARDIRQ_MASK:       0x000f0000
+ *             NMI_MASK:       0x00100000
+ *       PREEMPT_ACTIVE:       0x00200000
+ * PREEMPT_NEED_RESCHED:       0x80000000
  */
+#define PREEMPT_BITS   8
+#define SOFTIRQ_BITS   8
+#define HARDIRQ_BITS   4
+#define NMI_BITS       1
+
+#define PREEMPT_SHIFT  0
+#define SOFTIRQ_SHIFT  (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT  (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT      (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x)  ((1UL << (x))-1)
+
+#define PREEMPT_MASK   (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
+#define SOFTIRQ_MASK   (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK   (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK       (__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)
+
+#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
+#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
+#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
+#define NMI_OFFSET     (1UL << NMI_SHIFT)
+
+#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
+
+#define PREEMPT_ACTIVE_BITS    1
+#define PREEMPT_ACTIVE_SHIFT   (NMI_SHIFT + NMI_BITS)
+#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
+
+/* We use the MSB mostly because its available */
 #define PREEMPT_NEED_RESCHED   0x80000000
 
+/* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */
 #include <asm/preempt.h>
 
+#define hardirq_count()        (preempt_count() & HARDIRQ_MASK)
+#define softirq_count()        (preempt_count() & SOFTIRQ_MASK)
+#define irq_count()    (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
+                                | NMI_MASK))
+
+/*
+ * Are we doing bottom half or hardware interrupt processing?
+ * Are we in a softirq context? Interrupt context?
+ * in_softirq - Are we currently processing softirq or have bh disabled?
+ * in_serving_softirq - Are we currently processing softirq?
+ */
+#define in_irq()               (hardirq_count())
+#define in_softirq()           (softirq_count())
+#define in_interrupt()         (irq_count())
+#define in_serving_softirq()   (softirq_count() & SOFTIRQ_OFFSET)
+
+/*
+ * Are we in NMI context?
+ */
+#define in_nmi()       (preempt_count() & NMI_MASK)
+
+#if defined(CONFIG_PREEMPT_COUNT)
+# define PREEMPT_DISABLE_OFFSET 1
+#else
+# define PREEMPT_DISABLE_OFFSET 0
+#endif
+
+/*
+ * The preempt_count offset needed for things like:
+ *
+ *  spin_lock_bh()
+ *
+ * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and
+ * softirqs, such that unlock sequences of:
+ *
+ *  spin_unlock();
+ *  local_bh_enable();
+ *
+ * Work as expected.
+ */
+#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET)
+
+/*
+ * Are we running in atomic context?  WARNING: this macro cannot
+ * always detect atomic context; in particular, it cannot know about
+ * held spinlocks in non-preemptible kernels.  Thus it should not be
+ * used in the general case to determine whether sleeping is possible.
+ * Do not use in_atomic() in driver code.
+ */
+#define in_atomic()    (preempt_count() != 0)
+
+/*
+ * Check whether we were atomic before we did preempt_disable():
+ * (used by the scheduler)
+ */
+#define in_atomic_preempt_off() \
+               ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET)
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
 extern void preempt_count_add(int val);
 extern void preempt_count_sub(int val);
@@ -33,6 +137,18 @@ extern void preempt_count_sub(int val);
 #define preempt_count_inc() preempt_count_add(1)
 #define preempt_count_dec() preempt_count_sub(1)
 
+#define preempt_active_enter() \
+do { \
+       preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
+       barrier(); \
+} while (0)
+
+#define preempt_active_exit() \
+do { \
+       barrier(); \
+       preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
+} while (0)
+
 #ifdef CONFIG_PREEMPT_COUNT
 
 #define preempt_disable() \
@@ -49,6 +165,8 @@ do { \
 
 #define preempt_enable_no_resched() sched_preempt_enable_no_resched()
 
+#define preemptible()  (preempt_count() == 0 && !irqs_disabled())
+
 #ifdef CONFIG_PREEMPT
 #define preempt_enable() \
 do { \
@@ -57,52 +175,46 @@ do { \
                __preempt_schedule(); \
 } while (0)
 
+#define preempt_enable_notrace() \
+do { \
+       barrier(); \
+       if (unlikely(__preempt_count_dec_and_test())) \
+               __preempt_schedule_notrace(); \
+} while (0)
+
 #define preempt_check_resched() \
 do { \
        if (should_resched()) \
                __preempt_schedule(); \
 } while (0)
 
-#else
+#else /* !CONFIG_PREEMPT */
 #define preempt_enable() \
 do { \
        barrier(); \
        preempt_count_dec(); \
 } while (0)
-#define preempt_check_resched() do { } while (0)
-#endif
-
-#define preempt_disable_notrace() \
-do { \
-       __preempt_count_inc(); \
-       barrier(); \
-} while (0)
 
-#define preempt_enable_no_resched_notrace() \
+#define preempt_enable_notrace() \
 do { \
        barrier(); \
        __preempt_count_dec(); \
 } while (0)
 
-#ifdef CONFIG_PREEMPT
-
-#ifndef CONFIG_CONTEXT_TRACKING
-#define __preempt_schedule_context() __preempt_schedule()
-#endif
+#define preempt_check_resched() do { } while (0)
+#endif /* CONFIG_PREEMPT */
 
-#define preempt_enable_notrace() \
+#define preempt_disable_notrace() \
 do { \
+       __preempt_count_inc(); \
        barrier(); \
-       if (unlikely(__preempt_count_dec_and_test())) \
-               __preempt_schedule_context(); \
 } while (0)
-#else
-#define preempt_enable_notrace() \
+
+#define preempt_enable_no_resched_notrace() \
 do { \
        barrier(); \
        __preempt_count_dec(); \
 } while (0)
-#endif
 
 #else /* !CONFIG_PREEMPT_COUNT */
 
@@ -121,6 +233,7 @@ do { \
 #define preempt_disable_notrace()              barrier()
 #define preempt_enable_no_resched_notrace()    barrier()
 #define preempt_enable_notrace()               barrier()
+#define preemptible()                          0
 
 #endif /* CONFIG_PREEMPT_COUNT */
 
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
deleted file mode 100644 (file)
index dbeec4d..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-#ifndef LINUX_PREEMPT_MASK_H
-#define LINUX_PREEMPT_MASK_H
-
-#include <linux/preempt.h>
-
-/*
- * We put the hardirq and softirq counter into the preemption
- * counter. The bitmask has the following meaning:
- *
- * - bits 0-7 are the preemption count (max preemption depth: 256)
- * - bits 8-15 are the softirq count (max # of softirqs: 256)
- *
- * The hardirq count could in theory be the same as the number of
- * interrupts in the system, but we run all interrupt handlers with
- * interrupts disabled, so we cannot have nesting interrupts. Though
- * there are a few palaeontologic drivers which reenable interrupts in
- * the handler, so we need more than one bit here.
- *
- * PREEMPT_MASK:       0x000000ff
- * SOFTIRQ_MASK:       0x0000ff00
- * HARDIRQ_MASK:       0x000f0000
- *     NMI_MASK:       0x00100000
- * PREEMPT_ACTIVE:     0x00200000
- */
-#define PREEMPT_BITS   8
-#define SOFTIRQ_BITS   8
-#define HARDIRQ_BITS   4
-#define NMI_BITS       1
-
-#define PREEMPT_SHIFT  0
-#define SOFTIRQ_SHIFT  (PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT  (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-#define NMI_SHIFT      (HARDIRQ_SHIFT + HARDIRQ_BITS)
-
-#define __IRQ_MASK(x)  ((1UL << (x))-1)
-
-#define PREEMPT_MASK   (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
-#define SOFTIRQ_MASK   (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
-#define HARDIRQ_MASK   (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
-#define NMI_MASK       (__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)
-
-#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
-#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
-#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
-#define NMI_OFFSET     (1UL << NMI_SHIFT)
-
-#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
-
-#define PREEMPT_ACTIVE_BITS    1
-#define PREEMPT_ACTIVE_SHIFT   (NMI_SHIFT + NMI_BITS)
-#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
-
-#define hardirq_count()        (preempt_count() & HARDIRQ_MASK)
-#define softirq_count()        (preempt_count() & SOFTIRQ_MASK)
-#define irq_count()    (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
-                                | NMI_MASK))
-
-/*
- * Are we doing bottom half or hardware interrupt processing?
- * Are we in a softirq context? Interrupt context?
- * in_softirq - Are we currently processing softirq or have bh disabled?
- * in_serving_softirq - Are we currently processing softirq?
- */
-#define in_irq()               (hardirq_count())
-#define in_softirq()           (softirq_count())
-#define in_interrupt()         (irq_count())
-#define in_serving_softirq()   (softirq_count() & SOFTIRQ_OFFSET)
-
-/*
- * Are we in NMI context?
- */
-#define in_nmi()       (preempt_count() & NMI_MASK)
-
-#if defined(CONFIG_PREEMPT_COUNT)
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_CHECK_OFFSET 0
-#endif
-
-/*
- * The preempt_count offset needed for things like:
- *
- *  spin_lock_bh()
- *
- * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and
- * softirqs, such that unlock sequences of:
- *
- *  spin_unlock();
- *  local_bh_enable();
- *
- * Work as expected.
- */
-#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET)
-
-/*
- * Are we running in atomic context?  WARNING: this macro cannot
- * always detect atomic context; in particular, it cannot know about
- * held spinlocks in non-preemptible kernels.  Thus it should not be
- * used in the general case to determine whether sleeping is possible.
- * Do not use in_atomic() in driver code.
- */
-#define in_atomic()    ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
-
-/*
- * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler, *after* releasing the kernel lock)
- */
-#define in_atomic_preempt_off() \
-               ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
-
-#ifdef CONFIG_PREEMPT_COUNT
-# define preemptible() (preempt_count() == 0 && !irqs_disabled())
-#else
-# define preemptible() 0
-#endif
-
-#endif /* LINUX_PREEMPT_MASK_H */
index a18b16f..17c6b1f 100644 (file)
@@ -29,8 +29,8 @@
  */
 static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
 {
-       ACCESS_ONCE(list->next) = list;
-       ACCESS_ONCE(list->prev) = list;
+       WRITE_ONCE(list->next, list);
+       WRITE_ONCE(list->prev, list);
 }
 
 /*
@@ -288,7 +288,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
 #define list_first_or_null_rcu(ptr, type, member) \
 ({ \
        struct list_head *__ptr = (ptr); \
-       struct list_head *__next = ACCESS_ONCE(__ptr->next); \
+       struct list_head *__next = READ_ONCE(__ptr->next); \
        likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \
 })
 
@@ -549,8 +549,8 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
  */
 #define hlist_for_each_entry_from_rcu(pos, member)                     \
        for (; pos;                                                     \
-            pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\
-                       typeof(*(pos)), member))
+            pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \
+                       &(pos)->member)), typeof(*(pos)), member))
 
 #endif /* __KERNEL__ */
 #endif
index 573a5af..03a899a 100644 (file)
@@ -292,10 +292,6 @@ void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
 struct notifier_block;
-void rcu_idle_enter(void);
-void rcu_idle_exit(void);
-void rcu_irq_enter(void);
-void rcu_irq_exit(void);
 int rcu_cpu_notify(struct notifier_block *self,
                   unsigned long action, void *hcpu);
 
@@ -364,8 +360,8 @@ extern struct srcu_struct tasks_rcu_exit_srcu;
 #define rcu_note_voluntary_context_switch(t) \
        do { \
                rcu_all_qs(); \
-               if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
-                       ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
+               if (READ_ONCE((t)->rcu_tasks_holdout)) \
+                       WRITE_ONCE((t)->rcu_tasks_holdout, false); \
        } while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define TASKS_RCU(x) do { } while (0)
@@ -609,7 +605,7 @@ static inline void rcu_preempt_sleep_check(void)
 
 #define __rcu_access_pointer(p, space) \
 ({ \
-       typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \
+       typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \
        rcu_dereference_sparse(p, space); \
        ((typeof(*p) __force __kernel *)(_________p1)); \
 })
@@ -628,21 +624,6 @@ static inline void rcu_preempt_sleep_check(void)
        ((typeof(*p) __force __kernel *)(p)); \
 })
 
-#define __rcu_access_index(p, space) \
-({ \
-       typeof(p) _________p1 = ACCESS_ONCE(p); \
-       rcu_dereference_sparse(p, space); \
-       (_________p1); \
-})
-#define __rcu_dereference_index_check(p, c) \
-({ \
-       /* Dependency order vs. p above. */ \
-       typeof(p) _________p1 = lockless_dereference(p); \
-       rcu_lockdep_assert(c, \
-                          "suspicious rcu_dereference_index_check() usage"); \
-       (_________p1); \
-})
-
 /**
  * RCU_INITIALIZER() - statically initialize an RCU-protected global variable
  * @v: The value to statically initialize with.
@@ -659,7 +640,7 @@ static inline void rcu_preempt_sleep_check(void)
  */
 #define lockless_dereference(p) \
 ({ \
-       typeof(p) _________p1 = ACCESS_ONCE(p); \
+       typeof(p) _________p1 = READ_ONCE(p); \
        smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
        (_________p1); \
 })
@@ -702,7 +683,7 @@ static inline void rcu_preempt_sleep_check(void)
  * @p: The pointer to read
  *
  * Return the value of the specified RCU-protected pointer, but omit the
- * smp_read_barrier_depends() and keep the ACCESS_ONCE().  This is useful
+ * smp_read_barrier_depends() and keep the READ_ONCE().  This is useful
  * when the value of this pointer is accessed, but the pointer is not
  * dereferenced, for example, when testing an RCU-protected pointer against
  * NULL.  Although rcu_access_pointer() may also be used in cases where
@@ -787,47 +768,12 @@ static inline void rcu_preempt_sleep_check(void)
 #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
 
 /**
- * rcu_access_index() - fetch RCU index with no dereferencing
- * @p: The index to read
- *
- * Return the value of the specified RCU-protected index, but omit the
- * smp_read_barrier_depends() and keep the ACCESS_ONCE().  This is useful
- * when the value of this index is accessed, but the index is not
- * dereferenced, for example, when testing an RCU-protected index against
- * -1.  Although rcu_access_index() may also be used in cases where
- * update-side locks prevent the value of the index from changing, you
- * should instead use rcu_dereference_index_protected() for this use case.
- */
-#define rcu_access_index(p) __rcu_access_index((p), __rcu)
-
-/**
- * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
- * @p: The pointer to read, prior to dereferencing
- * @c: The conditions under which the dereference will take place
- *
- * Similar to rcu_dereference_check(), but omits the sparse checking.
- * This allows rcu_dereference_index_check() to be used on integers,
- * which can then be used as array indices.  Attempting to use
- * rcu_dereference_check() on an integer will give compiler warnings
- * because the sparse address-space mechanism relies on dereferencing
- * the RCU-protected pointer.  Dereferencing integers is not something
- * that even gcc will put up with.
- *
- * Note that this function does not implicitly check for RCU read-side
- * critical sections.  If this function gains lots of uses, it might
- * make sense to provide versions for each flavor of RCU, but it does
- * not make sense as of early 2010.
- */
-#define rcu_dereference_index_check(p, c) \
-       __rcu_dereference_index_check((p), (c))
-
-/**
  * rcu_dereference_protected() - fetch RCU pointer when updates prevented
  * @p: The pointer to read, prior to dereferencing
  * @c: The conditions under which the dereference will take place
  *
  * Return the value of the specified RCU-protected pointer, but omit
- * both the smp_read_barrier_depends() and the ACCESS_ONCE().  This
+ * both the smp_read_barrier_depends() and the READ_ONCE().  This
  * is useful in cases where update-side locks prevent the value of the
  * pointer from changing.  Please note that this primitive does -not-
  * prevent the compiler from repeating this reference or combining it
@@ -1153,13 +1099,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define kfree_rcu(ptr, rcu_head)                                       \
        __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
 
-#if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL)
+#ifdef CONFIG_TINY_RCU
 static inline int rcu_needs_cpu(unsigned long *delta_jiffies)
 {
        *delta_jiffies = ULONG_MAX;
        return 0;
 }
-#endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */
+#endif /* #ifdef CONFIG_TINY_RCU */
 
 #if defined(CONFIG_RCU_NOCB_CPU_ALL)
 static inline bool rcu_is_nocb_cpu(int cpu) { return true; }
index 937edae..3df6c1e 100644 (file)
@@ -159,6 +159,22 @@ static inline void rcu_cpu_stall_reset(void)
 {
 }
 
+static inline void rcu_idle_enter(void)
+{
+}
+
+static inline void rcu_idle_exit(void)
+{
+}
+
+static inline void rcu_irq_enter(void)
+{
+}
+
+static inline void rcu_irq_exit(void)
+{
+}
+
 static inline void exit_rcu(void)
 {
 }
index d2e583a..3fa4a43 100644 (file)
@@ -31,9 +31,7 @@
 #define __LINUX_RCUTREE_H
 
 void rcu_note_context_switch(void);
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
 int rcu_needs_cpu(unsigned long *delta_jiffies);
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 void rcu_cpu_stall_reset(void);
 
 /*
@@ -93,6 +91,11 @@ void rcu_force_quiescent_state(void);
 void rcu_bh_force_quiescent_state(void);
 void rcu_sched_force_quiescent_state(void);
 
+void rcu_idle_enter(void);
+void rcu_idle_exit(void);
+void rcu_irq_enter(void);
+void rcu_irq_exit(void);
+
 void exit_rcu(void);
 
 void rcu_scheduler_starting(void);
index dbcbcc5..843ceca 100644 (file)
@@ -17,6 +17,7 @@
 #ifndef _LINUX_RHASHTABLE_H
 #define _LINUX_RHASHTABLE_H
 
+#include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/jhash.h>
@@ -100,6 +101,7 @@ struct rhashtable;
  * @key_len: Length of key
  * @key_offset: Offset of key in struct to be hashed
  * @head_offset: Offset of rhash_head in struct to be hashed
+ * @insecure_max_entries: Maximum number of entries (may be exceeded)
  * @max_size: Maximum size while expanding
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
@@ -115,6 +117,7 @@ struct rhashtable_params {
        size_t                  key_len;
        size_t                  key_offset;
        size_t                  head_offset;
+       unsigned int            insecure_max_entries;
        unsigned int            max_size;
        unsigned int            min_size;
        u32                     nulls_base;
@@ -286,6 +289,18 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht,
                (!ht->p.max_size || tbl->size < ht->p.max_size);
 }
 
+/**
+ * rht_grow_above_max - returns true if table is above maximum
+ * @ht:                hash table
+ * @tbl:       current table
+ */
+static inline bool rht_grow_above_max(const struct rhashtable *ht,
+                                     const struct bucket_table *tbl)
+{
+       return ht->p.insecure_max_entries &&
+              atomic_read(&ht->nelems) >= ht->p.insecure_max_entries;
+}
+
 /* The bucket lock is selected based on the hash and protects mutations
  * on a group of hash buckets.
  *
@@ -589,6 +604,10 @@ restart:
                goto out;
        }
 
+       err = -E2BIG;
+       if (unlikely(rht_grow_above_max(ht, tbl)))
+               goto out;
+
        if (unlikely(rht_grow_above_100(ht, tbl))) {
 slow_path:
                spin_unlock_bh(lock);
index 26a2e61..d4193d5 100644 (file)
@@ -25,7 +25,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
-#include <linux/preempt_mask.h>
+#include <linux/preempt.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
@@ -132,6 +132,7 @@ struct fs_struct;
 struct perf_event_context;
 struct blk_plug;
 struct filename;
+struct nameidata;
 
 #define VMACACHE_BITS 2
 #define VMACACHE_SIZE (1U << VMACACHE_BITS)
@@ -173,7 +174,12 @@ extern unsigned long nr_iowait_cpu(int cpu);
 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 
 extern void calc_global_load(unsigned long ticks);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void update_cpu_load_nohz(void);
+#else
+static inline void update_cpu_load_nohz(void) { }
+#endif
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
@@ -213,9 +219,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
 #define TASK_WAKEKILL          128
 #define TASK_WAKING            256
 #define TASK_PARKED            512
-#define TASK_STATE_MAX         1024
+#define TASK_NOLOAD            1024
+#define TASK_STATE_MAX         2048
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
 
 extern char ___assert_task_state[1 - 2*!!(
                sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -225,6 +232,8 @@ extern char ___assert_task_state[1 - 2*!!(
 #define TASK_STOPPED           (TASK_WAKEKILL | __TASK_STOPPED)
 #define TASK_TRACED            (TASK_WAKEKILL | __TASK_TRACED)
 
+#define TASK_IDLE              (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
+
 /* Convenience macros for the sake of wake_up */
 #define TASK_NORMAL            (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
 #define TASK_ALL               (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
@@ -240,7 +249,8 @@ extern char ___assert_task_state[1 - 2*!!(
                        ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 #define task_contributes_to_load(task) \
                                ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-                                (task->flags & PF_FROZEN) == 0)
+                                (task->flags & PF_FROZEN) == 0 && \
+                                (task->state & TASK_NOLOAD) == 0)
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
@@ -252,7 +262,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_task_state(tsk, state_value)                       \
        do {                                                    \
                (tsk)->task_state_change = _THIS_IP_;           \
-               set_mb((tsk)->state, (state_value));            \
+               smp_store_mb((tsk)->state, (state_value));              \
        } while (0)
 
 /*
@@ -274,7 +284,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_current_state(state_value)                         \
        do {                                                    \
                current->task_state_change = _THIS_IP_;         \
-               set_mb(current->state, (state_value));          \
+               smp_store_mb(current->state, (state_value));            \
        } while (0)
 
 #else
@@ -282,7 +292,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define __set_task_state(tsk, state_value)             \
        do { (tsk)->state = (state_value); } while (0)
 #define set_task_state(tsk, state_value)               \
-       set_mb((tsk)->state, (state_value))
+       smp_store_mb((tsk)->state, (state_value))
 
 /*
  * set_current_state() includes a barrier so that the write of current->state
@@ -298,7 +308,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define __set_current_state(state_value)               \
        do { current->state = (state_value); } while (0)
 #define set_current_state(state_value)                 \
-       set_mb(current->state, (state_value))
+       smp_store_mb(current->state, (state_value))
 
 #endif
 
@@ -567,6 +577,23 @@ struct task_cputime {
                .sum_exec_runtime = 0,                          \
        }
 
+/*
+ * This is the atomic variant of task_cputime, which can be used for
+ * storing and updating task_cputime statistics without locking.
+ */
+struct task_cputime_atomic {
+       atomic64_t utime;
+       atomic64_t stime;
+       atomic64_t sum_exec_runtime;
+};
+
+#define INIT_CPUTIME_ATOMIC \
+       (struct task_cputime_atomic) {                          \
+               .utime = ATOMIC64_INIT(0),                      \
+               .stime = ATOMIC64_INIT(0),                      \
+               .sum_exec_runtime = ATOMIC64_INIT(0),           \
+       }
+
 #ifdef CONFIG_PREEMPT_COUNT
 #define PREEMPT_DISABLED       (1 + PREEMPT_ENABLED)
 #else
@@ -584,18 +611,16 @@ struct task_cputime {
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
- * @cputime:           thread group interval timers.
+ * @cputime_atomic:    atomic thread group interval timers.
  * @running:           non-zero when there are timers running and
  *                     @cputime receives updates.
- * @lock:              lock for fields in this struct.
  *
  * This structure contains the version of task_cputime, above, that is
  * used for thread group CPU timer calculations.
  */
 struct thread_group_cputimer {
-       struct task_cputime cputime;
+       struct task_cputime_atomic cputime_atomic;
        int running;
-       raw_spinlock_t lock;
 };
 
 #include <linux/rwsem.h>
@@ -900,6 +925,50 @@ enum cpu_idle_type {
 #define SCHED_CAPACITY_SCALE   (1L << SCHED_CAPACITY_SHIFT)
 
 /*
+ * Wake-queues are lists of tasks with a pending wakeup, whose
+ * callers have already marked the task as woken internally,
+ * and can thus carry on. A common use case is being able to
+ * do the wakeups once the corresponding user lock as been
+ * released.
+ *
+ * We hold reference to each task in the list across the wakeup,
+ * thus guaranteeing that the memory is still valid by the time
+ * the actual wakeups are performed in wake_up_q().
+ *
+ * One per task suffices, because there's never a need for a task to be
+ * in two wake queues simultaneously; it is forbidden to abandon a task
+ * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
+ * already in a wake queue, the wakeup will happen soon and the second
+ * waker can just skip it.
+ *
+ * The WAKE_Q macro declares and initializes the list head.
+ * wake_up_q() does NOT reinitialize the list; it's expected to be
+ * called near the end of a function, where the fact that the queue is
+ * not used again will be easy to see by inspection.
+ *
+ * Note that this can cause spurious wakeups. schedule() callers
+ * must ensure the call is done inside a loop, confirming that the
+ * wakeup condition has in fact occurred.
+ */
+struct wake_q_node {
+       struct wake_q_node *next;
+};
+
+struct wake_q_head {
+       struct wake_q_node *first;
+       struct wake_q_node **lastp;
+};
+
+#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
+
+#define WAKE_Q(name)                                   \
+       struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+
+extern void wake_q_add(struct wake_q_head *head,
+                      struct task_struct *task);
+extern void wake_up_q(struct wake_q_head *head);
+
+/*
  * sched-domains (multiprocessor balancing) declarations:
  */
 #ifdef CONFIG_SMP
@@ -1334,8 +1403,6 @@ struct task_struct {
        int rcu_read_lock_nesting;
        union rcu_special rcu_read_unlock_special;
        struct list_head rcu_node_entry;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-#ifdef CONFIG_PREEMPT_RCU
        struct rcu_node *rcu_blocked_node;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TASKS_RCU
@@ -1356,9 +1423,6 @@ struct task_struct {
 #endif
 
        struct mm_struct *mm, *active_mm;
-#ifdef CONFIG_COMPAT_BRK
-       unsigned brk_randomized:1;
-#endif
        /* per-thread vma caching */
        u32 vmacache_seqnum;
        struct vm_area_struct *vmacache[VMACACHE_SIZE];
@@ -1369,7 +1433,7 @@ struct task_struct {
        int exit_state;
        int exit_code, exit_signal;
        int pdeath_signal;  /*  The signal sent when the parent dies  */
-       unsigned int jobctl;    /* JOBCTL_*, siglock protected */
+       unsigned long jobctl;   /* JOBCTL_*, siglock protected */
 
        /* Used for emulating ABI behavior of previous Linux versions */
        unsigned int personality;
@@ -1381,10 +1445,14 @@ struct task_struct {
        /* Revert to default priority/policy when forking */
        unsigned sched_reset_on_fork:1;
        unsigned sched_contributes_to_load:1;
+       unsigned sched_migrated:1;
 
 #ifdef CONFIG_MEMCG_KMEM
        unsigned memcg_kmem_skip_account:1;
 #endif
+#ifdef CONFIG_COMPAT_BRK
+       unsigned brk_randomized:1;
+#endif
 
        unsigned long atomic_flags; /* Flags needing atomic access. */
 
@@ -1461,7 +1529,7 @@ struct task_struct {
                                       it with task_lock())
                                     - initialized normally by setup_new_exec */
 /* file system info */
-       int link_count, total_link_count;
+       struct nameidata *nameidata;
 #ifdef CONFIG_SYSVIPC
 /* ipc stuff */
        struct sysv_sem sysvsem;
@@ -1511,6 +1579,8 @@ struct task_struct {
        /* Protection of the PI data structures: */
        raw_spinlock_t pi_lock;
 
+       struct wake_q_node wake_q;
+
 #ifdef CONFIG_RT_MUTEXES
        /* PI waiters blocked on a rt_mutex held by this task */
        struct rb_root pi_waiters;
@@ -1724,6 +1794,7 @@ struct task_struct {
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
        unsigned long   task_state_change;
 #endif
+       int pagefault_disabled;
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2077,22 +2148,22 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
 #define JOBCTL_TRAPPING_BIT    21      /* switching to TRACED */
 #define JOBCTL_LISTENING_BIT   22      /* ptracer is listening for events */
 
-#define JOBCTL_STOP_DEQUEUED   (1 << JOBCTL_STOP_DEQUEUED_BIT)
-#define JOBCTL_STOP_PENDING    (1 << JOBCTL_STOP_PENDING_BIT)
-#define JOBCTL_STOP_CONSUME    (1 << JOBCTL_STOP_CONSUME_BIT)
-#define JOBCTL_TRAP_STOP       (1 << JOBCTL_TRAP_STOP_BIT)
-#define JOBCTL_TRAP_NOTIFY     (1 << JOBCTL_TRAP_NOTIFY_BIT)
-#define JOBCTL_TRAPPING                (1 << JOBCTL_TRAPPING_BIT)
-#define JOBCTL_LISTENING       (1 << JOBCTL_LISTENING_BIT)
+#define JOBCTL_STOP_DEQUEUED   (1UL << JOBCTL_STOP_DEQUEUED_BIT)
+#define JOBCTL_STOP_PENDING    (1UL << JOBCTL_STOP_PENDING_BIT)
+#define JOBCTL_STOP_CONSUME    (1UL << JOBCTL_STOP_CONSUME_BIT)
+#define JOBCTL_TRAP_STOP       (1UL << JOBCTL_TRAP_STOP_BIT)
+#define JOBCTL_TRAP_NOTIFY     (1UL << JOBCTL_TRAP_NOTIFY_BIT)
+#define JOBCTL_TRAPPING                (1UL << JOBCTL_TRAPPING_BIT)
+#define JOBCTL_LISTENING       (1UL << JOBCTL_LISTENING_BIT)
 
 #define JOBCTL_TRAP_MASK       (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
 #define JOBCTL_PENDING_MASK    (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
 
 extern bool task_set_jobctl_pending(struct task_struct *task,
-                                   unsigned int mask);
+                                   unsigned long mask);
 extern void task_clear_jobctl_trapping(struct task_struct *task);
 extern void task_clear_jobctl_pending(struct task_struct *task,
-                                     unsigned int mask);
+                                     unsigned long mask);
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
@@ -2962,11 +3033,6 @@ static __always_inline bool need_resched(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
-static inline void thread_group_cputime_init(struct signal_struct *sig)
-{
-       raw_spin_lock_init(&sig->cputimer.lock);
-}
-
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
@@ -3080,13 +3146,13 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 static inline unsigned long task_rlimit(const struct task_struct *tsk,
                unsigned int limit)
 {
-       return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
+       return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
 }
 
 static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
                unsigned int limit)
 {
-       return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
+       return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
 }
 
 static inline unsigned long rlimit(unsigned int limit)
index 6341f5b..a30b172 100644 (file)
@@ -18,7 +18,7 @@ static inline int rt_task(struct task_struct *p)
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern int rt_mutex_check_prio(struct task_struct *task, int newprio);
+extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
 extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
@@ -31,9 +31,10 @@ static inline int rt_mutex_getprio(struct task_struct *p)
        return p->normal_prio;
 }
 
-static inline int rt_mutex_check_prio(struct task_struct *task, int newprio)
+static inline int rt_mutex_get_effective_prio(struct task_struct *task,
+                                             int newprio)
 {
-       return 0;
+       return newprio;
 }
 
 static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
index 18264ea..52febde 100644 (file)
@@ -43,7 +43,6 @@ struct file;
 struct vfsmount;
 struct path;
 struct qstr;
-struct nameidata;
 struct iattr;
 struct fown_struct;
 struct file_operations;
@@ -477,7 +476,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @inode_follow_link:
  *     Check permission to follow a symbolic link when looking up a pathname.
  *     @dentry contains the dentry structure for the link.
- *     @nd contains the nameidata structure for the parent directory.
+ *     @inode contains the inode, which itself is not stable in RCU-walk
+ *     @rcu indicates whether we are in RCU-walk mode.
  *     Return 0 if permission is granted.
  * @inode_permission:
  *     Check permission before accessing an inode.  This hook is called by the
@@ -1553,7 +1553,8 @@ struct security_operations {
        int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry,
                             struct inode *new_dir, struct dentry *new_dentry);
        int (*inode_readlink) (struct dentry *dentry);
-       int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
+       int (*inode_follow_link) (struct dentry *dentry, struct inode *inode,
+                                 bool rcu);
        int (*inode_permission) (struct inode *inode, int mask);
        int (*inode_setattr)    (struct dentry *dentry, struct iattr *attr);
        int (*inode_getattr) (const struct path *path);
@@ -1839,7 +1840,8 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
                          struct inode *new_dir, struct dentry *new_dentry,
                          unsigned int flags);
 int security_inode_readlink(struct dentry *dentry);
-int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
+int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
+                              bool rcu);
 int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
 int security_inode_getattr(const struct path *path);
@@ -2242,7 +2244,8 @@ static inline int security_inode_readlink(struct dentry *dentry)
 }
 
 static inline int security_inode_follow_link(struct dentry *dentry,
-                                             struct nameidata *nd)
+                                            struct inode *inode,
+                                            bool rcu)
 {
        return 0;
 }
index 66e374d..f15154a 100644 (file)
@@ -176,6 +176,7 @@ struct nf_bridge_info {
        struct net_device       *physindev;
        struct net_device       *physoutdev;
        char                    neigh_header[8];
+       __be32                  ipv4_daddr;
 };
 #endif
 
index 3e18379..0063b24 100644 (file)
@@ -120,7 +120,7 @@ do {                                                                \
 /*
  * Despite its name it doesn't necessarily has to be a full barrier.
  * It should only guarantee that a STORE before the critical section
- * can not be reordered with a LOAD inside this section.
+ * can not be reordered with LOADs and STOREs inside this section.
  * spin_lock() is the one-way barrier, this LOAD can not escape out
  * of the region. So the default implementation simply ensures that
  * a STORE can not move into the critical section, smp_wmb() should
index 0caa3a2..e8bbf40 100644 (file)
@@ -145,11 +145,21 @@ struct tcp_sock {
  *     read the code and the spec side by side (and laugh ...)
  *     See RFC793 and RFC1122. The RFC writes these in capitals.
  */
+       u64     bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
+                                * sum(delta(rcv_nxt)), or how many bytes
+                                * were acked.
+                                */
        u32     rcv_nxt;        /* What we want to receive next         */
        u32     copied_seq;     /* Head of yet unread data              */
        u32     rcv_wup;        /* rcv_nxt on last window update sent   */
        u32     snd_nxt;        /* Next sequence we send                */
 
+       u64     bytes_acked;    /* RFC4898 tcpEStatsAppHCThruOctetsAcked
+                                * sum(delta(snd_una)), or how many bytes
+                                * were acked.
+                                */
+       struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */
+
        u32     snd_una;        /* First byte we want an ack for        */
        u32     snd_sml;        /* Last byte of the most recently transmitted small packet */
        u32     rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
index 909b6e4..73ddad1 100644 (file)
@@ -191,8 +191,8 @@ static inline int cpu_to_mem(int cpu)
 #ifndef topology_core_id
 #define topology_core_id(cpu)                  ((void)(cpu), 0)
 #endif
-#ifndef topology_thread_cpumask
-#define topology_thread_cpumask(cpu)           cpumask_of(cpu)
+#ifndef topology_sibling_cpumask
+#define topology_sibling_cpumask(cpu)          cpumask_of(cpu)
 #endif
 #ifndef topology_core_cpumask
 #define topology_core_cpumask(cpu)             cpumask_of(cpu)
@@ -201,7 +201,7 @@ static inline int cpu_to_mem(int cpu)
 #ifdef CONFIG_SCHED_SMT
 static inline const struct cpumask *cpu_smt_mask(int cpu)
 {
-       return topology_thread_cpumask(cpu);
+       return topology_sibling_cpumask(cpu);
 }
 #endif
 
index fe5623c..d76631f 100644 (file)
@@ -339,6 +339,7 @@ struct tty_file_private {
 #define TTY_EXCLUSIVE          3       /* Exclusive open mode */
 #define TTY_DEBUG              4       /* Debugging */
 #define TTY_DO_WRITE_WAKEUP    5       /* Call write_wakeup after queuing new */
+#define TTY_OTHER_DONE         6       /* Closed pty has completed input processing */
 #define TTY_LDISC_OPEN         11      /* Line discipline is open */
 #define TTY_PTY_LOCK           16      /* pty private */
 #define TTY_NO_WRITE_SPLIT     17      /* Preserve write boundaries to driver */
@@ -462,7 +463,6 @@ extern int tty_hung_up_p(struct file *filp);
 extern void do_SAK(struct tty_struct *tty);
 extern void __do_SAK(struct tty_struct *tty);
 extern void no_tty(void);
-extern void tty_flush_to_ldisc(struct tty_struct *tty);
 extern void tty_buffer_free_all(struct tty_port *port);
 extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld);
 extern void tty_buffer_init(struct tty_port *port);
index ecd3319..ae572c1 100644 (file)
@@ -1,21 +1,30 @@
 #ifndef __LINUX_UACCESS_H__
 #define __LINUX_UACCESS_H__
 
-#include <linux/preempt.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>
 
+static __always_inline void pagefault_disabled_inc(void)
+{
+       current->pagefault_disabled++;
+}
+
+static __always_inline void pagefault_disabled_dec(void)
+{
+       current->pagefault_disabled--;
+       WARN_ON(current->pagefault_disabled < 0);
+}
+
 /*
- * These routines enable/disable the pagefault handler in that
- * it will not take any locks and go straight to the fixup table.
+ * These routines enable/disable the pagefault handler. If disabled, it will
+ * not take any locks and go straight to the fixup table.
  *
- * They have great resemblance to the preempt_disable/enable calls
- * and in fact they are identical; this is because currently there is
- * no other way to make the pagefault handlers do this. So we do
- * disable preemption but we don't necessarily care about that.
+ * User access methods will not sleep when called from a pagefault_disabled()
+ * environment.
  */
 static inline void pagefault_disable(void)
 {
-       preempt_count_inc();
+       pagefault_disabled_inc();
        /*
         * make sure to have issued the store before a pagefault
         * can hit.
@@ -25,18 +34,31 @@ static inline void pagefault_disable(void)
 
 static inline void pagefault_enable(void)
 {
-#ifndef CONFIG_PREEMPT
        /*
         * make sure to issue those last loads/stores before enabling
         * the pagefault handler again.
         */
        barrier();
-       preempt_count_dec();
-#else
-       preempt_enable();
-#endif
+       pagefault_disabled_dec();
 }
 
+/*
+ * Is the pagefault handler disabled? If so, user access methods will not sleep.
+ */
+#define pagefault_disabled() (current->pagefault_disabled != 0)
+
+/*
+ * The pagefault handler is in general disabled by pagefault_disable() or
+ * when in irq context (via in_atomic()).
+ *
+ * This function should only be used by the fault handlers. Other users should
+ * stick to pagefault_disabled().
+ * Please NEVER use preempt_disable() to disable the fault handler. With
+ * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled.
+ * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT.
+ */
+#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
+
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
 static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
index 0ee05da..0383552 100644 (file)
@@ -109,12 +109,12 @@ static inline bool gid_lte(kgid_t left, kgid_t right)
 
 static inline bool uid_valid(kuid_t uid)
 {
-       return !uid_eq(uid, INVALID_UID);
+       return __kuid_val(uid) != (uid_t) -1;
 }
 
 static inline bool gid_valid(kgid_t gid)
 {
-       return !gid_eq(gid, INVALID_GID);
+       return __kgid_val(gid) != (gid_t) -1;
 }
 
 #ifdef CONFIG_USER_NS
index 2db8334..d69ac4e 100644 (file)
@@ -969,7 +969,7 @@ extern int bit_wait_io_timeout(struct wait_bit_key *);
  * on that signal.
  */
 static inline int
-wait_on_bit(void *word, int bit, unsigned mode)
+wait_on_bit(unsigned long *word, int bit, unsigned mode)
 {
        might_sleep();
        if (!test_bit(bit, word))
@@ -994,7 +994,7 @@ wait_on_bit(void *word, int bit, unsigned mode)
  * on that signal.
  */
 static inline int
-wait_on_bit_io(void *word, int bit, unsigned mode)
+wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
 {
        might_sleep();
        if (!test_bit(bit, word))
@@ -1020,7 +1020,8 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
  * received a signal and the mode permitted wakeup on that signal.
  */
 static inline int
-wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout)
+wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
+                   unsigned long timeout)
 {
        might_sleep();
        if (!test_bit(bit, word))
@@ -1047,7 +1048,8 @@ wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout)
  * on that signal.
  */
 static inline int
-wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
+wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
+                  unsigned mode)
 {
        might_sleep();
        if (!test_bit(bit, word))
@@ -1075,7 +1077,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode
  * the @mode allows that signal to wake the process.
  */
 static inline int
-wait_on_bit_lock(void *word, int bit, unsigned mode)
+wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
 {
        might_sleep();
        if (!test_and_set_bit(bit, word))
@@ -1099,7 +1101,7 @@ wait_on_bit_lock(void *word, int bit, unsigned mode)
  * the @mode allows that signal to wake the process.
  */
 static inline int
-wait_on_bit_lock_io(void *word, int bit, unsigned mode)
+wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
 {
        might_sleep();
        if (!test_and_set_bit(bit, word))
@@ -1125,7 +1127,8 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode)
  * the @mode allows that signal to wake the process.
  */
 static inline int
-wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
+wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
+                       unsigned mode)
 {
        might_sleep();
        if (!test_and_set_bit(bit, word))
index eeda676..6ea16c8 100644 (file)
@@ -30,11 +30,13 @@ struct wpan_phy_cca;
 struct cfg802154_ops {
        struct net_device * (*add_virtual_intf_deprecated)(struct wpan_phy *wpan_phy,
                                                           const char *name,
+                                                          unsigned char name_assign_type,
                                                           int type);
        void    (*del_virtual_intf_deprecated)(struct wpan_phy *wpan_phy,
                                               struct net_device *dev);
        int     (*add_virtual_intf)(struct wpan_phy *wpan_phy,
                                    const char *name,
+                                   unsigned char name_assign_type,
                                    enum nl802154_iftype type,
                                    __le64 extended_addr);
        int     (*del_virtual_intf)(struct wpan_phy *wpan_phy,
index aeee280..1e18005 100644 (file)
@@ -120,11 +120,13 @@ static inline u32 codel_time_to_us(codel_time_t val)
  * struct codel_params - contains codel parameters
  * @target:    target queue size (in time units)
  * @interval:  width of moving time window
+ * @mtu:       device mtu, or minimal queue backlog in bytes.
  * @ecn:       is Explicit Congestion Notification enabled
  */
 struct codel_params {
        codel_time_t    target;
        codel_time_t    interval;
+       u32             mtu;
        bool            ecn;
 };
 
@@ -166,10 +168,12 @@ struct codel_stats {
        u32             ecn_mark;
 };
 
-static void codel_params_init(struct codel_params *params)
+static void codel_params_init(struct codel_params *params,
+                             const struct Qdisc *sch)
 {
        params->interval = MS2TIME(100);
        params->target = MS2TIME(5);
+       params->mtu = psched_mtu(qdisc_dev(sch));
        params->ecn = false;
 }
 
@@ -180,7 +184,7 @@ static void codel_vars_init(struct codel_vars *vars)
 
 static void codel_stats_init(struct codel_stats *stats)
 {
-       stats->maxpacket = 256;
+       stats->maxpacket = 0;
 }
 
 /*
@@ -234,7 +238,7 @@ static bool codel_should_drop(const struct sk_buff *skb,
                stats->maxpacket = qdisc_pkt_len(skb);
 
        if (codel_time_before(vars->ldelay, params->target) ||
-           sch->qstats.backlog <= stats->maxpacket) {
+           sch->qstats.backlog <= params->mtu) {
                /* went below - stay below for at least interval */
                vars->first_above_time = 0;
                return false;
index 48a8158..0320bbb 100644 (file)
@@ -98,7 +98,8 @@ struct inet_connection_sock {
        const struct tcp_congestion_ops *icsk_ca_ops;
        const struct inet_connection_sock_af_ops *icsk_af_ops;
        unsigned int              (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
-       __u8                      icsk_ca_state:7,
+       __u8                      icsk_ca_state:6,
+                                 icsk_ca_setsockopt:1,
                                  icsk_ca_dst_locked:1;
        __u8                      icsk_retransmits;
        __u8                      icsk_pending;
@@ -129,9 +130,10 @@ struct inet_connection_sock {
 
                u32               probe_timestamp;
        } icsk_mtup;
-       u32                       icsk_ca_priv[16];
        u32                       icsk_user_timeout;
-#define ICSK_CA_PRIV_SIZE      (16 * sizeof(u32))
+
+       u64                       icsk_ca_priv[64 / sizeof(u64)];
+#define ICSK_CA_PRIV_SIZE      (8 * sizeof(u64))
 };
 
 #define ICSK_TIME_RETRANS      1       /* Retransmit timer */
index b4bef11..fc57f6b 100644 (file)
@@ -354,7 +354,7 @@ enum ieee80211_rssi_event_data {
 };
 
 /**
- * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT
+ * struct ieee80211_rssi_event - data attached to an %RSSI_EVENT
  * @data: See &enum ieee80211_rssi_event_data
  */
 struct ieee80211_rssi_event {
@@ -388,7 +388,7 @@ enum ieee80211_mlme_event_status {
 };
 
 /**
- * enum ieee80211_mlme_event - data attached to an %MLME_EVENT
+ * struct ieee80211_mlme_event - data attached to an %MLME_EVENT
  * @data: See &enum ieee80211_mlme_event_data
  * @status: See &enum ieee80211_mlme_event_status
  * @reason: the reason code if applicable
@@ -401,9 +401,10 @@ struct ieee80211_mlme_event {
 
 /**
  * struct ieee80211_event - event to be sent to the driver
- * @type The event itself. See &enum ieee80211_event_type.
+ * @type: The event itself. See &enum ieee80211_event_type.
  * @rssi: relevant if &type is %RSSI_EVENT
  * @mlme: relevant if &type is %AUTH_EVENT
+ * @u:    union holding the above two fields
  */
 struct ieee80211_event {
        enum ieee80211_event_type type;
@@ -1666,6 +1667,8 @@ struct ieee80211_tx_control {
  * @sta: station table entry, %NULL for per-vif queue
  * @tid: the TID for this queue (unused for per-vif queue)
  * @ac: the AC for this queue
+ * @drv_priv: data area for driver use, will always be aligned to
+ *     sizeof(void *).
  *
  * The driver can obtain packets from this queue by calling
  * ieee80211_tx_dequeue().
index e18e7fd..7df28a4 100644 (file)
@@ -247,19 +247,109 @@ static inline void ieee802154_le64_to_be64(void *be64_dst, const void *le64_src)
        __put_unaligned_memmove64(swab64p(le64_src), be64_dst);
 }
 
-/* Basic interface to register ieee802154 device */
+/**
+ * ieee802154_alloc_hw - Allocate a new hardware device
+ *
+ * This must be called once for each hardware device. The returned pointer
+ * must be used to refer to this device when calling other functions.
+ * mac802154 allocates a private data area for the driver pointed to by
+ * @priv in &struct ieee802154_hw, the size of this area is given as
+ * @priv_data_len.
+ *
+ * @priv_data_len: length of private data
+ * @ops: callbacks for this device
+ *
+ * Return: A pointer to the new hardware device, or %NULL on error.
+ */
 struct ieee802154_hw *
 ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops);
+
+/**
+ * ieee802154_free_hw - free hardware descriptor
+ *
+ * This function frees everything that was allocated, including the
+ * private data for the driver. You must call ieee802154_unregister_hw()
+ * before calling this function.
+ *
+ * @hw: the hardware to free
+ */
 void ieee802154_free_hw(struct ieee802154_hw *hw);
+
+/**
+ * ieee802154_register_hw - Register hardware device
+ *
+ * You must call this function before any other functions in
+ * mac802154. Note that before a hardware can be registered, you
+ * need to fill the contained wpan_phy's information.
+ *
+ * @hw: the device to register as returned by ieee802154_alloc_hw()
+ *
+ * Return: 0 on success. An error code otherwise.
+ */
 int ieee802154_register_hw(struct ieee802154_hw *hw);
+
+/**
+ * ieee802154_unregister_hw - Unregister a hardware device
+ *
+ * This function instructs mac802154 to free allocated resources
+ * and unregister netdevices from the networking subsystem.
+ *
+ * @hw: the hardware to unregister
+ */
 void ieee802154_unregister_hw(struct ieee802154_hw *hw);
 
+/**
+ * ieee802154_rx - receive frame
+ *
+ * Use this function to hand received frames to mac802154. The receive
+ * buffer in @skb must start with an IEEE 802.15.4 header. In case of a
+ * paged @skb is used, the driver is recommended to put the ieee802154
+ * header of the frame on the linear part of the @skb to avoid memory
+ * allocation and/or memcpy by the stack.
+ *
+ * This function may not be called in IRQ context. Calls to this function
+ * for a single hardware must be synchronized against each other.
+ *
+ * @hw: the hardware this frame came in on
+ * @skb: the buffer to receive, owned by mac802154 after this call
+ */
 void ieee802154_rx(struct ieee802154_hw *hw, struct sk_buff *skb);
+
+/**
+ * ieee802154_rx_irqsafe - receive frame
+ *
+ * Like ieee802154_rx() but can be called in IRQ context
+ * (internally defers to a tasklet.)
+ *
+ * @hw: the hardware this frame came in on
+ * @skb: the buffer to receive, owned by mac802154 after this call
+ * @lqi: link quality indicator
+ */
 void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb,
                           u8 lqi);
-
+/**
+ * ieee802154_wake_queue - wake ieee802154 queue
+ * @hw: pointer as obtained from ieee802154_alloc_hw().
+ *
+ * Drivers should use this function instead of netif_wake_queue.
+ */
 void ieee802154_wake_queue(struct ieee802154_hw *hw);
+
+/**
+ * ieee802154_stop_queue - stop ieee802154 queue
+ * @hw: pointer as obtained from ieee802154_alloc_hw().
+ *
+ * Drivers should use this function instead of netif_stop_queue.
+ */
 void ieee802154_stop_queue(struct ieee802154_hw *hw);
+
+/**
+ * ieee802154_xmit_complete - frame transmission complete
+ *
+ * @hw: pointer as obtained from ieee802154_alloc_hw().
+ * @skb: buffer for transmission
+ * @ifs_handling: indicate interframe space handling
+ */
 void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
                              bool ifs_handling);
 
index c56a438..ce13cf2 100644 (file)
@@ -574,11 +574,14 @@ static inline void sctp_v6_map_v4(union sctp_addr *addr)
 /* Map v4 address to v4-mapped v6 address */
 static inline void sctp_v4_map_v6(union sctp_addr *addr)
 {
+       __be16 port;
+
+       port = addr->v4.sin_port;
+       addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
+       addr->v6.sin6_port = port;
        addr->v6.sin6_family = AF_INET6;
        addr->v6.sin6_flowinfo = 0;
        addr->v6.sin6_scope_id = 0;
-       addr->v6.sin6_port = addr->v4.sin_port;
-       addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
        addr->v6.sin6_addr.s6_addr32[0] = 0;
        addr->v6.sin6_addr.s6_addr32[1] = 0;
        addr->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff);
index 051dc5c..6d204f3 100644 (file)
@@ -576,7 +576,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 }
 
 /* tcp.c */
-void tcp_get_info(const struct sock *, struct tcp_info *);
+void tcp_get_info(struct sock *, struct tcp_info *);
 
 /* Read 'sendfile()'-style from a TCP socket */
 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
@@ -804,6 +804,8 @@ enum tcp_ca_ack_event_flags {
 /* Requires ECN/ECT set on all packets */
 #define TCP_CONG_NEEDS_ECN     0x2
 
+union tcp_cc_info;
+
 struct tcp_congestion_ops {
        struct list_head        list;
        u32 key;
@@ -829,7 +831,8 @@ struct tcp_congestion_ops {
        /* hook for packet ack accounting (optional) */
        void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us);
        /* get info for inet_diag (optional) */
-       int (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
+       size_t (*get_info)(struct sock *sk, u32 ext, int *attr,
+                          union tcp_cc_info *info);
 
        char            name[TCP_CA_NAME_MAX];
        struct module   *owner;
index ce55906..ac54c27 100644 (file)
@@ -160,7 +160,7 @@ static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
 }
 
 /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */
-static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
+static inline void rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
 {
        if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
                struct sockaddr_in *out_in = (struct sockaddr_in *)out;
@@ -173,7 +173,6 @@ static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
                out_in->sin6_family = AF_INET6;
                memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
        }
-       return 0;
 }
 
 static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
index 0e3ff30..39ed2d2 100644 (file)
@@ -105,7 +105,8 @@ enum ib_cm_data_size {
        IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
        IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
        IB_CM_SIDR_REP_INFO_LENGTH       = 72,
-       IB_CM_COMPARE_SIZE               = 64
+       /* compare done u32 at a time */
+       IB_CM_COMPARE_SIZE               = (64 / sizeof(u32))
 };
 
 struct ib_cm_id;
@@ -337,8 +338,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id);
 #define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL)
 
 struct ib_cm_compare_data {
-       u8  data[IB_CM_COMPARE_SIZE];
-       u8  mask[IB_CM_COMPARE_SIZE];
+       u32  data[IB_CM_COMPARE_SIZE];
+       u32  mask[IB_CM_COMPARE_SIZE];
 };
 
 /**
index 928b277..fda3167 100644 (file)
@@ -148,6 +148,16 @@ int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
 int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
 
 /**
+ * iwpm_remote_info_cb - Process remote connecting peer address info, which
+ *                       the port mapper has received from the connecting peer
+ *
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Stores the IPv4/IPv6 address info in a hash table
+ */
+int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
  * iwpm_mapping_error_cb - Process port mapper notification for error
  *
  * @skb:
@@ -175,6 +185,21 @@ int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
 int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
 
 /**
+ * iwpm_get_remote_info - Get the remote connecting peer address info
+ *
+ * @mapped_loc_addr: Mapped local address of the listening peer
+ * @mapped_rem_addr: Mapped remote address of the connecting peer
+ * @remote_addr: To store the remote address of the connecting peer
+ * @nl_client: The index of the netlink client
+ *
+ * The remote address info is retrieved and provided to the client in
+ * the remote_addr. After that it is removed from the hash table
+ */
+int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
+                       struct sockaddr_storage *mapped_rem_addr,
+                       struct sockaddr_storage *remote_addr, u8 nl_client);
+
+/**
  * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
  *                       info in a hash table
  * @local_addr: Local ip/tcp address
index 53a18b3..df70590 100644 (file)
@@ -9,6 +9,8 @@
 #include <sound/core.h>
 #include <sound/hdaudio.h>
 
+#define AC_AMP_FAKE_MUTE       0x10    /* fake mute bit set to amp verbs */
+
 int snd_hdac_regmap_init(struct hdac_device *codec);
 void snd_hdac_regmap_exit(struct hdac_device *codec);
 int snd_hdac_regmap_add_vendor_verb(struct hdac_device *codec,
index d61be72..5f12257 100644 (file)
@@ -1,9 +1,7 @@
 #ifndef TARGET_CORE_BACKEND_H
 #define TARGET_CORE_BACKEND_H
 
-#define TRANSPORT_PLUGIN_PHBA_PDEV             1
-#define TRANSPORT_PLUGIN_VHBA_PDEV             2
-#define TRANSPORT_PLUGIN_VHBA_VDEV             3
+#define TRANSPORT_FLAG_PASSTHROUGH             1
 
 struct target_backend_cits {
        struct config_item_type tb_dev_cit;
@@ -22,7 +20,7 @@ struct se_subsystem_api {
        char inquiry_rev[4];
        struct module *owner;
 
-       u8 transport_type;
+       u8 transport_flags;
 
        int (*attach_hba)(struct se_hba *, u32);
        void (*detach_hba)(struct se_hba *);
@@ -138,5 +136,7 @@ int se_dev_set_queue_depth(struct se_device *, u32);
 int    se_dev_set_max_sectors(struct se_device *, u32);
 int    se_dev_set_optimal_sectors(struct se_device *, u32);
 int    se_dev_set_block_size(struct se_device *, u32);
+sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
+       sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
 #endif /* TARGET_CORE_BACKEND_H */
index 25bb04c..b99c011 100644 (file)
@@ -40,8 +40,6 @@ struct target_fabric_configfs {
        struct config_item      *tf_fabric;
        /* Passed from fabric modules */
        struct config_item_type *tf_fabric_cit;
-       /* Pointer to target core subsystem */
-       struct configfs_subsystem *tf_subsys;
        /* Pointer to fabric's struct module */
        struct module *tf_module;
        struct target_core_fabric_ops tf_ops;
index 17c7f5a..0f4dc37 100644 (file)
@@ -4,7 +4,6 @@
 struct target_core_fabric_ops {
        struct module *module;
        const char *name;
-       struct configfs_subsystem *tf_subsys;
        char *(*get_fabric_name)(void);
        u8 (*get_fabric_proto_ident)(struct se_portal_group *);
        char *(*tpg_get_wwn)(struct se_portal_group *);
@@ -109,6 +108,9 @@ struct target_core_fabric_ops {
 int target_register_template(const struct target_core_fabric_ops *fo);
 void target_unregister_template(const struct target_core_fabric_ops *fo);
 
+int target_depend_item(struct config_item *item);
+void target_undepend_item(struct config_item *item);
+
 struct se_session *transport_init_session(enum target_prot_op);
 int transport_alloc_session_tags(struct se_session *, unsigned int,
                unsigned int);
index 81ea598..f7554fd 100644 (file)
@@ -140,19 +140,42 @@ DEFINE_EVENT(kmem_free, kfree,
        TP_ARGS(call_site, ptr)
 );
 
-DEFINE_EVENT(kmem_free, kmem_cache_free,
+DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free,
 
        TP_PROTO(unsigned long call_site, const void *ptr),
 
-       TP_ARGS(call_site, ptr)
+       TP_ARGS(call_site, ptr),
+
+       /*
+        * This trace can be potentially called from an offlined cpu.
+        * Since trace points use RCU and RCU should not be used from
+        * offline cpus, filter such calls out.
+        * While this trace can be called from a preemptable section,
+        * it has no impact on the condition since tasks can migrate
+        * only from online cpus to other online cpus. Thus its safe
+        * to use raw_smp_processor_id.
+        */
+       TP_CONDITION(cpu_online(raw_smp_processor_id()))
 );
 
-TRACE_EVENT(mm_page_free,
+TRACE_EVENT_CONDITION(mm_page_free,
 
        TP_PROTO(struct page *page, unsigned int order),
 
        TP_ARGS(page, order),
 
+
+       /*
+        * This trace can be potentially called from an offlined cpu.
+        * Since trace points use RCU and RCU should not be used from
+        * offline cpus, filter such calls out.
+        * While this trace can be called from a preemptable section,
+        * it has no impact on the condition since tasks can migrate
+        * only from online cpus to other online cpus. Thus its safe
+        * to use raw_smp_processor_id.
+        */
+       TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
        TP_STRUCT__entry(
                __field(        unsigned long,  pfn             )
                __field(        unsigned int,   order           )
@@ -253,12 +276,35 @@ DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked,
        TP_ARGS(page, order, migratetype)
 );
 
-DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain,
+TRACE_EVENT_CONDITION(mm_page_pcpu_drain,
 
        TP_PROTO(struct page *page, unsigned int order, int migratetype),
 
        TP_ARGS(page, order, migratetype),
 
+       /*
+        * This trace can be potentially called from an offlined cpu.
+        * Since trace points use RCU and RCU should not be used from
+        * offline cpus, filter such calls out.
+        * While this trace can be called from a preemptable section,
+        * it has no impact on the condition since tasks can migrate
+        * only from online cpus to other online cpus. Thus its safe
+        * to use raw_smp_processor_id.
+        */
+       TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  pfn             )
+               __field(        unsigned int,   order           )
+               __field(        int,            migratetype     )
+       ),
+
+       TP_fast_assign(
+               __entry->pfn            = page ? page_to_pfn(page) : -1UL;
+               __entry->order          = order;
+               __entry->migratetype    = migratetype;
+       ),
+
        TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
                pfn_to_page(__entry->pfn), __entry->pfn,
                __entry->order, __entry->migratetype)
index 30fedaf..d57a575 100644 (file)
@@ -147,7 +147,8 @@ TRACE_EVENT(sched_switch,
                  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
                                { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
                                { 16, "Z" }, { 32, "X" }, { 64, "x" },
-                               { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R",
+                               { 128, "K" }, { 256, "W" }, { 512, "P" },
+                               { 1024, "N" }) : "R",
                __entry->prev_state & TASK_STATE_MAX ? "+" : "",
                __entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
index 880dd74..c178d13 100644 (file)
@@ -250,7 +250,6 @@ DEFINE_EVENT(writeback_class, name, \
 DEFINE_WRITEBACK_EVENT(writeback_nowork);
 DEFINE_WRITEBACK_EVENT(writeback_wake_background);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
-DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
 
 DECLARE_EVENT_CLASS(wbc_class,
        TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
index 871e73f..94d44ab 100644 (file)
@@ -1038,6 +1038,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_CURRENT_GPU_SCLK   0x22
 #define RADEON_INFO_CURRENT_GPU_MCLK   0x23
 #define RADEON_INFO_READ_REG           0x24
+#define RADEON_INFO_VA_UNMAP_WORKING   0x25
 
 struct drm_radeon_info {
        uint32_t                request;
index d65c0a0..c7093c7 100644 (file)
@@ -143,4 +143,8 @@ struct tcp_dctcp_info {
        __u32   dctcp_ab_tot;
 };
 
+union tcp_cc_info {
+       struct tcpvegas_info    vegas;
+       struct tcp_dctcp_info   dctcp;
+};
 #endif /* _UAPI_INET_DIAG_H_ */
index bc9abfe..139d4dd 100644 (file)
@@ -31,4 +31,14 @@ struct mpls_label {
 #define MPLS_LS_TTL_MASK        0x000000FF
 #define MPLS_LS_TTL_SHIFT       0
 
+/* Reserved labels */
+#define MPLS_LABEL_IPV4NULL            0 /* RFC3032 */
+#define MPLS_LABEL_RTALERT             1 /* RFC3032 */
+#define MPLS_LABEL_IPV6NULL            2 /* RFC3032 */
+#define MPLS_LABEL_IMPLNULL            3 /* RFC3032 */
+#define MPLS_LABEL_ENTROPY             7 /* RFC6790 */
+#define MPLS_LABEL_GAL                 13 /* RFC5586 */
+#define MPLS_LABEL_OAMALERT            14 /* RFC3429 */
+#define MPLS_LABEL_EXTENSION           15 /* RFC7274 */
+
 #endif /* _UAPI_MPLS_H */
index 9993a42..ef9f80f 100644 (file)
@@ -42,6 +42,9 @@ enum tcp_conntrack {
 /* The field td_maxack has been set */
 #define IP_CT_TCP_FLAG_MAXACK_SET              0x20
 
+/* Marks possibility for expected RFC5961 challenge ACK */
+#define IP_CT_EXP_CHALLENGE_ACK                0x40
+
 struct nf_ct_tcp_flags {
        __u8 flags;
        __u8 mask;
index 309211b..d97f84c 100644 (file)
@@ -167,6 +167,7 @@ enum perf_branch_sample_type_shift {
        PERF_SAMPLE_BRANCH_COND_SHIFT           = 10, /* conditional branches */
 
        PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT     = 11, /* call/ret stack */
+       PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT       = 12, /* indirect jumps */
 
        PERF_SAMPLE_BRANCH_MAX_SHIFT            /* non-ABI */
 };
@@ -186,6 +187,7 @@ enum perf_branch_sample_type {
        PERF_SAMPLE_BRANCH_COND         = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
 
        PERF_SAMPLE_BRANCH_CALL_STACK   = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+       PERF_SAMPLE_BRANCH_IND_JUMP     = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
 
        PERF_SAMPLE_BRANCH_MAX          = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
@@ -564,6 +566,10 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_USER            (5 << 0)
 
 /*
+ * Indicates that /proc/PID/maps parsing are truncated by time out.
+ */
+#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT        (1 << 12)
+/*
  * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
  * different events so can reuse the same bit position.
  */
@@ -800,6 +806,18 @@ enum perf_event_type {
         */
        PERF_RECORD_ITRACE_START                = 12,
 
+       /*
+        * Records the dropped/lost sample number.
+        *
+        * struct {
+        *      struct perf_event_header        header;
+        *
+        *      u64                             lost;
+        *      struct sample_id                sample_id;
+        * };
+        */
+       PERF_RECORD_LOST_SAMPLES                = 13,
+
        PERF_RECORD_MAX,                        /* non-ABI */
 };
 
index 974db03..17fb02f 100644 (file)
@@ -337,7 +337,7 @@ struct rtnexthop {
 #define RTNH_F_DEAD            1       /* Nexthop is dead (used by multipath)  */
 #define RTNH_F_PERVASIVE       2       /* Do recursive gateway lookup  */
 #define RTNH_F_ONLINK          4       /* Gateway is forced on link    */
-#define RTNH_F_EXTERNAL                8       /* Route installed externally   */
+#define RTNH_F_OFFLOAD         8       /* offloaded route */
 
 /* Macros to handle hexthops */
 
index 3b97183..faa72f4 100644 (file)
@@ -112,6 +112,7 @@ enum {
 #define TCP_FASTOPEN           23      /* Enable FastOpen on listeners */
 #define TCP_TIMESTAMP          24
 #define TCP_NOTSENT_LOWAT      25      /* limit number of unsent bytes in write queue */
+#define TCP_CC_INFO            26      /* Get Congestion Control (optional) info */
 
 struct tcp_repair_opt {
        __u32   opt_code;
@@ -189,6 +190,8 @@ struct tcp_info {
 
        __u64   tcpi_pacing_rate;
        __u64   tcpi_max_pacing_rate;
+       __u64   tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
+       __u64   tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
 };
 
 /* for TCP_MD5SIG socket option */
index 984169a..d7f1cbc 100644 (file)
@@ -26,6 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE. */
 #include <linux/types.h>
+#include <linux/virtio_types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
index de69170..6e4bb42 100644 (file)
@@ -37,6 +37,7 @@ enum {
        RDMA_NL_IWPM_ADD_MAPPING,
        RDMA_NL_IWPM_QUERY_MAPPING,
        RDMA_NL_IWPM_REMOVE_MAPPING,
+       RDMA_NL_IWPM_REMOTE_INFO,
        RDMA_NL_IWPM_HANDLE_ERR,
        RDMA_NL_IWPM_MAPINFO,
        RDMA_NL_IWPM_MAPINFO_NUM,
index 5321cd9..7d95fdf 100644 (file)
@@ -17,7 +17,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
                              irq_handler_t handler,
                              unsigned long irqflags, const char *devname,
                              void *dev_id);
-int bind_virq_to_irq(unsigned int virq, unsigned int cpu);
+int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu);
 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
                            irq_handler_t handler,
                            unsigned long irqflags, const char *devname,
index 143ca5f..4478f4b 100644 (file)
@@ -191,6 +191,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
                      struct gnttab_unmap_grant_ref *kunmap_ops,
                      struct page **pages, unsigned int count);
 void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item);
+int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item);
 
 
 /* Perform a batch of grant map/copy operations. Retry every batch slot
index c643e6a..0ce4f32 100644 (file)
@@ -13,6 +13,7 @@ void xen_arch_post_suspend(int suspend_cancelled);
 
 void xen_timer_resume(void);
 void xen_arch_resume(void);
+void xen_arch_suspend(void);
 
 void xen_resume_notifier_register(struct notifier_block *nb);
 void xen_resume_notifier_unregister(struct notifier_block *nb);
index dc24dec..b999fa3 100644 (file)
@@ -465,13 +465,9 @@ endmenu # "CPU/Task time and stats accounting"
 
 menu "RCU Subsystem"
 
-choice
-       prompt "RCU Implementation"
-       default TREE_RCU
-
 config TREE_RCU
-       bool "Tree-based hierarchical RCU"
-       depends on !PREEMPT && SMP
+       bool
+       default y if !PREEMPT && SMP
        help
          This option selects the RCU implementation that is
          designed for very large SMP system with hundreds or
@@ -479,8 +475,8 @@ config TREE_RCU
          smaller systems.
 
 config PREEMPT_RCU
-       bool "Preemptible tree-based hierarchical RCU"
-       depends on PREEMPT
+       bool
+       default y if PREEMPT
        help
          This option selects the RCU implementation that is
          designed for very large SMP systems with hundreds or
@@ -491,15 +487,28 @@ config PREEMPT_RCU
          Select this option if you are unsure.
 
 config TINY_RCU
-       bool "UP-only small-memory-footprint RCU"
-       depends on !PREEMPT && !SMP
+       bool
+       default y if !PREEMPT && !SMP
        help
          This option selects the RCU implementation that is
          designed for UP systems from which real-time response
          is not required.  This option greatly reduces the
          memory footprint of RCU.
 
-endchoice
+config RCU_EXPERT
+       bool "Make expert-level adjustments to RCU configuration"
+       default n
+       help
+         This option needs to be enabled if you wish to make
+         expert-level adjustments to RCU configuration.  By default,
+         no such adjustments can be made, which has the often-beneficial
+         side-effect of preventing "make oldconfig" from asking you all
+         sorts of detailed questions about how you would like numerous
+         obscure RCU options to be set up.
+
+         Say Y if you need to make expert-level adjustments to RCU.
+
+         Say N if you are unsure.
 
 config SRCU
        bool
@@ -509,7 +518,7 @@ config SRCU
          sections.
 
 config TASKS_RCU
-       bool "Task_based RCU implementation using voluntary context switch"
+       bool
        default n
        select SRCU
        help
@@ -517,8 +526,6 @@ config TASKS_RCU
          only voluntary context switch (not preemption!), idle, and
          user-mode execution as quiescent states.
 
-         If unsure, say N.
-
 config RCU_STALL_COMMON
        def_bool ( TREE_RCU || PREEMPT_RCU || RCU_TRACE )
        help
@@ -531,9 +538,7 @@ config CONTEXT_TRACKING
        bool
 
 config RCU_USER_QS
-       bool "Consider userspace as in RCU extended quiescent state"
-       depends on HAVE_CONTEXT_TRACKING && SMP
-       select CONTEXT_TRACKING
+       bool
        help
          This option sets hooks on kernel / userspace boundaries and
          puts RCU in extended quiescent state when the CPU runs in
@@ -541,12 +546,6 @@ config RCU_USER_QS
          excluded from the global RCU state machine and thus doesn't
          try to keep the timer tick on for RCU.
 
-         Unless you want to hack and help the development of the full
-         dynticks mode, you shouldn't enable this option.  It also
-         adds unnecessary overhead.
-
-         If unsure say N
-
 config CONTEXT_TRACKING_FORCE
        bool "Force context tracking"
        depends on CONTEXT_TRACKING
@@ -578,7 +577,7 @@ config RCU_FANOUT
        int "Tree-based hierarchical RCU fanout value"
        range 2 64 if 64BIT
        range 2 32 if !64BIT
-       depends on TREE_RCU || PREEMPT_RCU
+       depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
        default 64 if 64BIT
        default 32 if !64BIT
        help
@@ -596,9 +595,9 @@ config RCU_FANOUT
 
 config RCU_FANOUT_LEAF
        int "Tree-based hierarchical RCU leaf-level fanout value"
-       range 2 RCU_FANOUT if 64BIT
-       range 2 RCU_FANOUT if !64BIT
-       depends on TREE_RCU || PREEMPT_RCU
+       range 2 64 if 64BIT
+       range 2 32 if !64BIT
+       depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
        default 16
        help
          This option controls the leaf-level fanout of hierarchical
@@ -621,23 +620,9 @@ config RCU_FANOUT_LEAF
 
          Take the default if unsure.
 
-config RCU_FANOUT_EXACT
-       bool "Disable tree-based hierarchical RCU auto-balancing"
-       depends on TREE_RCU || PREEMPT_RCU
-       default n
-       help
-         This option forces use of the exact RCU_FANOUT value specified,
-         regardless of imbalances in the hierarchy.  This is useful for
-         testing RCU itself, and might one day be useful on systems with
-         strong NUMA behavior.
-
-         Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
-
-         Say N if unsure.
-
 config RCU_FAST_NO_HZ
        bool "Accelerate last non-dyntick-idle CPU's grace periods"
-       depends on NO_HZ_COMMON && SMP
+       depends on NO_HZ_COMMON && SMP && RCU_EXPERT
        default n
        help
          This option permits CPUs to enter dynticks-idle state even if
@@ -663,7 +648,7 @@ config TREE_RCU_TRACE
 
 config RCU_BOOST
        bool "Enable RCU priority boosting"
-       depends on RT_MUTEXES && PREEMPT_RCU
+       depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
        default n
        help
          This option boosts the priority of preempted RCU readers that
@@ -680,6 +665,7 @@ config RCU_KTHREAD_PRIO
        range 0 99 if !RCU_BOOST
        default 1 if RCU_BOOST
        default 0 if !RCU_BOOST
+       depends on RCU_EXPERT
        help
          This option specifies the SCHED_FIFO priority value that will be
          assigned to the rcuc/n and rcub/n threads and is also the value
@@ -1637,7 +1623,7 @@ config PERF_EVENTS
 config DEBUG_PERF_USE_VMALLOC
        default n
        bool "Debug: use vmalloc to back perf mmap() buffers"
-       depends on PERF_EVENTS && DEBUG_KERNEL
+       depends on PERF_EVENTS && DEBUG_KERNEL && !PPC
        select PERF_USE_VMALLOC
        help
         Use vmalloc memory to back perf mmap() buffers.
index 8369ffa..a95bbdb 100644 (file)
@@ -225,10 +225,11 @@ dev_t name_to_dev_t(const char *name)
 #endif
 
        if (strncmp(name, "/dev/", 5) != 0) {
-               unsigned maj, min;
+               unsigned maj, min, offset;
                char dummy;
 
-               if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2) {
+               if ((sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2) ||
+                   (sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3)) {
                        res = MKDEV(maj, min);
                        if (maj != MAJOR(res) || min != MINOR(res))
                                goto fail;
index 3aaea7f..a24ba9f 100644 (file)
@@ -47,8 +47,7 @@
 #define RECV           1
 
 #define STATE_NONE     0
-#define STATE_PENDING  1
-#define STATE_READY    2
+#define STATE_READY    1
 
 struct posix_msg_tree_node {
        struct rb_node          rb_node;
@@ -571,15 +570,12 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
        wq_add(info, sr, ewp);
 
        for (;;) {
-               set_current_state(TASK_INTERRUPTIBLE);
+               __set_current_state(TASK_INTERRUPTIBLE);
 
                spin_unlock(&info->lock);
                time = schedule_hrtimeout_range_clock(timeout, 0,
                        HRTIMER_MODE_ABS, CLOCK_REALTIME);
 
-               while (ewp->state == STATE_PENDING)
-                       cpu_relax();
-
                if (ewp->state == STATE_READY) {
                        retval = 0;
                        goto out;
@@ -907,11 +903,15 @@ out_name:
  * list of waiting receivers. A sender checks that list before adding the new
  * message into the message array. If there is a waiting receiver, then it
  * bypasses the message array and directly hands the message over to the
- * receiver.
- * The receiver accepts the message and returns without grabbing the queue
- * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
- * are necessary. The same algorithm is used for sysv semaphores, see
- * ipc/sem.c for more details.
+ * receiver. The receiver accepts the message and returns without grabbing the
+ * queue spinlock:
+ *
+ * - Set pointer to message.
+ * - Queue the receiver task for later wakeup (without the info->lock).
+ * - Update its state to STATE_READY. Now the receiver can continue.
+ * - Wake up the process after the lock is dropped. Should the process wake up
+ *   before this wakeup (due to a timeout or a signal) it will either see
+ *   STATE_READY and continue or acquire the lock to check the state again.
  *
  * The same algorithm is used for senders.
  */
@@ -919,21 +919,29 @@ out_name:
 /* pipelined_send() - send a message directly to the task waiting in
  * sys_mq_timedreceive() (without inserting message into a queue).
  */
-static inline void pipelined_send(struct mqueue_inode_info *info,
+static inline void pipelined_send(struct wake_q_head *wake_q,
+                                 struct mqueue_inode_info *info,
                                  struct msg_msg *message,
                                  struct ext_wait_queue *receiver)
 {
        receiver->msg = message;
        list_del(&receiver->list);
-       receiver->state = STATE_PENDING;
-       wake_up_process(receiver->task);
-       smp_wmb();
+       wake_q_add(wake_q, receiver->task);
+       /*
+        * Rely on the implicit cmpxchg barrier from wake_q_add such
+        * that we can ensure that updating receiver->state is the last
+        * write operation: As once set, the receiver can continue,
+        * and if we don't have the reference count from the wake_q,
+        * yet, at that point we can later have a use-after-free
+        * condition and bogus wakeup.
+        */
        receiver->state = STATE_READY;
 }
 
 /* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
  * gets its message and put to the queue (we have one free place for sure). */
-static inline void pipelined_receive(struct mqueue_inode_info *info)
+static inline void pipelined_receive(struct wake_q_head *wake_q,
+                                    struct mqueue_inode_info *info)
 {
        struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);
 
@@ -944,10 +952,9 @@ static inline void pipelined_receive(struct mqueue_inode_info *info)
        }
        if (msg_insert(sender->msg, info))
                return;
+
        list_del(&sender->list);
-       sender->state = STATE_PENDING;
-       wake_up_process(sender->task);
-       smp_wmb();
+       wake_q_add(wake_q, sender->task);
        sender->state = STATE_READY;
 }
 
@@ -965,6 +972,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
        struct timespec ts;
        struct posix_msg_tree_node *new_leaf = NULL;
        int ret = 0;
+       WAKE_Q(wake_q);
 
        if (u_abs_timeout) {
                int res = prepare_timeout(u_abs_timeout, &expires, &ts);
@@ -1049,7 +1057,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
        } else {
                receiver = wq_get_first_waiter(info, RECV);
                if (receiver) {
-                       pipelined_send(info, msg_ptr, receiver);
+                       pipelined_send(&wake_q, info, msg_ptr, receiver);
                } else {
                        /* adds message to the queue */
                        ret = msg_insert(msg_ptr, info);
@@ -1062,6 +1070,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
        }
 out_unlock:
        spin_unlock(&info->lock);
+       wake_up_q(&wake_q);
 out_free:
        if (ret)
                free_msg(msg_ptr);
@@ -1149,14 +1158,17 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
                        msg_ptr = wait.msg;
                }
        } else {
+               WAKE_Q(wake_q);
+
                msg_ptr = msg_get(info);
 
                inode->i_atime = inode->i_mtime = inode->i_ctime =
                                CURRENT_TIME;
 
                /* There is now free space in queue. */
-               pipelined_receive(info);
+               pipelined_receive(&wake_q, info);
                spin_unlock(&info->lock);
+               wake_up_q(&wake_q);
                ret = 0;
        }
        if (ret == 0) {
index 08561f1..ebdb004 100644 (file)
@@ -235,9 +235,16 @@ config LOCK_SPIN_ON_OWNER
        def_bool y
        depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
 
-config ARCH_USE_QUEUE_RWLOCK
+config ARCH_USE_QUEUED_SPINLOCKS
        bool
 
-config QUEUE_RWLOCK
-       def_bool y if ARCH_USE_QUEUE_RWLOCK
+config QUEUED_SPINLOCKS
+       def_bool y if ARCH_USE_QUEUED_SPINLOCKS
+       depends on SMP
+
+config ARCH_USE_QUEUED_RWLOCKS
+       bool
+
+config QUEUED_RWLOCKS
+       def_bool y if ARCH_USE_QUEUED_RWLOCKS
        depends on SMP
index 24f0061..333d364 100644 (file)
@@ -912,7 +912,8 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
                         * bitmap. We must however ensure the end of the
                         * kernel bitmap is zeroed.
                         */
-                       if (nr_compat_longs-- > 0) {
+                       if (nr_compat_longs) {
+                               nr_compat_longs--;
                                if (__get_user(um, umask))
                                        return -EFAULT;
                        } else {
@@ -954,7 +955,8 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
                         * We dont want to write past the end of the userspace
                         * bitmap.
                         */
-                       if (nr_compat_longs-- > 0) {
+                       if (nr_compat_longs) {
+                               nr_compat_longs--;
                                if (__put_user(um, umask))
                                        return -EFAULT;
                        }
index 94bbe46..9c9c9fa 100644 (file)
@@ -398,7 +398,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
        if (err) {
                /* CPU didn't die: tell everyone.  Can't complain. */
-               smpboot_unpark_threads(cpu);
                cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
                goto out_release;
        }
@@ -463,6 +462,7 @@ static int smpboot_thread_call(struct notifier_block *nfb,
 
        switch (action & ~CPU_TASKS_FROZEN) {
 
+       case CPU_DOWN_FAILED:
        case CPU_ONLINE:
                smpboot_unpark_threads(cpu);
                break;
@@ -479,7 +479,7 @@ static struct notifier_block smpboot_thread_notifier = {
        .priority = CPU_PRI_SMPBOOT,
 };
 
-void __cpuinit smpboot_thread_init(void)
+void smpboot_thread_init(void)
 {
        register_cpu_notifier(&smpboot_thread_notifier);
 }
index 81aa3a4..f2003b9 100644 (file)
@@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx)
  * Those places that change perf_event::ctx will hold both
  * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
  *
- * Lock ordering is by mutex address. There is one other site where
- * perf_event_context::mutex nests and that is put_event(). But remember that
- * that is a parent<->child context relation, and migration does not affect
- * children, therefore these two orderings should not interact.
+ * Lock ordering is by mutex address. There are two other sites where
+ * perf_event_context::mutex nests and those are:
+ *
+ *  - perf_event_exit_task_context()   [ child , 0 ]
+ *      __perf_event_exit_task()
+ *        sync_child_event()
+ *          put_event()                        [ parent, 1 ]
+ *
+ *  - perf_event_init_context()                [ parent, 0 ]
+ *      inherit_task_group()
+ *        inherit_group()
+ *          inherit_event()
+ *            perf_event_alloc()
+ *              perf_init_event()
+ *                perf_try_init_event()        [ child , 1 ]
+ *
+ * While it appears there is an obvious deadlock here -- the parent and child
+ * nesting levels are inverted between the two. This is in fact safe because
+ * life-time rules separate them. That is an exiting task cannot fork, and a
+ * spawning task cannot (yet) exit.
+ *
+ * But remember that that these are parent<->child context relations, and
+ * migration does not affect children, therefore these two orderings should not
+ * interact.
  *
  * The change in perf_event::ctx does not affect children (as claimed above)
  * because the sys_perf_event_open() case will install a new event and break
@@ -3422,7 +3442,6 @@ static void free_event_rcu(struct rcu_head *head)
        if (event->ns)
                put_pid_ns(event->ns);
        perf_event_free_filter(event);
-       perf_event_free_bpf_prog(event);
        kfree(event);
 }
 
@@ -3553,6 +3572,8 @@ static void __free_event(struct perf_event *event)
                        put_callchain_buffers();
        }
 
+       perf_event_free_bpf_prog(event);
+
        if (event->destroy)
                event->destroy(event);
 
@@ -3657,9 +3678,6 @@ static void perf_remove_from_owner(struct perf_event *event)
        }
 }
 
-/*
- * Called when the last reference to the file is gone.
- */
 static void put_event(struct perf_event *event)
 {
        struct perf_event_context *ctx;
@@ -3697,6 +3715,9 @@ int perf_event_release_kernel(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
+/*
+ * Called when the last reference to the file is gone.
+ */
 static int perf_release(struct inode *inode, struct file *file)
 {
        put_event(file->private_data);
@@ -4310,20 +4331,20 @@ static void ring_buffer_attach(struct perf_event *event,
                WARN_ON_ONCE(event->rcu_pending);
 
                old_rb = event->rb;
-               event->rcu_batches = get_state_synchronize_rcu();
-               event->rcu_pending = 1;
-
                spin_lock_irqsave(&old_rb->event_lock, flags);
                list_del_rcu(&event->rb_entry);
                spin_unlock_irqrestore(&old_rb->event_lock, flags);
-       }
 
-       if (event->rcu_pending && rb) {
-               cond_synchronize_rcu(event->rcu_batches);
-               event->rcu_pending = 0;
+               event->rcu_batches = get_state_synchronize_rcu();
+               event->rcu_pending = 1;
        }
 
        if (rb) {
+               if (event->rcu_pending) {
+                       cond_synchronize_rcu(event->rcu_batches);
+                       event->rcu_pending = 0;
+               }
+
                spin_lock_irqsave(&rb->event_lock, flags);
                list_add_rcu(&event->rb_entry, &rb->event_list);
                spin_unlock_irqrestore(&rb->event_lock, flags);
@@ -5360,9 +5381,9 @@ void perf_prepare_sample(struct perf_event_header *header,
        }
 }
 
-static void perf_event_output(struct perf_event *event,
-                               struct perf_sample_data *data,
-                               struct pt_regs *regs)
+void perf_event_output(struct perf_event *event,
+                       struct perf_sample_data *data,
+                       struct pt_regs *regs)
 {
        struct perf_output_handle handle;
        struct perf_event_header header;
@@ -5954,6 +5975,39 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
 }
 
 /*
+ * Lost/dropped samples logging
+ */
+void perf_log_lost_samples(struct perf_event *event, u64 lost)
+{
+       struct perf_output_handle handle;
+       struct perf_sample_data sample;
+       int ret;
+
+       struct {
+               struct perf_event_header        header;
+               u64                             lost;
+       } lost_samples_event = {
+               .header = {
+                       .type = PERF_RECORD_LOST_SAMPLES,
+                       .misc = 0,
+                       .size = sizeof(lost_samples_event),
+               },
+               .lost           = lost,
+       };
+
+       perf_event_header__init_id(&lost_samples_event.header, &sample, event);
+
+       ret = perf_output_begin(&handle, event,
+                               lost_samples_event.header.size);
+       if (ret)
+               return;
+
+       perf_output_put(&handle, lost_samples_event);
+       perf_event__output_id_sample(event, &handle, &sample);
+       perf_output_end(&handle);
+}
+
+/*
  * IRQ throttle logging
  */
 
@@ -7364,7 +7418,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
                return -ENODEV;
 
        if (event->group_leader != event) {
-               ctx = perf_event_ctx_lock(event->group_leader);
+               /*
+                * This ctx->mutex can nest when we're called through
+                * inheritance. See the perf_event_ctx_lock_nested() comment.
+                */
+               ctx = perf_event_ctx_lock_nested(event->group_leader,
+                                                SINGLE_DEPTH_NESTING);
                BUG_ON(!ctx);
        }
 
index 9f6ce9b..2deb24c 100644 (file)
@@ -72,15 +72,6 @@ static inline bool rb_has_aux(struct ring_buffer *rb)
 void perf_event_aux_event(struct perf_event *event, unsigned long head,
                          unsigned long size, u64 flags);
 
-extern void
-perf_event_header__init_id(struct perf_event_header *header,
-                          struct perf_sample_data *data,
-                          struct perf_event *event);
-extern void
-perf_event__output_id_sample(struct perf_event *event,
-                            struct perf_output_handle *handle,
-                            struct perf_sample_data *sample);
-
 extern struct page *
 perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
 
index 232f00f..9647282 100644 (file)
@@ -141,7 +141,7 @@ int perf_output_begin(struct perf_output_handle *handle,
        perf_output_get_handle(handle);
 
        do {
-               tail = ACCESS_ONCE(rb->user_page->data_tail);
+               tail = READ_ONCE_CTRL(rb->user_page->data_tail);
                offset = head = local_read(&rb->head);
                if (!rb->overwrite &&
                    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
@@ -493,6 +493,20 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
                        rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
        }
 
+       /*
+        * In overwrite mode, PMUs that don't support SG may not handle more
+        * than one contiguous allocation, since they rely on PMI to do double
+        * buffering. In this case, the entire buffer has to be one contiguous
+        * chunk.
+        */
+       if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) &&
+           overwrite) {
+               struct page *page = virt_to_page(rb->aux_pages[0]);
+
+               if (page_private(page) != max_order)
+                       goto out;
+       }
+
        rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
                                             overwrite);
        if (!rb->aux_priv)
index 03c1eaa..0bb88b5 100644 (file)
@@ -1091,10 +1091,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 {
        unsigned long cpu_limit;
 
-       /* Thread group counters. */
-       thread_group_cputime_init(sig);
-
-       cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+       cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
        if (cpu_limit != RLIM_INFINITY) {
                sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
                sig->cputimer.running = 1;
@@ -1396,6 +1393,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->hardirq_context = 0;
        p->softirq_context = 0;
 #endif
+
+       p->pagefault_disabled = 0;
+
 #ifdef CONFIG_LOCKDEP
        p->lockdep_depth = 0; /* no locks held yet */
        p->curr_chain_key = 0;
index 2579e40..aacc706 100644 (file)
@@ -1090,9 +1090,11 @@ static void __unqueue_futex(struct futex_q *q)
 
 /*
  * The hash bucket lock must be held when this is called.
- * Afterwards, the futex_q must not be accessed.
+ * Afterwards, the futex_q must not be accessed. Callers
+ * must ensure to later call wake_up_q() for the actual
+ * wakeups to occur.
  */
-static void wake_futex(struct futex_q *q)
+static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
 {
        struct task_struct *p = q->task;
 
@@ -1100,14 +1102,10 @@ static void wake_futex(struct futex_q *q)
                return;
 
        /*
-        * We set q->lock_ptr = NULL _before_ we wake up the task. If
-        * a non-futex wake up happens on another CPU then the task
-        * might exit and p would dereference a non-existing task
-        * struct. Prevent this by holding a reference on p across the
-        * wake up.
+        * Queue the task for later wakeup for after we've released
+        * the hb->lock. wake_q_add() grabs reference to p.
         */
-       get_task_struct(p);
-
+       wake_q_add(wake_q, p);
        __unqueue_futex(q);
        /*
         * The waiting task can free the futex_q as soon as
@@ -1117,9 +1115,6 @@ static void wake_futex(struct futex_q *q)
         */
        smp_wmb();
        q->lock_ptr = NULL;
-
-       wake_up_state(p, TASK_NORMAL);
-       put_task_struct(p);
 }
 
 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -1217,6 +1212,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
        struct futex_q *this, *next;
        union futex_key key = FUTEX_KEY_INIT;
        int ret;
+       WAKE_Q(wake_q);
 
        if (!bitset)
                return -EINVAL;
@@ -1244,13 +1240,14 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
                        if (!(this->bitset & bitset))
                                continue;
 
-                       wake_futex(this);
+                       mark_wake_futex(&wake_q, this);
                        if (++ret >= nr_wake)
                                break;
                }
        }
 
        spin_unlock(&hb->lock);
+       wake_up_q(&wake_q);
 out_put_key:
        put_futex_key(&key);
 out:
@@ -1269,6 +1266,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
        struct futex_hash_bucket *hb1, *hb2;
        struct futex_q *this, *next;
        int ret, op_ret;
+       WAKE_Q(wake_q);
 
 retry:
        ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
@@ -1320,7 +1318,7 @@ retry_private:
                                ret = -EINVAL;
                                goto out_unlock;
                        }
-                       wake_futex(this);
+                       mark_wake_futex(&wake_q, this);
                        if (++ret >= nr_wake)
                                break;
                }
@@ -1334,7 +1332,7 @@ retry_private:
                                        ret = -EINVAL;
                                        goto out_unlock;
                                }
-                               wake_futex(this);
+                               mark_wake_futex(&wake_q, this);
                                if (++op_ret >= nr_wake2)
                                        break;
                        }
@@ -1344,6 +1342,7 @@ retry_private:
 
 out_unlock:
        double_unlock_hb(hb1, hb2);
+       wake_up_q(&wake_q);
 out_put_keys:
        put_futex_key(&key2);
 out_put_key1:
@@ -1503,6 +1502,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
        struct futex_pi_state *pi_state = NULL;
        struct futex_hash_bucket *hb1, *hb2;
        struct futex_q *this, *next;
+       WAKE_Q(wake_q);
 
        if (requeue_pi) {
                /*
@@ -1679,7 +1679,7 @@ retry_private:
                 * woken by futex_unlock_pi().
                 */
                if (++task_count <= nr_wake && !requeue_pi) {
-                       wake_futex(this);
+                       mark_wake_futex(&wake_q, this);
                        continue;
                }
 
@@ -1719,6 +1719,7 @@ retry_private:
 out_unlock:
        free_pi_state(pi_state);
        double_unlock_hb(hb1, hb2);
+       wake_up_q(&wake_q);
        hb_waiters_dec(hb2);
 
        /*
@@ -2055,7 +2056,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
 {
        /*
         * The task state is guaranteed to be set before another task can
-        * wake it. set_current_state() is implemented using set_mb() and
+        * wake it. set_current_state() is implemented using smp_store_mb() and
         * queue_me() calls spin_unlock() upon completion, both serializing
         * access to the hash list and forcing another memory barrier.
         */
index 988dc58..2feb6fe 100644 (file)
@@ -57,5 +57,6 @@ struct irq_chip dummy_irq_chip = {
        .irq_ack        = noop,
        .irq_mask       = noop,
        .irq_unmask     = noop,
+       .flags          = IRQCHIP_SKIP_SET_WAKE,
 };
 EXPORT_SYMBOL_GPL(dummy_irq_chip);
index de7a416..7dd5c99 100644 (file)
@@ -17,6 +17,7 @@ obj-$(CONFIG_SMP) += spinlock.o
 obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
 obj-$(CONFIG_SMP) += lglock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
+obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
@@ -25,5 +26,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
 obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
-obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
+obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
index 86ae2ae..951cfcd 100644 (file)
@@ -60,6 +60,28 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu)
 }
 EXPORT_SYMBOL(lg_local_unlock_cpu);
 
+void lg_double_lock(struct lglock *lg, int cpu1, int cpu2)
+{
+       BUG_ON(cpu1 == cpu2);
+
+       /* lock in cpu order, just like lg_global_lock */
+       if (cpu2 < cpu1)
+               swap(cpu1, cpu2);
+
+       preempt_disable();
+       lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
+       arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
+       arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
+}
+
+void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
+{
+       lock_release(&lg->lock_dep_map, 1, _RET_IP_);
+       arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
+       arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
+       preempt_enable();
+}
+
 void lg_global_lock(struct lglock *lg)
 {
        int i;
index a0831e1..4566141 100644 (file)
@@ -3900,7 +3900,8 @@ static void zap_class(struct lock_class *class)
        list_del_rcu(&class->hash_entry);
        list_del_rcu(&class->lock_entry);
 
-       class->key = NULL;
+       RCU_INIT_POINTER(class->key, NULL);
+       RCU_INIT_POINTER(class->name, NULL);
 }
 
 static inline int within(const void *addr, void *start, unsigned long size)
@@ -4066,8 +4067,7 @@ void __init lockdep_info(void)
 
 #ifdef CONFIG_DEBUG_LOCKDEP
        if (lockdep_init_error) {
-               printk("WARNING: lockdep init error! lock-%s was acquired"
-                       "before lockdep_init\n", lock_init_error);
+               printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
                printk("Call stack leading to lockdep invocation was:\n");
                print_stack_trace(&lockdep_init_trace, 0);
        }
index ef43ac4..d83d798 100644 (file)
@@ -426,10 +426,12 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt)
 
 static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 {
-       char name[39];
-       struct lock_class *class;
+       struct lockdep_subclass_key *ckey;
        struct lock_class_stats *stats;
+       struct lock_class *class;
+       const char *cname;
        int i, namelen;
+       char name[39];
 
        class = data->class;
        stats = &data->stats;
@@ -440,15 +442,25 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
        if (class->subclass)
                namelen -= 2;
 
-       if (!class->name) {
+       rcu_read_lock_sched();
+       cname = rcu_dereference_sched(class->name);
+       ckey  = rcu_dereference_sched(class->key);
+
+       if (!cname && !ckey) {
+               rcu_read_unlock_sched();
+               return;
+
+       } else if (!cname) {
                char str[KSYM_NAME_LEN];
                const char *key_name;
 
-               key_name = __get_key_name(class->key, str);
+               key_name = __get_key_name(ckey, str);
                snprintf(name, namelen, "%s", key_name);
        } else {
-               snprintf(name, namelen, "%s", class->name);
+               snprintf(name, namelen, "%s", cname);
        }
+       rcu_read_unlock_sched();
+
        namelen = strlen(name);
        if (class->name_version > 1) {
                snprintf(name+namelen, 3, "#%d", class->name_version);
index ec8cce2..3224418 100644 (file)
@@ -122,12 +122,12 @@ static int torture_lock_busted_write_lock(void)
 
 static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
 {
-       const unsigned long longdelay_us = 100;
+       const unsigned long longdelay_ms = 100;
 
        /* We want a long delay occasionally to force massive contention.  */
        if (!(torture_random(trsp) %
-             (cxt.nrealwriters_stress * 2000 * longdelay_us)))
-               mdelay(longdelay_us);
+             (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+               mdelay(longdelay_ms);
 #ifdef CONFIG_PREEMPT
        if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
                preempt_schedule();  /* Allow test to be preempted. */
@@ -160,14 +160,14 @@ static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock)
 static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
 {
        const unsigned long shortdelay_us = 2;
-       const unsigned long longdelay_us = 100;
+       const unsigned long longdelay_ms = 100;
 
        /* We want a short delay mostly to emulate likely code, and
         * we want a long delay occasionally to force massive contention.
         */
        if (!(torture_random(trsp) %
-             (cxt.nrealwriters_stress * 2000 * longdelay_us)))
-               mdelay(longdelay_us);
+             (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+               mdelay(longdelay_ms);
        if (!(torture_random(trsp) %
              (cxt.nrealwriters_stress * 2 * shortdelay_us)))
                udelay(shortdelay_us);
@@ -309,7 +309,7 @@ static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock)
 static void torture_rwlock_read_unlock_irq(void)
 __releases(torture_rwlock)
 {
-       write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
+       read_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
 }
 
 static struct lock_torture_ops rw_lock_irq_ops = {
index 75e114b..fd91aaa 100644 (file)
@@ -17,6 +17,7 @@
 struct mcs_spinlock {
        struct mcs_spinlock *next;
        int locked; /* 1 if lock acquired */
+       int count;  /* nesting count, see qspinlock.c */
 };
 
 #ifndef arch_mcs_spin_lock_contended
index f956ede..6c5da48 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Queue read/write lock
+ * Queued read/write locks
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 #include <linux/hardirq.h>
 #include <asm/qrwlock.h>
 
+/*
+ * This internal data structure is used for optimizing access to some of
+ * the subfields within the atomic_t cnts.
+ */
+struct __qrwlock {
+       union {
+               atomic_t cnts;
+               struct {
+#ifdef __LITTLE_ENDIAN
+                       u8 wmode;       /* Writer mode   */
+                       u8 rcnts[3];    /* Reader counts */
+#else
+                       u8 rcnts[3];    /* Reader counts */
+                       u8 wmode;       /* Writer mode   */
+#endif
+               };
+       };
+       arch_spinlock_t lock;
+};
+
 /**
  * rspin_until_writer_unlock - inc reader count & spin until writer is gone
  * @lock  : Pointer to queue rwlock structure
@@ -107,10 +127,10 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
         * or wait for a previous writer to go away.
         */
        for (;;) {
-               cnts = atomic_read(&lock->cnts);
-               if (!(cnts & _QW_WMASK) &&
-                   (atomic_cmpxchg(&lock->cnts, cnts,
-                                   cnts | _QW_WAITING) == cnts))
+               struct __qrwlock *l = (struct __qrwlock *)lock;
+
+               if (!READ_ONCE(l->wmode) &&
+                  (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
                        break;
 
                cpu_relax_lowlatency();
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
new file mode 100644 (file)
index 0000000..38c4920
--- /dev/null
@@ -0,0 +1,473 @@
+/*
+ * Queued spinlock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
+ * (C) Copyright 2013-2014 Red Hat, Inc.
+ * (C) Copyright 2015 Intel Corp.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ *          Peter Zijlstra <peterz@infradead.org>
+ */
+
+#ifndef _GEN_PV_LOCK_SLOWPATH
+
+#include <linux/smp.h>
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mutex.h>
+#include <asm/byteorder.h>
+#include <asm/qspinlock.h>
+
+/*
+ * The basic principle of a queue-based spinlock can best be understood
+ * by studying a classic queue-based spinlock implementation called the
+ * MCS lock. The paper below provides a good description for this kind
+ * of lock.
+ *
+ * http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
+ *
+ * This queued spinlock implementation is based on the MCS lock, however to make
+ * it fit the 4 bytes we assume spinlock_t to be, and preserve its existing
+ * API, we must modify it somehow.
+ *
+ * In particular; where the traditional MCS lock consists of a tail pointer
+ * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
+ * unlock the next pending (next->locked), we compress both these: {tail,
+ * next->locked} into a single u32 value.
+ *
+ * Since a spinlock disables recursion of its own context and there is a limit
+ * to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there
+ * are at most 4 nesting levels, it can be encoded by a 2-bit number. Now
+ * we can encode the tail by combining the 2-bit nesting level with the cpu
+ * number. With one byte for the lock value and 3 bytes for the tail, only a
+ * 32-bit word is now needed. Even though we only need 1 bit for the lock,
+ * we extend it to a full byte to achieve better performance for architectures
+ * that support atomic byte write.
+ *
+ * We also change the first spinner to spin on the lock bit instead of its
+ * node; whereby avoiding the need to carry a node from lock to unlock, and
+ * preserving existing lock API. This also makes the unlock code simpler and
+ * faster.
+ *
+ * N.B. The current implementation only supports architectures that allow
+ *      atomic operations on smaller 8-bit and 16-bit data types.
+ *
+ */
+
+#include "mcs_spinlock.h"
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#define MAX_NODES      8
+#else
+#define MAX_NODES      4
+#endif
+
+/*
+ * Per-CPU queue node structures; we can never have more than 4 nested
+ * contexts: task, softirq, hardirq, nmi.
+ *
+ * Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
+ */
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
+
+/*
+ * We must be able to distinguish between no-tail and the tail at 0:0,
+ * therefore increment the cpu number by one.
+ */
+
+static inline u32 encode_tail(int cpu, int idx)
+{
+       u32 tail;
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(idx > 3);
+#endif
+       tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+       tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
+
+       return tail;
+}
+
+static inline struct mcs_spinlock *decode_tail(u32 tail)
+{
+       int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
+       int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+
+       return per_cpu_ptr(&mcs_nodes[idx], cpu);
+}
+
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+/*
+ * By using the whole 2nd least significant byte for the pending bit, we
+ * can allow better optimization of the lock acquisition for the pending
+ * bit holder.
+ *
+ * This internal structure is also used by the set_locked function which
+ * is not restricted to _Q_PENDING_BITS == 8.
+ */
+struct __qspinlock {
+       union {
+               atomic_t val;
+#ifdef __LITTLE_ENDIAN
+               struct {
+                       u8      locked;
+                       u8      pending;
+               };
+               struct {
+                       u16     locked_pending;
+                       u16     tail;
+               };
+#else
+               struct {
+                       u16     tail;
+                       u16     locked_pending;
+               };
+               struct {
+                       u8      reserved[2];
+                       u8      pending;
+                       u8      locked;
+               };
+#endif
+       };
+};
+
+#if _Q_PENDING_BITS == 8
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+       struct __qspinlock *l = (void *)lock;
+
+       WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
+}
+
+/*
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+       struct __qspinlock *l = (void *)lock;
+
+       return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+}
+
+#else /* _Q_PENDING_BITS == 8 */
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+       atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
+}
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+       u32 old, new, val = atomic_read(&lock->val);
+
+       for (;;) {
+               new = (val & _Q_LOCKED_PENDING_MASK) | tail;
+               old = atomic_cmpxchg(&lock->val, val, new);
+               if (old == val)
+                       break;
+
+               val = old;
+       }
+       return old;
+}
+#endif /* _Q_PENDING_BITS == 8 */
+
+/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(struct qspinlock *lock)
+{
+       struct __qspinlock *l = (void *)lock;
+
+       WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+}
+
+
+/*
+ * Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for
+ * all the PV callbacks.
+ */
+
+static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
+static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
+static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
+
+static __always_inline void __pv_wait_head(struct qspinlock *lock,
+                                          struct mcs_spinlock *node) { }
+
+#define pv_enabled()           false
+
+#define pv_init_node           __pv_init_node
+#define pv_wait_node           __pv_wait_node
+#define pv_kick_node           __pv_kick_node
+#define pv_wait_head           __pv_wait_head
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#define queued_spin_lock_slowpath      native_queued_spin_lock_slowpath
+#endif
+
+#endif /* _GEN_PV_LOCK_SLOWPATH */
+
+/**
+ * queued_spin_lock_slowpath - acquire the queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ * @val: Current value of the queued spinlock 32-bit word
+ *
+ * (queue tail, pending bit, lock value)
+ *
+ *              fast     :    slow                                  :    unlock
+ *                       :                                          :
+ * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
+ *                       :       | ^--------.------.             /  :
+ *                       :       v           \      \            |  :
+ * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
+ *                       :       | ^--'              |           |  :
+ *                       :       v                   |           |  :
+ * uncontended           :    (n,x,y) +--> (n,0,0) --'           |  :
+ *   queue               :       | ^--'                          |  :
+ *                       :       v                               |  :
+ * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
+ *   queue               :         ^--'                             :
+ */
+void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+       struct mcs_spinlock *prev, *next, *node;
+       u32 new, old, tail;
+       int idx;
+
+       BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+       if (pv_enabled())
+               goto queue;
+
+       if (virt_queued_spin_lock(lock))
+               return;
+
+       /*
+        * wait for in-progress pending->locked hand-overs
+        *
+        * 0,1,0 -> 0,0,1
+        */
+       if (val == _Q_PENDING_VAL) {
+               while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
+                       cpu_relax();
+       }
+
+       /*
+        * trylock || pending
+        *
+        * 0,0,0 -> 0,0,1 ; trylock
+        * 0,0,1 -> 0,1,1 ; pending
+        */
+       for (;;) {
+               /*
+                * If we observe any contention; queue.
+                */
+               if (val & ~_Q_LOCKED_MASK)
+                       goto queue;
+
+               new = _Q_LOCKED_VAL;
+               if (val == new)
+                       new |= _Q_PENDING_VAL;
+
+               old = atomic_cmpxchg(&lock->val, val, new);
+               if (old == val)
+                       break;
+
+               val = old;
+       }
+
+       /*
+        * we won the trylock
+        */
+       if (new == _Q_LOCKED_VAL)
+               return;
+
+       /*
+        * we're pending, wait for the owner to go away.
+        *
+        * *,1,1 -> *,1,0
+        *
+        * this wait loop must be a load-acquire such that we match the
+        * store-release that clears the locked bit and create lock
+        * sequentiality; this is because not all clear_pending_set_locked()
+        * implementations imply full barriers.
+        */
+       while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
+               cpu_relax();
+
+       /*
+        * take ownership and clear the pending bit.
+        *
+        * *,1,0 -> *,0,1
+        */
+       clear_pending_set_locked(lock);
+       return;
+
+       /*
+        * End of pending bit optimistic spinning and beginning of MCS
+        * queuing.
+        */
+queue:
+       node = this_cpu_ptr(&mcs_nodes[0]);
+       idx = node->count++;
+       tail = encode_tail(smp_processor_id(), idx);
+
+       node += idx;
+       node->locked = 0;
+       node->next = NULL;
+       pv_init_node(node);
+
+       /*
+        * We touched a (possibly) cold cacheline in the per-cpu queue node;
+        * attempt the trylock once more in the hope someone let go while we
+        * weren't watching.
+        */
+       if (queued_spin_trylock(lock))
+               goto release;
+
+       /*
+        * We have already touched the queueing cacheline; don't bother with
+        * pending stuff.
+        *
+        * p,*,* -> n,*,*
+        */
+       old = xchg_tail(lock, tail);
+
+       /*
+        * if there was a previous node; link it and wait until reaching the
+        * head of the waitqueue.
+        */
+       if (old & _Q_TAIL_MASK) {
+               prev = decode_tail(old);
+               WRITE_ONCE(prev->next, node);
+
+               pv_wait_node(node);
+               arch_mcs_spin_lock_contended(&node->locked);
+       }
+
+       /*
+        * we're at the head of the waitqueue, wait for the owner & pending to
+        * go away.
+        *
+        * *,x,y -> *,0,0
+        *
+        * this wait loop must use a load-acquire such that we match the
+        * store-release that clears the locked bit and create lock
+        * sequentiality; this is because the set_locked() function below
+        * does not imply a full barrier.
+        *
+        */
+       pv_wait_head(lock, node);
+       while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
+               cpu_relax();
+
+       /*
+        * claim the lock:
+        *
+        * n,0,0 -> 0,0,1 : lock, uncontended
+        * *,0,0 -> *,0,1 : lock, contended
+        *
+        * If the queue head is the only one in the queue (lock value == tail),
+        * clear the tail code and grab the lock. Otherwise, we only need
+        * to grab the lock.
+        */
+       for (;;) {
+               if (val != tail) {
+                       set_locked(lock);
+                       break;
+               }
+               old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+               if (old == val)
+                       goto release;   /* No contention */
+
+               val = old;
+       }
+
+       /*
+        * contended path; wait for next, release.
+        */
+       while (!(next = READ_ONCE(node->next)))
+               cpu_relax();
+
+       arch_mcs_spin_unlock_contended(&next->locked);
+       pv_kick_node(next);
+
+release:
+       /*
+        * release the node
+        */
+       this_cpu_dec(mcs_nodes[0].count);
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+/*
+ * Generate the paravirt code for queued_spin_unlock_slowpath().
+ */
+#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+#define _GEN_PV_LOCK_SLOWPATH
+
+#undef  pv_enabled
+#define pv_enabled()   true
+
+#undef pv_init_node
+#undef pv_wait_node
+#undef pv_kick_node
+#undef pv_wait_head
+
+#undef  queued_spin_lock_slowpath
+#define queued_spin_lock_slowpath      __pv_queued_spin_lock_slowpath
+
+#include "qspinlock_paravirt.h"
+#include "qspinlock.c"
+
+#endif
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
new file mode 100644 (file)
index 0000000..04ab181
--- /dev/null
@@ -0,0 +1,325 @@
+#ifndef _GEN_PV_LOCK_SLOWPATH
+#error "do not include this file"
+#endif
+
+#include <linux/hash.h>
+#include <linux/bootmem.h>
+
+/*
+ * Implement paravirt qspinlocks; the general idea is to halt the vcpus instead
+ * of spinning them.
+ *
+ * This relies on the architecture to provide two paravirt hypercalls:
+ *
+ *   pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr == val
+ *   pv_kick(cpu)             -- wakes a suspended vcpu
+ *
+ * Using these we implement __pv_queued_spin_lock_slowpath() and
+ * __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath() and
+ * native_queued_spin_unlock().
+ */
+
+#define _Q_SLOW_VAL    (3U << _Q_LOCKED_OFFSET)
+
+enum vcpu_state {
+       vcpu_running = 0,
+       vcpu_halted,
+};
+
+struct pv_node {
+       struct mcs_spinlock     mcs;
+       struct mcs_spinlock     __res[3];
+
+       int                     cpu;
+       u8                      state;
+};
+
+/*
+ * Lock and MCS node addresses hash table for fast lookup
+ *
+ * Hashing is done on a per-cacheline basis to minimize the need to access
+ * more than one cacheline.
+ *
+ * Dynamically allocate a hash table big enough to hold at least 4X the
+ * number of possible cpus in the system. Allocation is done on page
+ * granularity. So the minimum number of hash buckets should be at least
+ * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
+ *
+ * Since we should not be holding locks from NMI context (very rare indeed) the
+ * max load factor is 0.75, which is around the point where open addressing
+ * breaks down.
+ *
+ */
+struct pv_hash_entry {
+       struct qspinlock *lock;
+       struct pv_node   *node;
+};
+
+#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry))
+#define PV_HE_MIN      (PAGE_SIZE / sizeof(struct pv_hash_entry))
+
+static struct pv_hash_entry *pv_lock_hash;
+static unsigned int pv_lock_hash_bits __read_mostly;
+
+/*
+ * Allocate memory for the PV qspinlock hash buckets
+ *
+ * This function should be called from the paravirt spinlock initialization
+ * routine.
+ */
+void __init __pv_init_lock_hash(void)
+{
+       int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE);
+
+       if (pv_hash_size < PV_HE_MIN)
+               pv_hash_size = PV_HE_MIN;
+
+       /*
+        * Allocate space from bootmem which should be page-size aligned
+        * and hence cacheline aligned.
+        */
+       pv_lock_hash = alloc_large_system_hash("PV qspinlock",
+                                              sizeof(struct pv_hash_entry),
+                                              pv_hash_size, 0, HASH_EARLY,
+                                              &pv_lock_hash_bits, NULL,
+                                              pv_hash_size, pv_hash_size);
+}
+
+#define for_each_hash_entry(he, offset, hash)                                          \
+       for (hash &= ~(PV_HE_PER_LINE - 1), he = &pv_lock_hash[hash], offset = 0;       \
+            offset < (1 << pv_lock_hash_bits);                                         \
+            offset++, he = &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash_bits) - 1)])
+
+static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
+{
+       unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
+       struct pv_hash_entry *he;
+
+       for_each_hash_entry(he, offset, hash) {
+               if (!cmpxchg(&he->lock, NULL, lock)) {
+                       WRITE_ONCE(he->node, node);
+                       return &he->lock;
+               }
+       }
+       /*
+        * Hard assume there is a free entry for us.
+        *
+        * This is guaranteed by ensuring every blocked lock only ever consumes
+        * a single entry, and since we only have 4 nesting levels per CPU
+        * and allocated 4*nr_possible_cpus(), this must be so.
+        *
+        * The single entry is guaranteed by having the lock owner unhash
+        * before it releases.
+        */
+       BUG();
+}
+
+static struct pv_node *pv_unhash(struct qspinlock *lock)
+{
+       unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
+       struct pv_hash_entry *he;
+       struct pv_node *node;
+
+       for_each_hash_entry(he, offset, hash) {
+               if (READ_ONCE(he->lock) == lock) {
+                       node = READ_ONCE(he->node);
+                       WRITE_ONCE(he->lock, NULL);
+                       return node;
+               }
+       }
+       /*
+        * Hard assume we'll find an entry.
+        *
+        * This guarantees a limited lookup time and is itself guaranteed by
+        * having the lock owner do the unhash -- IFF the unlock sees the
+        * SLOW flag, there MUST be a hash entry.
+        */
+       BUG();
+}
+
+/*
+ * Initialize the PV part of the mcs_spinlock node.
+ */
+static void pv_init_node(struct mcs_spinlock *node)
+{
+       struct pv_node *pn = (struct pv_node *)node;
+
+       BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock));
+
+       pn->cpu = smp_processor_id();
+       pn->state = vcpu_running;
+}
+
+/*
+ * Wait for node->locked to become true, halt the vcpu after a short spin.
+ * pv_kick_node() is used to wake the vcpu again.
+ */
+static void pv_wait_node(struct mcs_spinlock *node)
+{
+       struct pv_node *pn = (struct pv_node *)node;
+       int loop;
+
+       for (;;) {
+               for (loop = SPIN_THRESHOLD; loop; loop--) {
+                       if (READ_ONCE(node->locked))
+                               return;
+                       cpu_relax();
+               }
+
+               /*
+                * Order pn->state vs pn->locked thusly:
+                *
+                * [S] pn->state = vcpu_halted    [S] next->locked = 1
+                *     MB                             MB
+                * [L] pn->locked               [RmW] pn->state = vcpu_running
+                *
+                * Matches the xchg() from pv_kick_node().
+                */
+               smp_store_mb(pn->state, vcpu_halted);
+
+               if (!READ_ONCE(node->locked))
+                       pv_wait(&pn->state, vcpu_halted);
+
+               /*
+                * Reset the vCPU state to avoid unncessary CPU kicking
+                */
+               WRITE_ONCE(pn->state, vcpu_running);
+
+               /*
+                * If the locked flag is still not set after wakeup, it is a
+                * spurious wakeup and the vCPU should wait again. However,
+                * there is a pretty high overhead for CPU halting and kicking.
+                * So it is better to spin for a while in the hope that the
+                * MCS lock will be released soon.
+                */
+       }
+       /*
+        * By now our node->locked should be 1 and our caller will not actually
+        * spin-wait for it. We do however rely on our caller to do a
+        * load-acquire for us.
+        */
+}
+
+/*
+ * Called after setting next->locked = 1, used to wake those stuck in
+ * pv_wait_node().
+ */
+static void pv_kick_node(struct mcs_spinlock *node)
+{
+       struct pv_node *pn = (struct pv_node *)node;
+
+       /*
+        * Note that because node->locked is already set, this actual
+        * mcs_spinlock entry could be re-used already.
+        *
+        * This should be fine however, kicking people for no reason is
+        * harmless.
+        *
+        * See the comment in pv_wait_node().
+        */
+       if (xchg(&pn->state, vcpu_running) == vcpu_halted)
+               pv_kick(pn->cpu);
+}
+
+/*
+ * Wait for l->locked to become clear; halt the vcpu after a short spin.
+ * __pv_queued_spin_unlock() will wake us.
+ */
+static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
+{
+       struct pv_node *pn = (struct pv_node *)node;
+       struct __qspinlock *l = (void *)lock;
+       struct qspinlock **lp = NULL;
+       int loop;
+
+       for (;;) {
+               for (loop = SPIN_THRESHOLD; loop; loop--) {
+                       if (!READ_ONCE(l->locked))
+                               return;
+                       cpu_relax();
+               }
+
+               WRITE_ONCE(pn->state, vcpu_halted);
+               if (!lp) { /* ONCE */
+                       lp = pv_hash(lock, pn);
+                       /*
+                        * lp must be set before setting _Q_SLOW_VAL
+                        *
+                        * [S] lp = lock                [RmW] l = l->locked = 0
+                        *     MB                             MB
+                        * [S] l->locked = _Q_SLOW_VAL  [L]   lp
+                        *
+                        * Matches the cmpxchg() in __pv_queued_spin_unlock().
+                        */
+                       if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
+                               /*
+                                * The lock is free and _Q_SLOW_VAL has never
+                                * been set. Therefore we need to unhash before
+                                * getting the lock.
+                                */
+                               WRITE_ONCE(*lp, NULL);
+                               return;
+                       }
+               }
+               pv_wait(&l->locked, _Q_SLOW_VAL);
+
+               /*
+                * The unlocker should have freed the lock before kicking the
+                * CPU. So if the lock is still not free, it is a spurious
+                * wakeup and so the vCPU should wait again after spinning for
+                * a while.
+                */
+       }
+
+       /*
+        * Lock is unlocked now; the caller will acquire it without waiting.
+        * As with pv_wait_node() we rely on the caller to do a load-acquire
+        * for us.
+        */
+}
+
+/*
+ * PV version of the unlock function to be used in stead of
+ * queued_spin_unlock().
+ */
+__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+{
+       struct __qspinlock *l = (void *)lock;
+       struct pv_node *node;
+
+       /*
+        * We must not unlock if SLOW, because in that case we must first
+        * unhash. Otherwise it would be possible to have multiple @lock
+        * entries, which would be BAD.
+        */
+       if (likely(cmpxchg(&l->locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL))
+               return;
+
+       /*
+        * Since the above failed to release, this must be the SLOW path.
+        * Therefore start by looking up the blocked node and unhashing it.
+        */
+       node = pv_unhash(lock);
+
+       /*
+        * Now that we have a reference to the (likely) blocked pv_node,
+        * release the lock.
+        */
+       smp_store_release(&l->locked, 0);
+
+       /*
+        * At this point the memory pointed at by lock can be freed/reused,
+        * however we can still use the pv_node to kick the CPU.
+        */
+       if (READ_ONCE(node->state) == vcpu_halted)
+               pv_kick(node->cpu);
+}
+/*
+ * Include the architecture specific callee-save thunk of the
+ * __pv_queued_spin_unlock(). This thunk is put together with
+ * __pv_queued_spin_unlock() near the top of the file to make sure
+ * that the callee-save thunk and the real unlock function are close
+ * to each other sharing consecutive instruction cachelines.
+ */
+#include <asm/qspinlock_paravirt.h>
+
index b732793..30ec5b4 100644 (file)
@@ -70,10 +70,10 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
 }
 
 /*
- * We can speed up the acquire/release, if the architecture
- * supports cmpxchg and if there's no debugging state to be set up
+ * We can speed up the acquire/release, if there's no debugging state to be
+ * set up.
  */
-#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
+#ifndef CONFIG_DEBUG_RT_MUTEXES
 # define rt_mutex_cmpxchg(l,c,n)       (cmpxchg(&l->owner, c, n) == c)
 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 {
@@ -265,15 +265,17 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
 }
 
 /*
- * Called by sched_setscheduler() to check whether the priority change
- * is overruled by a possible priority boosting.
+ * Called by sched_setscheduler() to get the priority which will be
+ * effective after the change.
  */
-int rt_mutex_check_prio(struct task_struct *task, int newprio)
+int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
 {
        if (!task_has_pi_waiters(task))
-               return 0;
+               return newprio;
 
-       return task_top_pi_waiter(task)->task->prio <= newprio;
+       if (task_top_pi_waiter(task)->task->prio <= newprio)
+               return task_top_pi_waiter(task)->task->prio;
+       return newprio;
 }
 
 /*
@@ -1441,10 +1443,17 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  *
  * @lock:      the rt_mutex to be locked
  *
+ * This function can only be called in thread context. It's safe to
+ * call it from atomic regions, but not from hard interrupt or soft
+ * interrupt context.
+ *
  * Returns 1 on success and 0 on contention
  */
 int __sched rt_mutex_trylock(struct rt_mutex *lock)
 {
+       if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
+               return 0;
+
        return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
index 3417d01..0f18971 100644 (file)
@@ -409,11 +409,24 @@ done:
        return taken;
 }
 
+/*
+ * Return true if the rwsem has active spinner
+ */
+static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
+{
+       return osq_is_locked(&sem->osq);
+}
+
 #else
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
 {
        return false;
 }
+
+static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
+{
+       return false;
+}
 #endif
 
 /*
@@ -496,7 +509,38 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 {
        unsigned long flags;
 
+       /*
+        * If a spinner is present, it is not necessary to do the wakeup.
+        * Try to do wakeup only if the trylock succeeds to minimize
+        * spinlock contention which may introduce too much delay in the
+        * unlock operation.
+        *
+        *    spinning writer           up_write/up_read caller
+        *    ---------------           -----------------------
+        * [S]   osq_unlock()           [L]   osq
+        *       MB                           RMB
+        * [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
+        *
+        * Here, it is important to make sure that there won't be a missed
+        * wakeup while the rwsem is free and the only spinning writer goes
+        * to sleep without taking the rwsem. Even when the spinning writer
+        * is just going to break out of the waiting loop, it will still do
+        * a trylock in rwsem_down_write_failed() before sleeping. IOW, if
+        * rwsem_has_spinner() is true, it will guarantee at least one
+        * trylock attempt on the rwsem later on.
+        */
+       if (rwsem_has_spinner(sem)) {
+               /*
+                * The smp_rmb() here is to make sure that the spinner
+                * state is consulted before reading the wait_lock.
+                */
+               smp_rmb();
+               if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
+                       return sem;
+               goto locked;
+       }
        raw_spin_lock_irqsave(&sem->wait_lock, flags);
+locked:
 
        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
index 42a1d2a..cfc9e84 100644 (file)
@@ -3370,6 +3370,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
        module_bug_cleanup(mod);
        mutex_unlock(&module_mutex);
 
+       blocking_notifier_call_chain(&module_notify_list,
+                                    MODULE_STATE_GOING, mod);
+
        /* we can't deallocate the module until we clear memory protection */
        unset_module_init_ro_nx(mod);
        unset_module_core_ro_nx(mod);
index 8dbe276..59e3268 100644 (file)
@@ -241,6 +241,7 @@ rcu_torture_free(struct rcu_torture *p)
 struct rcu_torture_ops {
        int ttype;
        void (*init)(void);
+       void (*cleanup)(void);
        int (*readlock)(void);
        void (*read_delay)(struct torture_random_state *rrsp);
        void (*readunlock)(int idx);
@@ -477,10 +478,12 @@ static struct rcu_torture_ops rcu_busted_ops = {
  */
 
 DEFINE_STATIC_SRCU(srcu_ctl);
+static struct srcu_struct srcu_ctld;
+static struct srcu_struct *srcu_ctlp = &srcu_ctl;
 
-static int srcu_torture_read_lock(void) __acquires(&srcu_ctl)
+static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
 {
-       return srcu_read_lock(&srcu_ctl);
+       return srcu_read_lock(srcu_ctlp);
 }
 
 static void srcu_read_delay(struct torture_random_state *rrsp)
@@ -499,49 +502,49 @@ static void srcu_read_delay(struct torture_random_state *rrsp)
                rcu_read_delay(rrsp);
 }
 
-static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl)
+static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
 {
-       srcu_read_unlock(&srcu_ctl, idx);
+       srcu_read_unlock(srcu_ctlp, idx);
 }
 
 static unsigned long srcu_torture_completed(void)
 {
-       return srcu_batches_completed(&srcu_ctl);
+       return srcu_batches_completed(srcu_ctlp);
 }
 
 static void srcu_torture_deferred_free(struct rcu_torture *rp)
 {
-       call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb);
+       call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb);
 }
 
 static void srcu_torture_synchronize(void)
 {
-       synchronize_srcu(&srcu_ctl);
+       synchronize_srcu(srcu_ctlp);
 }
 
 static void srcu_torture_call(struct rcu_head *head,
                              void (*func)(struct rcu_head *head))
 {
-       call_srcu(&srcu_ctl, head, func);
+       call_srcu(srcu_ctlp, head, func);
 }
 
 static void srcu_torture_barrier(void)
 {
-       srcu_barrier(&srcu_ctl);
+       srcu_barrier(srcu_ctlp);
 }
 
 static void srcu_torture_stats(void)
 {
        int cpu;
-       int idx = srcu_ctl.completed & 0x1;
+       int idx = srcu_ctlp->completed & 0x1;
 
        pr_alert("%s%s per-CPU(idx=%d):",
                 torture_type, TORTURE_FLAG, idx);
        for_each_possible_cpu(cpu) {
                long c0, c1;
 
-               c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx];
-               c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx];
+               c0 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[!idx];
+               c1 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[idx];
                pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
        }
        pr_cont("\n");
@@ -549,7 +552,7 @@ static void srcu_torture_stats(void)
 
 static void srcu_torture_synchronize_expedited(void)
 {
-       synchronize_srcu_expedited(&srcu_ctl);
+       synchronize_srcu_expedited(srcu_ctlp);
 }
 
 static struct rcu_torture_ops srcu_ops = {
@@ -569,6 +572,38 @@ static struct rcu_torture_ops srcu_ops = {
        .name           = "srcu"
 };
 
+static void srcu_torture_init(void)
+{
+       rcu_sync_torture_init();
+       WARN_ON(init_srcu_struct(&srcu_ctld));
+       srcu_ctlp = &srcu_ctld;
+}
+
+static void srcu_torture_cleanup(void)
+{
+       cleanup_srcu_struct(&srcu_ctld);
+       srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
+}
+
+/* As above, but dynamically allocated. */
+static struct rcu_torture_ops srcud_ops = {
+       .ttype          = SRCU_FLAVOR,
+       .init           = srcu_torture_init,
+       .cleanup        = srcu_torture_cleanup,
+       .readlock       = srcu_torture_read_lock,
+       .read_delay     = srcu_read_delay,
+       .readunlock     = srcu_torture_read_unlock,
+       .started        = NULL,
+       .completed      = srcu_torture_completed,
+       .deferred_free  = srcu_torture_deferred_free,
+       .sync           = srcu_torture_synchronize,
+       .exp_sync       = srcu_torture_synchronize_expedited,
+       .call           = srcu_torture_call,
+       .cb_barrier     = srcu_torture_barrier,
+       .stats          = srcu_torture_stats,
+       .name           = "srcud"
+};
+
 /*
  * Definitions for sched torture testing.
  */
@@ -672,8 +707,8 @@ static void rcu_torture_boost_cb(struct rcu_head *head)
        struct rcu_boost_inflight *rbip =
                container_of(head, struct rcu_boost_inflight, rcu);
 
-       smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */
-       rbip->inflight = 0;
+       /* Ensure RCU-core accesses precede clearing ->inflight */
+       smp_store_release(&rbip->inflight, 0);
 }
 
 static int rcu_torture_boost(void *arg)
@@ -710,9 +745,9 @@ static int rcu_torture_boost(void *arg)
                call_rcu_time = jiffies;
                while (ULONG_CMP_LT(jiffies, endtime)) {
                        /* If we don't have a callback in flight, post one. */
-                       if (!rbi.inflight) {
-                               smp_mb(); /* RCU core before ->inflight = 1. */
-                               rbi.inflight = 1;
+                       if (!smp_load_acquire(&rbi.inflight)) {
+                               /* RCU core before ->inflight = 1. */
+                               smp_store_release(&rbi.inflight, 1);
                                call_rcu(&rbi.rcu, rcu_torture_boost_cb);
                                if (jiffies - call_rcu_time >
                                         test_boost_duration * HZ - HZ / 2) {
@@ -751,11 +786,10 @@ checkwait:        stutter_wait("rcu_torture_boost");
        } while (!torture_must_stop());
 
        /* Clean up and exit. */
-       while (!kthread_should_stop() || rbi.inflight) {
+       while (!kthread_should_stop() || smp_load_acquire(&rbi.inflight)) {
                torture_shutdown_absorb("rcu_torture_boost");
                schedule_timeout_uninterruptible(1);
        }
-       smp_mb(); /* order accesses to ->inflight before stack-frame death. */
        destroy_rcu_head_on_stack(&rbi.rcu);
        torture_kthread_stopping("rcu_torture_boost");
        return 0;
@@ -1054,7 +1088,7 @@ static void rcu_torture_timer(unsigned long unused)
        p = rcu_dereference_check(rcu_torture_current,
                                  rcu_read_lock_bh_held() ||
                                  rcu_read_lock_sched_held() ||
-                                 srcu_read_lock_held(&srcu_ctl));
+                                 srcu_read_lock_held(srcu_ctlp));
        if (p == NULL) {
                /* Leave because rcu_torture_writer is not yet underway */
                cur_ops->readunlock(idx);
@@ -1128,7 +1162,7 @@ rcu_torture_reader(void *arg)
                p = rcu_dereference_check(rcu_torture_current,
                                          rcu_read_lock_bh_held() ||
                                          rcu_read_lock_sched_held() ||
-                                         srcu_read_lock_held(&srcu_ctl));
+                                         srcu_read_lock_held(srcu_ctlp));
                if (p == NULL) {
                        /* Wait for rcu_torture_writer to get underway */
                        cur_ops->readunlock(idx);
@@ -1413,12 +1447,15 @@ static int rcu_torture_barrier_cbs(void *arg)
        do {
                wait_event(barrier_cbs_wq[myid],
                           (newphase =
-                           ACCESS_ONCE(barrier_phase)) != lastphase ||
+                           smp_load_acquire(&barrier_phase)) != lastphase ||
                           torture_must_stop());
                lastphase = newphase;
-               smp_mb(); /* ensure barrier_phase load before ->call(). */
                if (torture_must_stop())
                        break;
+               /*
+                * The above smp_load_acquire() ensures barrier_phase load
+                * is ordered before the folloiwng ->call().
+                */
                cur_ops->call(&rcu, rcu_torture_barrier_cbf);
                if (atomic_dec_and_test(&barrier_cbs_count))
                        wake_up(&barrier_wq);
@@ -1439,8 +1476,8 @@ static int rcu_torture_barrier(void *arg)
        do {
                atomic_set(&barrier_cbs_invoked, 0);
                atomic_set(&barrier_cbs_count, n_barrier_cbs);
-               smp_mb(); /* Ensure barrier_phase after prior assignments. */
-               barrier_phase = !barrier_phase;
+               /* Ensure barrier_phase ordered after prior assignments. */
+               smp_store_release(&barrier_phase, !barrier_phase);
                for (i = 0; i < n_barrier_cbs; i++)
                        wake_up(&barrier_cbs_wq[i]);
                wait_event(barrier_wq,
@@ -1588,10 +1625,14 @@ rcu_torture_cleanup(void)
                        rcutorture_booster_cleanup(i);
        }
 
-       /* Wait for all RCU callbacks to fire.  */
-
+       /*
+        * Wait for all RCU callbacks to fire, then do flavor-specific
+        * cleanup operations.
+        */
        if (cur_ops->cb_barrier != NULL)
                cur_ops->cb_barrier();
+       if (cur_ops->cleanup != NULL)
+               cur_ops->cleanup();
 
        rcu_torture_stats_print();  /* -After- the stats thread is stopped! */
 
@@ -1668,8 +1709,8 @@ rcu_torture_init(void)
        int cpu;
        int firsterr = 0;
        static struct rcu_torture_ops *torture_ops[] = {
-               &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
-               RCUTORTURE_TASKS_OPS
+               &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
+               &sched_ops, RCUTORTURE_TASKS_OPS
        };
 
        if (!torture_init_begin(torture_type, verbose, &torture_runnable))
@@ -1701,7 +1742,7 @@ rcu_torture_init(void)
        if (nreaders >= 0) {
                nrealreaders = nreaders;
        } else {
-               nrealreaders = num_online_cpus() - 1;
+               nrealreaders = num_online_cpus() - 2 - nreaders;
                if (nrealreaders <= 0)
                        nrealreaders = 1;
        }
index cad76e7..fb33d35 100644 (file)
@@ -151,7 +151,7 @@ static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
        unsigned long t;
 
        for_each_possible_cpu(cpu) {
-               t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
+               t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
                sum += t;
        }
        return sum;
@@ -168,7 +168,7 @@ static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
        unsigned long t;
 
        for_each_possible_cpu(cpu) {
-               t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
+               t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
                sum += t;
        }
        return sum;
@@ -265,8 +265,8 @@ static int srcu_readers_active(struct srcu_struct *sp)
        unsigned long sum = 0;
 
        for_each_possible_cpu(cpu) {
-               sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
-               sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
+               sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
+               sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
        }
        return sum;
 }
@@ -296,7 +296,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
 {
        int idx;
 
-       idx = ACCESS_ONCE(sp->completed) & 0x1;
+       idx = READ_ONCE(sp->completed) & 0x1;
        preempt_disable();
        __this_cpu_inc(sp->per_cpu_ref->c[idx]);
        smp_mb(); /* B */  /* Avoid leaking the critical section. */
index 069742d..591af0c 100644 (file)
@@ -49,39 +49,6 @@ static void __call_rcu(struct rcu_head *head,
 
 #include "tiny_plugin.h"
 
-/*
- * Enter idle, which is an extended quiescent state if we have fully
- * entered that mode.
- */
-void rcu_idle_enter(void)
-{
-}
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
-
-/*
- * Exit an interrupt handler towards idle.
- */
-void rcu_irq_exit(void)
-{
-}
-EXPORT_SYMBOL_GPL(rcu_irq_exit);
-
-/*
- * Exit idle, so that we are no longer in an extended quiescent state.
- */
-void rcu_idle_exit(void)
-{
-}
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
-
-/*
- * Enter an interrupt handler, moving away from idle.
- */
-void rcu_irq_enter(void)
-{
-}
-EXPORT_SYMBOL_GPL(rcu_irq_enter);
-
 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
 
 /*
@@ -170,6 +137,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 
        /* Move the ready-to-invoke callbacks to a local list. */
        local_irq_save(flags);
+       if (rcp->donetail == &rcp->rcucblist) {
+               /* No callbacks ready, so just leave. */
+               local_irq_restore(flags);
+               return;
+       }
        RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
        list = rcp->rcucblist;
        rcp->rcucblist = *rcp->donetail;
index f94e209..e492a52 100644 (file)
@@ -144,16 +144,17 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
                return;
        rcp->ticks_this_gp++;
        j = jiffies;
-       js = ACCESS_ONCE(rcp->jiffies_stall);
+       js = READ_ONCE(rcp->jiffies_stall);
        if (rcp->rcucblist && ULONG_CMP_GE(j, js)) {
                pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
                       rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
                       jiffies - rcp->gp_start, rcp->qlen);
                dump_stack();
-               ACCESS_ONCE(rcp->jiffies_stall) = jiffies +
-                       3 * rcu_jiffies_till_stall_check() + 3;
+               WRITE_ONCE(rcp->jiffies_stall,
+                          jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
        } else if (ULONG_CMP_GE(j, js)) {
-               ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
+               WRITE_ONCE(rcp->jiffies_stall,
+                          jiffies + rcu_jiffies_till_stall_check());
        }
 }
 
@@ -161,7 +162,8 @@ static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
 {
        rcp->ticks_this_gp = 0;
        rcp->gp_start = jiffies;
-       ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
+       WRITE_ONCE(rcp->jiffies_stall,
+                  jiffies + rcu_jiffies_till_stall_check());
 }
 
 static void check_cpu_stalls(void)
index 233165d..add0429 100644 (file)
@@ -91,7 +91,7 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var
 
 #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
 DEFINE_RCU_TPS(sname) \
-DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \
 struct rcu_state sname##_state = { \
        .level = { &sname##_state.node[0] }, \
        .rda = &sname##_data, \
@@ -110,11 +110,18 @@ struct rcu_state sname##_state = { \
 RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
 RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
 
-static struct rcu_state *rcu_state_p;
+static struct rcu_state *const rcu_state_p;
+static struct rcu_data __percpu *const rcu_data_p;
 LIST_HEAD(rcu_struct_flavors);
 
-/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
-static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
+/* Dump rcu_node combining tree at boot to verify correct setup. */
+static bool dump_tree;
+module_param(dump_tree, bool, 0444);
+/* Control rcu_node-tree auto-balancing at boot time. */
+static bool rcu_fanout_exact;
+module_param(rcu_fanout_exact, bool, 0444);
+/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */
+static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
 module_param(rcu_fanout_leaf, int, 0444);
 int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
 static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
@@ -159,14 +166,46 @@ static void invoke_rcu_core(void);
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 
 /* rcuc/rcub kthread realtime priority */
+#ifdef CONFIG_RCU_KTHREAD_PRIO
 static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
+#else /* #ifdef CONFIG_RCU_KTHREAD_PRIO */
+static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
+#endif /* #else #ifdef CONFIG_RCU_KTHREAD_PRIO */
 module_param(kthread_prio, int, 0644);
 
-/* Delay in jiffies for grace-period initialization delays. */
-static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT)
-                               ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY
-                               : 0;
+/* Delay in jiffies for grace-period initialization delays, debug only. */
+
+#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT
+static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY;
+module_param(gp_preinit_delay, int, 0644);
+#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */
+static const int gp_preinit_delay;
+#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */
+
+#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT
+static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY;
 module_param(gp_init_delay, int, 0644);
+#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
+static const int gp_init_delay;
+#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
+
+#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP
+static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY;
+module_param(gp_cleanup_delay, int, 0644);
+#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
+static const int gp_cleanup_delay;
+#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
+
+/*
+ * Number of grace periods between delays, normalized by the duration of
+ * the delay.  The longer the the delay, the more the grace periods between
+ * each delay.  The reason for this normalization is that it means that,
+ * for non-zero delays, the overall slowdown of grace periods is constant
+ * regardless of the duration of the delay.  This arrangement balances
+ * the need for long delays to increase some race probabilities with the
+ * need for fast grace periods to increase other race probabilities.
+ */
+#define PER_RCU_NODE_PERIOD 3  /* Number of grace periods between delays. */
 
 /*
  * Track the rcutorture test sequence number and the update version
@@ -188,17 +227,17 @@ unsigned long rcutorture_vernum;
  */
 unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
 {
-       return ACCESS_ONCE(rnp->qsmaskinitnext);
+       return READ_ONCE(rnp->qsmaskinitnext);
 }
 
 /*
- * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
+ * Return true if an RCU grace period is in progress.  The READ_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
  * structure's ->lock, but of course results can be subject to change.
  */
 static int rcu_gp_in_progress(struct rcu_state *rsp)
 {
-       return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
+       return READ_ONCE(rsp->completed) != READ_ONCE(rsp->gpnum);
 }
 
 /*
@@ -275,8 +314,8 @@ static void rcu_momentary_dyntick_idle(void)
                if (!(resched_mask & rsp->flavor_mask))
                        continue;
                smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
-               if (ACCESS_ONCE(rdp->mynode->completed) !=
-                   ACCESS_ONCE(rdp->cond_resched_completed))
+               if (READ_ONCE(rdp->mynode->completed) !=
+                   READ_ONCE(rdp->cond_resched_completed))
                        continue;
 
                /*
@@ -488,9 +527,9 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
                break;
        }
        if (rsp != NULL) {
-               *flags = ACCESS_ONCE(rsp->gp_flags);
-               *gpnum = ACCESS_ONCE(rsp->gpnum);
-               *completed = ACCESS_ONCE(rsp->completed);
+               *flags = READ_ONCE(rsp->gp_flags);
+               *gpnum = READ_ONCE(rsp->gpnum);
+               *completed = READ_ONCE(rsp->completed);
                return;
        }
        *flags = 0;
@@ -536,10 +575,10 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static int rcu_future_needs_gp(struct rcu_state *rsp)
 {
        struct rcu_node *rnp = rcu_get_root(rsp);
-       int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1;
+       int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
        int *fp = &rnp->need_future_gp[idx];
 
-       return ACCESS_ONCE(*fp);
+       return READ_ONCE(*fp);
 }
 
 /*
@@ -562,7 +601,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
                return 1;  /* Yes, this CPU has newly registered callbacks. */
        for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
                if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
-                   ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
+                   ULONG_CMP_LT(READ_ONCE(rsp->completed),
                                 rdp->nxtcompleted[i]))
                        return 1;  /* Yes, CBs for future grace period. */
        return 0; /* No grace period needed. */
@@ -582,7 +621,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
        struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
        trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
-       if (!user && !is_idle_task(current)) {
+       if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+           !user && !is_idle_task(current)) {
                struct task_struct *idle __maybe_unused =
                        idle_task(smp_processor_id());
 
@@ -601,7 +641,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
        smp_mb__before_atomic();  /* See above. */
        atomic_inc(&rdtp->dynticks);
        smp_mb__after_atomic();  /* Force ordering with next sojourn. */
-       WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+                    atomic_read(&rdtp->dynticks) & 0x1);
        rcu_dynticks_task_enter();
 
        /*
@@ -627,7 +668,8 @@ static void rcu_eqs_enter(bool user)
 
        rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
-       WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+                    (oldval & DYNTICK_TASK_NEST_MASK) == 0);
        if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
                rdtp->dynticks_nesting = 0;
                rcu_eqs_enter_common(oldval, user);
@@ -700,7 +742,8 @@ void rcu_irq_exit(void)
        rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        rdtp->dynticks_nesting--;
-       WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+                    rdtp->dynticks_nesting < 0);
        if (rdtp->dynticks_nesting)
                trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
        else
@@ -725,10 +768,12 @@ static void rcu_eqs_exit_common(long long oldval, int user)
        atomic_inc(&rdtp->dynticks);
        /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
        smp_mb__after_atomic();  /* See above. */
-       WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+                    !(atomic_read(&rdtp->dynticks) & 0x1));
        rcu_cleanup_after_idle();
        trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
-       if (!user && !is_idle_task(current)) {
+       if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+           !user && !is_idle_task(current)) {
                struct task_struct *idle __maybe_unused =
                        idle_task(smp_processor_id());
 
@@ -752,7 +797,7 @@ static void rcu_eqs_exit(bool user)
 
        rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
-       WARN_ON_ONCE(oldval < 0);
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
        if (oldval & DYNTICK_TASK_NEST_MASK) {
                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
        } else {
@@ -825,7 +870,8 @@ void rcu_irq_enter(void)
        rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        rdtp->dynticks_nesting++;
-       WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+                    rdtp->dynticks_nesting == 0);
        if (oldval)
                trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
        else
@@ -1008,9 +1054,9 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
                trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
                return 1;
        } else {
-               if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4,
+               if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
                                 rdp->mynode->gpnum))
-                       ACCESS_ONCE(rdp->gpwrap) = true;
+                       WRITE_ONCE(rdp->gpwrap, true);
                return 0;
        }
 }
@@ -1090,12 +1136,12 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
        if (ULONG_CMP_GE(jiffies,
                         rdp->rsp->gp_start + jiffies_till_sched_qs) ||
            ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
-               if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
-                       ACCESS_ONCE(rdp->cond_resched_completed) =
-                               ACCESS_ONCE(rdp->mynode->completed);
+               if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
+                       WRITE_ONCE(rdp->cond_resched_completed,
+                                  READ_ONCE(rdp->mynode->completed));
                        smp_mb(); /* ->cond_resched_completed before *rcrmp. */
-                       ACCESS_ONCE(*rcrmp) =
-                               ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask;
+                       WRITE_ONCE(*rcrmp,
+                                  READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask);
                        resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
                        rdp->rsp->jiffies_resched += 5; /* Enable beating. */
                } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
@@ -1116,9 +1162,9 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
        rsp->gp_start = j;
        smp_wmb(); /* Record start time before stall time. */
        j1 = rcu_jiffies_till_stall_check();
-       ACCESS_ONCE(rsp->jiffies_stall) = j + j1;
+       WRITE_ONCE(rsp->jiffies_stall, j + j1);
        rsp->jiffies_resched = j + j1 / 2;
-       rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs);
+       rsp->n_force_qs_gpstart = READ_ONCE(rsp->n_force_qs);
 }
 
 /*
@@ -1130,10 +1176,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
        unsigned long j;
 
        j = jiffies;
-       gpa = ACCESS_ONCE(rsp->gp_activity);
+       gpa = READ_ONCE(rsp->gp_activity);
        if (j - gpa > 2 * HZ)
-               pr_err("%s kthread starved for %ld jiffies!\n",
-                      rsp->name, j - gpa);
+               pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x\n",
+                      rsp->name, j - gpa,
+                      rsp->gpnum, rsp->completed, rsp->gp_flags);
 }
 
 /*
@@ -1170,12 +1217,13 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
        /* Only let one CPU complain about others per time interval. */
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
-       delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall);
+       delta = jiffies - READ_ONCE(rsp->jiffies_stall);
        if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                return;
        }
-       ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
+       WRITE_ONCE(rsp->jiffies_stall,
+                  jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
        /*
@@ -1209,12 +1257,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
        if (ndetected) {
                rcu_dump_cpu_stacks(rsp);
        } else {
-               if (ACCESS_ONCE(rsp->gpnum) != gpnum ||
-                   ACCESS_ONCE(rsp->completed) == gpnum) {
+               if (READ_ONCE(rsp->gpnum) != gpnum ||
+                   READ_ONCE(rsp->completed) == gpnum) {
                        pr_err("INFO: Stall ended before state dump start\n");
                } else {
                        j = jiffies;
-                       gpa = ACCESS_ONCE(rsp->gp_activity);
+                       gpa = READ_ONCE(rsp->gp_activity);
                        pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
                               rsp->name, j - gpa, j, gpa,
                               jiffies_till_next_fqs,
@@ -1259,9 +1307,9 @@ static void print_cpu_stall(struct rcu_state *rsp)
        rcu_dump_cpu_stacks(rsp);
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
-       if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall)))
-               ACCESS_ONCE(rsp->jiffies_stall) = jiffies +
-                                    3 * rcu_jiffies_till_stall_check() + 3;
+       if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
+               WRITE_ONCE(rsp->jiffies_stall,
+                          jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
        /*
@@ -1304,20 +1352,20 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
         * Given this check, comparisons of jiffies, rsp->jiffies_stall,
         * and rsp->gp_start suffice to forestall false positives.
         */
-       gpnum = ACCESS_ONCE(rsp->gpnum);
+       gpnum = READ_ONCE(rsp->gpnum);
        smp_rmb(); /* Pick up ->gpnum first... */
-       js = ACCESS_ONCE(rsp->jiffies_stall);
+       js = READ_ONCE(rsp->jiffies_stall);
        smp_rmb(); /* ...then ->jiffies_stall before the rest... */
-       gps = ACCESS_ONCE(rsp->gp_start);
+       gps = READ_ONCE(rsp->gp_start);
        smp_rmb(); /* ...and finally ->gp_start before ->completed. */
-       completed = ACCESS_ONCE(rsp->completed);
+       completed = READ_ONCE(rsp->completed);
        if (ULONG_CMP_GE(completed, gpnum) ||
            ULONG_CMP_LT(j, js) ||
            ULONG_CMP_GE(gps, js))
                return; /* No stall or GP completed since entering function. */
        rnp = rdp->mynode;
        if (rcu_gp_in_progress(rsp) &&
-           (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) {
+           (READ_ONCE(rnp->qsmask) & rdp->grpmask)) {
 
                /* We haven't checked in, so go dump stack. */
                print_cpu_stall(rsp);
@@ -1344,7 +1392,7 @@ void rcu_cpu_stall_reset(void)
        struct rcu_state *rsp;
 
        for_each_rcu_flavor(rsp)
-               ACCESS_ONCE(rsp->jiffies_stall) = jiffies + ULONG_MAX / 2;
+               WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2);
 }
 
 /*
@@ -1454,7 +1502,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
         * doing some extra useless work.
         */
        if (rnp->gpnum != rnp->completed ||
-           ACCESS_ONCE(rnp_root->gpnum) != ACCESS_ONCE(rnp_root->completed)) {
+           READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) {
                rnp->need_future_gp[c & 0x1]++;
                trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
                goto out;
@@ -1539,7 +1587,7 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 static void rcu_gp_kthread_wake(struct rcu_state *rsp)
 {
        if (current == rsp->gp_kthread ||
-           !ACCESS_ONCE(rsp->gp_flags) ||
+           !READ_ONCE(rsp->gp_flags) ||
            !rsp->gp_kthread)
                return;
        wake_up(&rsp->gp_wq);
@@ -1674,7 +1722,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 
        /* Handle the ends of any preceding grace periods first. */
        if (rdp->completed == rnp->completed &&
-           !unlikely(ACCESS_ONCE(rdp->gpwrap))) {
+           !unlikely(READ_ONCE(rdp->gpwrap))) {
 
                /* No grace period end, so just accelerate recent callbacks. */
                ret = rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -1689,7 +1737,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
                trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
        }
 
-       if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) {
+       if (rdp->gpnum != rnp->gpnum || unlikely(READ_ONCE(rdp->gpwrap))) {
                /*
                 * If the current grace period is waiting for this CPU,
                 * set up to detect a quiescent state, otherwise don't
@@ -1701,7 +1749,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
                rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
                rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
                zero_cpu_stall_ticks(rdp);
-               ACCESS_ONCE(rdp->gpwrap) = false;
+               WRITE_ONCE(rdp->gpwrap, false);
        }
        return ret;
 }
@@ -1714,9 +1762,9 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
 
        local_irq_save(flags);
        rnp = rdp->mynode;
-       if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
-            rdp->completed == ACCESS_ONCE(rnp->completed) &&
-            !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */
+       if ((rdp->gpnum == READ_ONCE(rnp->gpnum) &&
+            rdp->completed == READ_ONCE(rnp->completed) &&
+            !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */
            !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
                local_irq_restore(flags);
                return;
@@ -1728,6 +1776,13 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
                rcu_gp_kthread_wake(rsp);
 }
 
+static void rcu_gp_slow(struct rcu_state *rsp, int delay)
+{
+       if (delay > 0 &&
+           !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
+               schedule_timeout_uninterruptible(delay);
+}
+
 /*
  * Initialize a new grace period.  Return 0 if no grace period required.
  */
@@ -1737,15 +1792,15 @@ static int rcu_gp_init(struct rcu_state *rsp)
        struct rcu_data *rdp;
        struct rcu_node *rnp = rcu_get_root(rsp);
 
-       ACCESS_ONCE(rsp->gp_activity) = jiffies;
+       WRITE_ONCE(rsp->gp_activity, jiffies);
        raw_spin_lock_irq(&rnp->lock);
        smp_mb__after_unlock_lock();
-       if (!ACCESS_ONCE(rsp->gp_flags)) {
+       if (!READ_ONCE(rsp->gp_flags)) {
                /* Spurious wakeup, tell caller to go back to sleep.  */
                raw_spin_unlock_irq(&rnp->lock);
                return 0;
        }
-       ACCESS_ONCE(rsp->gp_flags) = 0; /* Clear all flags: New grace period. */
+       WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */
 
        if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
                /*
@@ -1770,6 +1825,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
         * will handle subsequent offline CPUs.
         */
        rcu_for_each_leaf_node(rsp, rnp) {
+               rcu_gp_slow(rsp, gp_preinit_delay);
                raw_spin_lock_irq(&rnp->lock);
                smp_mb__after_unlock_lock();
                if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@@ -1826,14 +1882,15 @@ static int rcu_gp_init(struct rcu_state *rsp)
         * process finishes, because this kthread handles both.
         */
        rcu_for_each_node_breadth_first(rsp, rnp) {
+               rcu_gp_slow(rsp, gp_init_delay);
                raw_spin_lock_irq(&rnp->lock);
                smp_mb__after_unlock_lock();
                rdp = this_cpu_ptr(rsp->rda);
                rcu_preempt_check_blocked_tasks(rnp);
                rnp->qsmask = rnp->qsmaskinit;
-               ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
+               WRITE_ONCE(rnp->gpnum, rsp->gpnum);
                if (WARN_ON_ONCE(rnp->completed != rsp->completed))
-                       ACCESS_ONCE(rnp->completed) = rsp->completed;
+                       WRITE_ONCE(rnp->completed, rsp->completed);
                if (rnp == rdp->mynode)
                        (void)__note_gp_changes(rsp, rnp, rdp);
                rcu_preempt_boost_start_gp(rnp);
@@ -1842,11 +1899,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
                                            rnp->grphi, rnp->qsmask);
                raw_spin_unlock_irq(&rnp->lock);
                cond_resched_rcu_qs();
-               ACCESS_ONCE(rsp->gp_activity) = jiffies;
-               if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) &&
-                   gp_init_delay > 0 &&
-                   !(rsp->gpnum % (rcu_num_nodes * 10)))
-                       schedule_timeout_uninterruptible(gp_init_delay);
+               WRITE_ONCE(rsp->gp_activity, jiffies);
        }
 
        return 1;
@@ -1862,7 +1915,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
        unsigned long maxj;
        struct rcu_node *rnp = rcu_get_root(rsp);
 
-       ACCESS_ONCE(rsp->gp_activity) = jiffies;
+       WRITE_ONCE(rsp->gp_activity, jiffies);
        rsp->n_force_qs++;
        if (fqs_state == RCU_SAVE_DYNTICK) {
                /* Collect dyntick-idle snapshots. */
@@ -1880,11 +1933,11 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
                force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
        }
        /* Clear flag to prevent immediate re-entry. */
-       if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
+       if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                raw_spin_lock_irq(&rnp->lock);
                smp_mb__after_unlock_lock();
-               ACCESS_ONCE(rsp->gp_flags) =
-                       ACCESS_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS;
+               WRITE_ONCE(rsp->gp_flags,
+                          READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
                raw_spin_unlock_irq(&rnp->lock);
        }
        return fqs_state;
@@ -1901,7 +1954,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        struct rcu_data *rdp;
        struct rcu_node *rnp = rcu_get_root(rsp);
 
-       ACCESS_ONCE(rsp->gp_activity) = jiffies;
+       WRITE_ONCE(rsp->gp_activity, jiffies);
        raw_spin_lock_irq(&rnp->lock);
        smp_mb__after_unlock_lock();
        gp_duration = jiffies - rsp->gp_start;
@@ -1932,7 +1985,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
                smp_mb__after_unlock_lock();
                WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
                WARN_ON_ONCE(rnp->qsmask);
-               ACCESS_ONCE(rnp->completed) = rsp->gpnum;
+               WRITE_ONCE(rnp->completed, rsp->gpnum);
                rdp = this_cpu_ptr(rsp->rda);
                if (rnp == rdp->mynode)
                        needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
@@ -1940,7 +1993,8 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
                nocb += rcu_future_gp_cleanup(rsp, rnp);
                raw_spin_unlock_irq(&rnp->lock);
                cond_resched_rcu_qs();
-               ACCESS_ONCE(rsp->gp_activity) = jiffies;
+               WRITE_ONCE(rsp->gp_activity, jiffies);
+               rcu_gp_slow(rsp, gp_cleanup_delay);
        }
        rnp = rcu_get_root(rsp);
        raw_spin_lock_irq(&rnp->lock);
@@ -1948,16 +2002,16 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        rcu_nocb_gp_set(rnp, nocb);
 
        /* Declare grace period done. */
-       ACCESS_ONCE(rsp->completed) = rsp->gpnum;
+       WRITE_ONCE(rsp->completed, rsp->gpnum);
        trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
        rsp->fqs_state = RCU_GP_IDLE;
        rdp = this_cpu_ptr(rsp->rda);
        /* Advance CBs to reduce false positives below. */
        needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
        if (needgp || cpu_needs_another_gp(rsp, rdp)) {
-               ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
+               WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
                trace_rcu_grace_period(rsp->name,
-                                      ACCESS_ONCE(rsp->gpnum),
+                                      READ_ONCE(rsp->gpnum),
                                       TPS("newreq"));
        }
        raw_spin_unlock_irq(&rnp->lock);
@@ -1981,20 +2035,20 @@ static int __noreturn rcu_gp_kthread(void *arg)
                /* Handle grace-period start. */
                for (;;) {
                        trace_rcu_grace_period(rsp->name,
-                                              ACCESS_ONCE(rsp->gpnum),
+                                              READ_ONCE(rsp->gpnum),
                                               TPS("reqwait"));
                        rsp->gp_state = RCU_GP_WAIT_GPS;
                        wait_event_interruptible(rsp->gp_wq,
-                                                ACCESS_ONCE(rsp->gp_flags) &
+                                                READ_ONCE(rsp->gp_flags) &
                                                 RCU_GP_FLAG_INIT);
                        /* Locking provides needed memory barrier. */
                        if (rcu_gp_init(rsp))
                                break;
                        cond_resched_rcu_qs();
-                       ACCESS_ONCE(rsp->gp_activity) = jiffies;
+                       WRITE_ONCE(rsp->gp_activity, jiffies);
                        WARN_ON(signal_pending(current));
                        trace_rcu_grace_period(rsp->name,
-                                              ACCESS_ONCE(rsp->gpnum),
+                                              READ_ONCE(rsp->gpnum),
                                               TPS("reqwaitsig"));
                }
 
@@ -2010,39 +2064,39 @@ static int __noreturn rcu_gp_kthread(void *arg)
                        if (!ret)
                                rsp->jiffies_force_qs = jiffies + j;
                        trace_rcu_grace_period(rsp->name,
-                                              ACCESS_ONCE(rsp->gpnum),
+                                              READ_ONCE(rsp->gpnum),
                                               TPS("fqswait"));
                        rsp->gp_state = RCU_GP_WAIT_FQS;
                        ret = wait_event_interruptible_timeout(rsp->gp_wq,
-                                       ((gf = ACCESS_ONCE(rsp->gp_flags)) &
+                                       ((gf = READ_ONCE(rsp->gp_flags)) &
                                         RCU_GP_FLAG_FQS) ||
-                                       (!ACCESS_ONCE(rnp->qsmask) &&
+                                       (!READ_ONCE(rnp->qsmask) &&
                                         !rcu_preempt_blocked_readers_cgp(rnp)),
                                        j);
                        /* Locking provides needed memory barriers. */
                        /* If grace period done, leave loop. */
-                       if (!ACCESS_ONCE(rnp->qsmask) &&
+                       if (!READ_ONCE(rnp->qsmask) &&
                            !rcu_preempt_blocked_readers_cgp(rnp))
                                break;
                        /* If time for quiescent-state forcing, do it. */
                        if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
                            (gf & RCU_GP_FLAG_FQS)) {
                                trace_rcu_grace_period(rsp->name,
-                                                      ACCESS_ONCE(rsp->gpnum),
+                                                      READ_ONCE(rsp->gpnum),
                                                       TPS("fqsstart"));
                                fqs_state = rcu_gp_fqs(rsp, fqs_state);
                                trace_rcu_grace_period(rsp->name,
-                                                      ACCESS_ONCE(rsp->gpnum),
+                                                      READ_ONCE(rsp->gpnum),
                                                       TPS("fqsend"));
                                cond_resched_rcu_qs();
-                               ACCESS_ONCE(rsp->gp_activity) = jiffies;
+                               WRITE_ONCE(rsp->gp_activity, jiffies);
                        } else {
                                /* Deal with stray signal. */
                                cond_resched_rcu_qs();
-                               ACCESS_ONCE(rsp->gp_activity) = jiffies;
+                               WRITE_ONCE(rsp->gp_activity, jiffies);
                                WARN_ON(signal_pending(current));
                                trace_rcu_grace_period(rsp->name,
-                                                      ACCESS_ONCE(rsp->gpnum),
+                                                      READ_ONCE(rsp->gpnum),
                                                       TPS("fqswaitsig"));
                        }
                        j = jiffies_till_next_fqs;
@@ -2084,8 +2138,8 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
                 */
                return false;
        }
-       ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
-       trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
+       WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
+       trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
                               TPS("newreq"));
 
        /*
@@ -2135,6 +2189,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
        __releases(rcu_get_root(rsp)->lock)
 {
        WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
+       WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
        raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
        rcu_gp_kthread_wake(rsp);
 }
@@ -2332,8 +2387,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
        rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
 /*
  * Send the specified CPU's RCU callbacks to the orphanage.  The
  * specified CPU must be offline, and the caller must hold the
@@ -2344,7 +2397,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
                          struct rcu_node *rnp, struct rcu_data *rdp)
 {
        /* No-CBs CPUs do not have orphanable callbacks. */
-       if (rcu_is_nocb_cpu(rdp->cpu))
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
                return;
 
        /*
@@ -2357,7 +2410,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
                rsp->qlen += rdp->qlen;
                rdp->n_cbs_orphaned += rdp->qlen;
                rdp->qlen_lazy = 0;
-               ACCESS_ONCE(rdp->qlen) = 0;
+               WRITE_ONCE(rdp->qlen, 0);
        }
 
        /*
@@ -2403,7 +2456,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
        struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
        /* No-CBs CPUs are handled specially. */
-       if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
+           rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
                return;
 
        /* Do the accounting first. */
@@ -2450,6 +2504,9 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
        RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
        RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
 
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
+               return;
+
        RCU_TRACE(mask = rdp->grpmask);
        trace_rcu_grace_period(rsp->name,
                               rnp->gpnum + 1 - !!(rnp->qsmask & mask),
@@ -2478,7 +2535,8 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
        long mask;
        struct rcu_node *rnp = rnp_leaf;
 
-       if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
+           rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
                return;
        for (;;) {
                mask = rnp->grpmask;
@@ -2509,6 +2567,9 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
        struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
+               return;
+
        /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
        mask = rdp->grpmask;
        raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -2530,6 +2591,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
        struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
+               return;
+
        /* Adjust any no-longer-needed kthreads. */
        rcu_boost_kthread_setaffinity(rnp, -1);
 
@@ -2544,26 +2608,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
                  cpu, rdp->qlen, rdp->nxtlist);
 }
 
-#else /* #ifdef CONFIG_HOTPLUG_CPU */
-
-static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
-{
-}
-
-static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
-{
-}
-
-static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
-{
-}
-
-static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Invoke any RCU callbacks that have made it to the end of their grace
  * period.  Thottle as specified by rdp->blimit.
@@ -2578,7 +2622,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
        /* If no callbacks are ready, just return. */
        if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
                trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
-               trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
+               trace_rcu_batch_end(rsp->name, 0, !!READ_ONCE(rdp->nxtlist),
                                    need_resched(), is_idle_task(current),
                                    rcu_is_callbacks_kthread());
                return;
@@ -2634,7 +2678,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
        }
        smp_mb(); /* List handling before counting for rcu_barrier(). */
        rdp->qlen_lazy -= count_lazy;
-       ACCESS_ONCE(rdp->qlen) = rdp->qlen - count;
+       WRITE_ONCE(rdp->qlen, rdp->qlen - count);
        rdp->n_cbs_invoked += count;
 
        /* Reinstate batch limit if we have worked down the excess. */
@@ -2728,10 +2772,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
                mask = 0;
                raw_spin_lock_irqsave(&rnp->lock, flags);
                smp_mb__after_unlock_lock();
-               if (!rcu_gp_in_progress(rsp)) {
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       return;
-               }
                if (rnp->qsmask == 0) {
                        if (rcu_state_p == &rcu_sched_state ||
                            rsp != rcu_state_p ||
@@ -2761,8 +2801,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
                bit = 1;
                for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
                        if ((rnp->qsmask & bit) != 0) {
-                               if ((rnp->qsmaskinit & bit) == 0)
-                                       *isidle = false; /* Pending hotplug. */
                                if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
                                        mask |= bit;
                        }
@@ -2791,7 +2829,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
        /* Funnel through hierarchy to reduce memory contention. */
        rnp = __this_cpu_read(rsp->rda->mynode);
        for (; rnp != NULL; rnp = rnp->parent) {
-               ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
+               ret = (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
                      !raw_spin_trylock(&rnp->fqslock);
                if (rnp_old != NULL)
                        raw_spin_unlock(&rnp_old->fqslock);
@@ -2807,13 +2845,12 @@ static void force_quiescent_state(struct rcu_state *rsp)
        raw_spin_lock_irqsave(&rnp_old->lock, flags);
        smp_mb__after_unlock_lock();
        raw_spin_unlock(&rnp_old->fqslock);
-       if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
+       if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                rsp->n_force_qs_lh++;
                raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
                return;  /* Someone beat us to it. */
        }
-       ACCESS_ONCE(rsp->gp_flags) =
-               ACCESS_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS;
+       WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
        raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
        rcu_gp_kthread_wake(rsp);
 }
@@ -2879,7 +2916,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
  */
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-       if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
+       if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
                return;
        if (likely(!rsp->boost)) {
                rcu_do_batch(rsp, rdp);
@@ -2970,7 +3007,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
        WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */
        if (debug_rcu_head_queue(head)) {
                /* Probable double call_rcu(), so leak the callback. */
-               ACCESS_ONCE(head->func) = rcu_leak_callback;
+               WRITE_ONCE(head->func, rcu_leak_callback);
                WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
                return;
        }
@@ -3009,7 +3046,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
                if (!likely(rdp->nxtlist))
                        init_default_callback_list(rdp);
        }
-       ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1;
+       WRITE_ONCE(rdp->qlen, rdp->qlen + 1);
        if (lazy)
                rdp->qlen_lazy++;
        else
@@ -3285,7 +3322,7 @@ void synchronize_sched_expedited(void)
        if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
                         (ulong)atomic_long_read(&rsp->expedited_done) +
                         ULONG_MAX / 8)) {
-               synchronize_sched();
+               wait_rcu_gp(call_rcu_sched);
                atomic_long_inc(&rsp->expedited_wrap);
                return;
        }
@@ -3448,14 +3485,14 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
        }
 
        /* Has another RCU grace period completed?  */
-       if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
+       if (READ_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
                rdp->n_rp_gp_completed++;
                return 1;
        }
 
        /* Has a new RCU grace period started? */
-       if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum ||
-           unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */
+       if (READ_ONCE(rnp->gpnum) != rdp->gpnum ||
+           unlikely(READ_ONCE(rdp->gpwrap))) { /* outside lock */
                rdp->n_rp_gp_started++;
                return 1;
        }
@@ -3491,7 +3528,7 @@ static int rcu_pending(void)
  * non-NULL, store an indication of whether all callbacks are lazy.
  * (If there are no callbacks, all of them are deemed to be lazy.)
  */
-static int __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy)
+static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy)
 {
        bool al = true;
        bool hc = false;
@@ -3562,7 +3599,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 {
        int cpu;
        struct rcu_data *rdp;
-       unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
+       unsigned long snap = READ_ONCE(rsp->n_barrier_done);
        unsigned long snap_done;
 
        _rcu_barrier_trace(rsp, "Begin", -1, snap);
@@ -3604,10 +3641,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
        /*
         * Increment ->n_barrier_done to avoid duplicate work.  Use
-        * ACCESS_ONCE() to prevent the compiler from speculating
+        * WRITE_ONCE() to prevent the compiler from speculating
         * the increment to precede the early-exit check.
         */
-       ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1;
+       WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1);
        WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
        _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
        smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
@@ -3643,7 +3680,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
                                __call_rcu(&rdp->barrier_head,
                                           rcu_barrier_callback, rsp, cpu, 0);
                        }
-               } else if (ACCESS_ONCE(rdp->qlen)) {
+               } else if (READ_ONCE(rdp->qlen)) {
                        _rcu_barrier_trace(rsp, "OnlineQ", cpu,
                                           rsp->n_barrier_done);
                        smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -3663,7 +3700,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
        /* Increment ->n_barrier_done to prevent duplicate work. */
        smp_mb(); /* Keep increment after above mechanism. */
-       ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1;
+       WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1);
        WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
        _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
        smp_mb(); /* Keep increment before caller's subsequent code. */
@@ -3778,7 +3815,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
        rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
        rdp->completed = rnp->completed;
        rdp->passed_quiesce = false;
-       rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+       rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
        rdp->qs_pending = false;
        trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -3922,16 +3959,16 @@ void rcu_scheduler_starting(void)
 
 /*
  * Compute the per-level fanout, either using the exact fanout specified
- * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
+ * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
  */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
        int i;
 
-       if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) {
+       if (rcu_fanout_exact) {
                rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
                for (i = rcu_num_lvls - 2; i >= 0; i--)
-                       rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+                       rsp->levelspread[i] = RCU_FANOUT;
        } else {
                int ccur;
                int cprv;
@@ -3969,9 +4006,9 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
        BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */
 
-       /* Silence gcc 4.8 warning about array index out of range. */
-       if (rcu_num_lvls > RCU_NUM_LVLS)
-               panic("rcu_init_one: rcu_num_lvls overflow");
+       /* Silence gcc 4.8 false positive about array index out of range. */
+       if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS)
+               panic("rcu_init_one: rcu_num_lvls out of range");
 
        /* Initialize the level-tracking arrays. */
 
@@ -4057,7 +4094,7 @@ static void __init rcu_init_geometry(void)
                jiffies_till_next_fqs = d;
 
        /* If the compile-time values are accurate, just leave. */
-       if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
+       if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
            nr_cpu_ids == NR_CPUS)
                return;
        pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
@@ -4071,7 +4108,7 @@ static void __init rcu_init_geometry(void)
        rcu_capacity[0] = 1;
        rcu_capacity[1] = rcu_fanout_leaf;
        for (i = 2; i <= MAX_RCU_LVLS; i++)
-               rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
+               rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;
 
        /*
         * The boot-time rcu_fanout_leaf parameter is only permitted
@@ -4081,7 +4118,7 @@ static void __init rcu_init_geometry(void)
         * the configured number of CPUs.  Complain and fall back to the
         * compile-time values if these limits are exceeded.
         */
-       if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
+       if (rcu_fanout_leaf < RCU_FANOUT_LEAF ||
            rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
            n > rcu_capacity[MAX_RCU_LVLS]) {
                WARN_ON(1);
@@ -4107,6 +4144,28 @@ static void __init rcu_init_geometry(void)
        rcu_num_nodes -= n;
 }
 
+/*
+ * Dump out the structure of the rcu_node combining tree associated
+ * with the rcu_state structure referenced by rsp.
+ */
+static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
+{
+       int level = 0;
+       struct rcu_node *rnp;
+
+       pr_info("rcu_node tree layout dump\n");
+       pr_info(" ");
+       rcu_for_each_node_breadth_first(rsp, rnp) {
+               if (rnp->level != level) {
+                       pr_cont("\n");
+                       pr_info(" ");
+                       level = rnp->level;
+               }
+               pr_cont("%d:%d ^%d  ", rnp->grplo, rnp->grphi, rnp->grpnum);
+       }
+       pr_cont("\n");
+}
+
 void __init rcu_init(void)
 {
        int cpu;
@@ -4117,6 +4176,8 @@ void __init rcu_init(void)
        rcu_init_geometry();
        rcu_init_one(&rcu_bh_state, &rcu_bh_data);
        rcu_init_one(&rcu_sched_state, &rcu_sched_data);
+       if (dump_tree)
+               rcu_dump_rcu_node_tree(&rcu_sched_state);
        __rcu_init_preempt();
        open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
index a69d3da..4adb7ca 100644 (file)
  * In practice, this did work well going from three levels to four.
  * Of course, your mileage may vary.
  */
+
 #define MAX_RCU_LVLS 4
-#define RCU_FANOUT_1         (CONFIG_RCU_FANOUT_LEAF)
-#define RCU_FANOUT_2         (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
-#define RCU_FANOUT_3         (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
-#define RCU_FANOUT_4         (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
+
+#ifdef CONFIG_RCU_FANOUT
+#define RCU_FANOUT CONFIG_RCU_FANOUT
+#else /* #ifdef CONFIG_RCU_FANOUT */
+# ifdef CONFIG_64BIT
+# define RCU_FANOUT 64
+# else
+# define RCU_FANOUT 32
+# endif
+#endif /* #else #ifdef CONFIG_RCU_FANOUT */
+
+#ifdef CONFIG_RCU_FANOUT_LEAF
+#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */
+# ifdef CONFIG_64BIT
+# define RCU_FANOUT_LEAF 64
+# else
+# define RCU_FANOUT_LEAF 32
+# endif
+#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */
+
+#define RCU_FANOUT_1         (RCU_FANOUT_LEAF)
+#define RCU_FANOUT_2         (RCU_FANOUT_1 * RCU_FANOUT)
+#define RCU_FANOUT_3         (RCU_FANOUT_2 * RCU_FANOUT)
+#define RCU_FANOUT_4         (RCU_FANOUT_3 * RCU_FANOUT)
 
 #if NR_CPUS <= RCU_FANOUT_1
 #  define RCU_NUM_LVLS       1
@@ -170,7 +192,6 @@ struct rcu_node {
                                /*  if there is no such task.  If there */
                                /*  is no current expedited grace period, */
                                /*  then there can cannot be any such task. */
-#ifdef CONFIG_RCU_BOOST
        struct list_head *boost_tasks;
                                /* Pointer to first task that needs to be */
                                /*  priority boosted, or NULL if no priority */
@@ -208,7 +229,6 @@ struct rcu_node {
        unsigned long n_balk_nos;
                                /* Refused to boost: not sure why, though. */
                                /*  This can happen due to race conditions. */
-#endif /* #ifdef CONFIG_RCU_BOOST */
 #ifdef CONFIG_RCU_NOCB_CPU
        wait_queue_head_t nocb_gp_wq[2];
                                /* Place for rcu_nocb_kthread() to wait GP. */
@@ -519,14 +539,11 @@ extern struct list_head rcu_struct_flavors;
  * RCU implementation internal declarations:
  */
 extern struct rcu_state rcu_sched_state;
-DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
 
 extern struct rcu_state rcu_bh_state;
-DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 #ifdef CONFIG_PREEMPT_RCU
 extern struct rcu_state rcu_preempt_state;
-DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 #ifdef CONFIG_RCU_BOOST
index 8c0ec0f..3266434 100644 (file)
@@ -43,7 +43,17 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
 
-#endif /* #ifdef CONFIG_RCU_BOOST */
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
+ * all uses are in dead code.  Provide a definition to keep the compiler
+ * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
+ * This probably needs to be excluded from -rt builds.
+ */
+#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
+
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 #ifdef CONFIG_RCU_NOCB_CPU
 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
@@ -60,11 +70,11 @@ static void __init rcu_bootup_announce_oddness(void)
 {
        if (IS_ENABLED(CONFIG_RCU_TRACE))
                pr_info("\tRCU debugfs-based tracing is enabled.\n");
-       if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) ||
-           (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32))
+       if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
+           (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
                pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
-                      CONFIG_RCU_FANOUT);
-       if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT))
+                      RCU_FANOUT);
+       if (rcu_fanout_exact)
                pr_info("\tHierarchical RCU autobalancing is disabled.\n");
        if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
                pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
@@ -76,10 +86,10 @@ static void __init rcu_bootup_announce_oddness(void)
                pr_info("\tAdditional per-CPU info printed with stalls.\n");
        if (NUM_RCU_LVL_4 != 0)
                pr_info("\tFour-level hierarchy is enabled.\n");
-       if (CONFIG_RCU_FANOUT_LEAF != 16)
+       if (RCU_FANOUT_LEAF != 16)
                pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
-                       CONFIG_RCU_FANOUT_LEAF);
-       if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
+                       RCU_FANOUT_LEAF);
+       if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
                pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
        if (nr_cpu_ids != NR_CPUS)
                pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
@@ -90,7 +100,8 @@ static void __init rcu_bootup_announce_oddness(void)
 #ifdef CONFIG_PREEMPT_RCU
 
 RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
-static struct rcu_state *rcu_state_p = &rcu_preempt_state;
+static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
+static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
 
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
@@ -116,11 +127,11 @@ static void __init rcu_bootup_announce(void)
  */
 static void rcu_preempt_qs(void)
 {
-       if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
+       if (!__this_cpu_read(rcu_data_p->passed_quiesce)) {
                trace_rcu_grace_period(TPS("rcu_preempt"),
-                                      __this_cpu_read(rcu_preempt_data.gpnum),
+                                      __this_cpu_read(rcu_data_p->gpnum),
                                       TPS("cpuqs"));
-               __this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
+               __this_cpu_write(rcu_data_p->passed_quiesce, 1);
                barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
                current->rcu_read_unlock_special.b.need_qs = false;
        }
@@ -150,7 +161,7 @@ static void rcu_preempt_note_context_switch(void)
            !t->rcu_read_unlock_special.b.blocked) {
 
                /* Possibly blocking in an RCU read-side critical section. */
-               rdp = this_cpu_ptr(rcu_preempt_state.rda);
+               rdp = this_cpu_ptr(rcu_state_p->rda);
                rnp = rdp->mynode;
                raw_spin_lock_irqsave(&rnp->lock, flags);
                smp_mb__after_unlock_lock();
@@ -180,10 +191,9 @@ static void rcu_preempt_note_context_switch(void)
                if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
                        list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
                        rnp->gp_tasks = &t->rcu_node_entry;
-#ifdef CONFIG_RCU_BOOST
-                       if (rnp->boost_tasks != NULL)
+                       if (IS_ENABLED(CONFIG_RCU_BOOST) &&
+                           rnp->boost_tasks != NULL)
                                rnp->boost_tasks = rnp->gp_tasks;
-#endif /* #ifdef CONFIG_RCU_BOOST */
                } else {
                        list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
                        if (rnp->qsmask & rdp->grpmask)
@@ -263,9 +273,7 @@ void rcu_read_unlock_special(struct task_struct *t)
        bool empty_exp_now;
        unsigned long flags;
        struct list_head *np;
-#ifdef CONFIG_RCU_BOOST
        bool drop_boost_mutex = false;
-#endif /* #ifdef CONFIG_RCU_BOOST */
        struct rcu_node *rnp;
        union rcu_special special;
 
@@ -307,9 +315,11 @@ void rcu_read_unlock_special(struct task_struct *t)
                t->rcu_read_unlock_special.b.blocked = false;
 
                /*
-                * Remove this task from the list it blocked on.  The
-                * task can migrate while we acquire the lock, but at
-                * most one time.  So at most two passes through loop.
+                * Remove this task from the list it blocked on.  The task
+                * now remains queued on the rcu_node corresponding to
+                * the CPU it first blocked on, so the first attempt to
+                * acquire the task's rcu_node's ->lock will succeed.
+                * Keep the loop and add a WARN_ON() out of sheer paranoia.
                 */
                for (;;) {
                        rnp = t->rcu_blocked_node;
@@ -317,6 +327,7 @@ void rcu_read_unlock_special(struct task_struct *t)
                        smp_mb__after_unlock_lock();
                        if (rnp == t->rcu_blocked_node)
                                break;
+                       WARN_ON_ONCE(1);
                        raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
                }
                empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
@@ -331,12 +342,12 @@ void rcu_read_unlock_special(struct task_struct *t)
                        rnp->gp_tasks = np;
                if (&t->rcu_node_entry == rnp->exp_tasks)
                        rnp->exp_tasks = np;
-#ifdef CONFIG_RCU_BOOST
-               if (&t->rcu_node_entry == rnp->boost_tasks)
-                       rnp->boost_tasks = np;
-               /* Snapshot ->boost_mtx ownership with rcu_node lock held. */
-               drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
-#endif /* #ifdef CONFIG_RCU_BOOST */
+               if (IS_ENABLED(CONFIG_RCU_BOOST)) {
+                       if (&t->rcu_node_entry == rnp->boost_tasks)
+                               rnp->boost_tasks = np;
+                       /* Snapshot ->boost_mtx ownership w/rnp->lock held. */
+                       drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
+               }
 
                /*
                 * If this was the last task on the current list, and if
@@ -353,24 +364,21 @@ void rcu_read_unlock_special(struct task_struct *t)
                                                         rnp->grplo,
                                                         rnp->grphi,
                                                         !!rnp->gp_tasks);
-                       rcu_report_unblock_qs_rnp(&rcu_preempt_state,
-                                                 rnp, flags);
+                       rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);
                } else {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                }
 
-#ifdef CONFIG_RCU_BOOST
                /* Unboost if we were boosted. */
-               if (drop_boost_mutex)
+               if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
                        rt_mutex_unlock(&rnp->boost_mtx);
-#endif /* #ifdef CONFIG_RCU_BOOST */
 
                /*
                 * If this was the last task on the expedited lists,
                 * then we need to report up the rcu_node hierarchy.
                 */
                if (!empty_exp && empty_exp_now)
-                       rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
+                       rcu_report_exp_rnp(rcu_state_p, rnp, true);
        } else {
                local_irq_restore(flags);
        }
@@ -390,7 +398,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                return;
        }
-       t = list_entry(rnp->gp_tasks,
+       t = list_entry(rnp->gp_tasks->prev,
                       struct task_struct, rcu_node_entry);
        list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
                sched_show_task(t);
@@ -447,7 +455,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
        if (!rcu_preempt_blocked_readers_cgp(rnp))
                return 0;
        rcu_print_task_stall_begin(rnp);
-       t = list_entry(rnp->gp_tasks,
+       t = list_entry(rnp->gp_tasks->prev,
                       struct task_struct, rcu_node_entry);
        list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
                pr_cont(" P%d", t->pid);
@@ -491,8 +499,8 @@ static void rcu_preempt_check_callbacks(void)
                return;
        }
        if (t->rcu_read_lock_nesting > 0 &&
-           __this_cpu_read(rcu_preempt_data.qs_pending) &&
-           !__this_cpu_read(rcu_preempt_data.passed_quiesce))
+           __this_cpu_read(rcu_data_p->qs_pending) &&
+           !__this_cpu_read(rcu_data_p->passed_quiesce))
                t->rcu_read_unlock_special.b.need_qs = true;
 }
 
@@ -500,7 +508,7 @@ static void rcu_preempt_check_callbacks(void)
 
 static void rcu_preempt_do_callbacks(void)
 {
-       rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
+       rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
 }
 
 #endif /* #ifdef CONFIG_RCU_BOOST */
@@ -510,7 +518,7 @@ static void rcu_preempt_do_callbacks(void)
  */
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-       __call_rcu(head, func, &rcu_preempt_state, -1, 0);
+       __call_rcu(head, func, rcu_state_p, -1, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
@@ -570,7 +578,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 {
        return !rcu_preempted_readers_exp(rnp) &&
-              ACCESS_ONCE(rnp->expmask) == 0;
+              READ_ONCE(rnp->expmask) == 0;
 }
 
 /*
@@ -711,12 +719,12 @@ sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
 void synchronize_rcu_expedited(void)
 {
        struct rcu_node *rnp;
-       struct rcu_state *rsp = &rcu_preempt_state;
+       struct rcu_state *rsp = rcu_state_p;
        unsigned long snap;
        int trycount = 0;
 
        smp_mb(); /* Caller's modifications seen first by other CPUs. */
-       snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
+       snap = READ_ONCE(sync_rcu_preempt_exp_count) + 1;
        smp_mb(); /* Above access cannot bleed into critical section. */
 
        /*
@@ -740,7 +748,7 @@ void synchronize_rcu_expedited(void)
         */
        while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
                if (ULONG_CMP_LT(snap,
-                   ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
+                   READ_ONCE(sync_rcu_preempt_exp_count))) {
                        put_online_cpus();
                        goto mb_ret; /* Others did our work for us. */
                }
@@ -752,7 +760,7 @@ void synchronize_rcu_expedited(void)
                        return;
                }
        }
-       if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
+       if (ULONG_CMP_LT(snap, READ_ONCE(sync_rcu_preempt_exp_count))) {
                put_online_cpus();
                goto unlock_mb_ret; /* Others did our work for us. */
        }
@@ -780,8 +788,7 @@ void synchronize_rcu_expedited(void)
 
        /* Clean up and exit. */
        smp_mb(); /* ensure expedited GP seen before counter increment. */
-       ACCESS_ONCE(sync_rcu_preempt_exp_count) =
-                                       sync_rcu_preempt_exp_count + 1;
+       WRITE_ONCE(sync_rcu_preempt_exp_count, sync_rcu_preempt_exp_count + 1);
 unlock_mb_ret:
        mutex_unlock(&sync_rcu_preempt_exp_mutex);
 mb_ret:
@@ -799,7 +806,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  */
 void rcu_barrier(void)
 {
-       _rcu_barrier(&rcu_preempt_state);
+       _rcu_barrier(rcu_state_p);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
@@ -808,7 +815,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier);
  */
 static void __init __rcu_init_preempt(void)
 {
-       rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
+       rcu_init_one(rcu_state_p, rcu_data_p);
 }
 
 /*
@@ -831,7 +838,8 @@ void exit_rcu(void)
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
-static struct rcu_state *rcu_state_p = &rcu_sched_state;
+static struct rcu_state *const rcu_state_p = &rcu_sched_state;
+static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data;
 
 /*
  * Tell them what RCU they are running.
@@ -994,8 +1002,8 @@ static int rcu_boost(struct rcu_node *rnp)
        struct task_struct *t;
        struct list_head *tb;
 
-       if (ACCESS_ONCE(rnp->exp_tasks) == NULL &&
-           ACCESS_ONCE(rnp->boost_tasks) == NULL)
+       if (READ_ONCE(rnp->exp_tasks) == NULL &&
+           READ_ONCE(rnp->boost_tasks) == NULL)
                return 0;  /* Nothing left to boost. */
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1048,8 +1056,8 @@ static int rcu_boost(struct rcu_node *rnp)
        rt_mutex_lock(&rnp->boost_mtx);
        rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
 
-       return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
-              ACCESS_ONCE(rnp->boost_tasks) != NULL;
+       return READ_ONCE(rnp->exp_tasks) != NULL ||
+              READ_ONCE(rnp->boost_tasks) != NULL;
 }
 
 /*
@@ -1173,7 +1181,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
        struct sched_param sp;
        struct task_struct *t;
 
-       if (&rcu_preempt_state != rsp)
+       if (rcu_state_p != rsp)
                return 0;
 
        if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
@@ -1367,13 +1375,12 @@ static void rcu_prepare_kthreads(int cpu)
  * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs
  * any flavor of RCU.
  */
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
 int rcu_needs_cpu(unsigned long *delta_jiffies)
 {
        *delta_jiffies = ULONG_MAX;
-       return rcu_cpu_has_callbacks(NULL);
+       return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
+              ? 0 : rcu_cpu_has_callbacks(NULL);
 }
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 
 /*
  * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
@@ -1462,7 +1469,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
                 * callbacks not yet ready to invoke.
                 */
                if ((rdp->completed != rnp->completed ||
-                    unlikely(ACCESS_ONCE(rdp->gpwrap))) &&
+                    unlikely(READ_ONCE(rdp->gpwrap))) &&
                    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
                        note_gp_changes(rsp, rdp);
 
@@ -1480,11 +1487,15 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
  *
  * The caller must have disabled interrupts.
  */
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
 int rcu_needs_cpu(unsigned long *dj)
 {
        struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
+       if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) {
+               *dj = ULONG_MAX;
+               return 0;
+       }
+
        /* Snapshot to detect later posting of non-lazy callback. */
        rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
 
@@ -1511,7 +1522,6 @@ int rcu_needs_cpu(unsigned long *dj)
        }
        return 0;
 }
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 
 /*
  * Prepare a CPU for idle from an RCU perspective.  The first major task
@@ -1525,7 +1535,6 @@ int rcu_needs_cpu(unsigned long *dj)
  */
 static void rcu_prepare_for_idle(void)
 {
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
        bool needwake;
        struct rcu_data *rdp;
        struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
@@ -1533,8 +1542,11 @@ static void rcu_prepare_for_idle(void)
        struct rcu_state *rsp;
        int tne;
 
+       if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL))
+               return;
+
        /* Handle nohz enablement switches conservatively. */
-       tne = ACCESS_ONCE(tick_nohz_active);
+       tne = READ_ONCE(tick_nohz_active);
        if (tne != rdtp->tick_nohz_enabled_snap) {
                if (rcu_cpu_has_callbacks(NULL))
                        invoke_rcu_core(); /* force nohz to see update. */
@@ -1580,7 +1592,6 @@ static void rcu_prepare_for_idle(void)
                if (needwake)
                        rcu_gp_kthread_wake(rsp);
        }
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 }
 
 /*
@@ -1590,12 +1601,11 @@ static void rcu_prepare_for_idle(void)
  */
 static void rcu_cleanup_after_idle(void)
 {
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
-       if (rcu_is_nocb_cpu(smp_processor_id()))
+       if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
+           rcu_is_nocb_cpu(smp_processor_id()))
                return;
        if (rcu_try_advance_all_cbs())
                invoke_rcu_core();
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 }
 
 /*
@@ -1760,7 +1770,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
               atomic_read(&rdtp->dynticks) & 0xfff,
               rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
               rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
-              ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
+              READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
               fast_no_hz);
 }
 
@@ -1898,11 +1908,11 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
 {
        struct rcu_data *rdp_leader = rdp->nocb_leader;
 
-       if (!ACCESS_ONCE(rdp_leader->nocb_kthread))
+       if (!READ_ONCE(rdp_leader->nocb_kthread))
                return;
-       if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
+       if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
                /* Prior smp_mb__after_atomic() orders against prior enqueue. */
-               ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
+               WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
                wake_up(&rdp_leader->nocb_wq);
        }
 }
@@ -1934,14 +1944,14 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
        ret = atomic_long_read(&rdp->nocb_q_count);
 
 #ifdef CONFIG_PROVE_RCU
-       rhp = ACCESS_ONCE(rdp->nocb_head);
+       rhp = READ_ONCE(rdp->nocb_head);
        if (!rhp)
-               rhp = ACCESS_ONCE(rdp->nocb_gp_head);
+               rhp = READ_ONCE(rdp->nocb_gp_head);
        if (!rhp)
-               rhp = ACCESS_ONCE(rdp->nocb_follower_head);
+               rhp = READ_ONCE(rdp->nocb_follower_head);
 
        /* Having no rcuo kthread but CBs after scheduler starts is bad! */
-       if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp &&
+       if (!READ_ONCE(rdp->nocb_kthread) && rhp &&
            rcu_scheduler_fully_active) {
                /* RCU callback enqueued before CPU first came online??? */
                pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
@@ -1975,12 +1985,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
        atomic_long_add(rhcount, &rdp->nocb_q_count);
        /* rcu_barrier() relies on ->nocb_q_count add before xchg. */
        old_rhpp = xchg(&rdp->nocb_tail, rhtp);
-       ACCESS_ONCE(*old_rhpp) = rhp;
+       WRITE_ONCE(*old_rhpp, rhp);
        atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
        smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
 
        /* If we are not being polled and there is a kthread, awaken it ... */
-       t = ACCESS_ONCE(rdp->nocb_kthread);
+       t = READ_ONCE(rdp->nocb_kthread);
        if (rcu_nocb_poll || !t) {
                trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
                                    TPS("WakeNotPoll"));
@@ -2118,7 +2128,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
        for (;;) {
                wait_event_interruptible(
                        rnp->nocb_gp_wq[c & 0x1],
-                       (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
+                       (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
                if (likely(d))
                        break;
                WARN_ON(signal_pending(current));
@@ -2145,7 +2155,7 @@ wait_again:
        if (!rcu_nocb_poll) {
                trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
                wait_event_interruptible(my_rdp->nocb_wq,
-                               !ACCESS_ONCE(my_rdp->nocb_leader_sleep));
+                               !READ_ONCE(my_rdp->nocb_leader_sleep));
                /* Memory barrier handled by smp_mb() calls below and repoll. */
        } else if (firsttime) {
                firsttime = false; /* Don't drown trace log with "Poll"! */
@@ -2159,12 +2169,12 @@ wait_again:
         */
        gotcbs = false;
        for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
-               rdp->nocb_gp_head = ACCESS_ONCE(rdp->nocb_head);
+               rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
                if (!rdp->nocb_gp_head)
                        continue;  /* No CBs here, try next follower. */
 
                /* Move callbacks to wait-for-GP list, which is empty. */
-               ACCESS_ONCE(rdp->nocb_head) = NULL;
+               WRITE_ONCE(rdp->nocb_head, NULL);
                rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
                gotcbs = true;
        }
@@ -2184,7 +2194,7 @@ wait_again:
                my_rdp->nocb_leader_sleep = true;
                smp_mb();  /* Ensure _sleep true before scan. */
                for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower)
-                       if (ACCESS_ONCE(rdp->nocb_head)) {
+                       if (READ_ONCE(rdp->nocb_head)) {
                                /* Found CB, so short-circuit next wait. */
                                my_rdp->nocb_leader_sleep = false;
                                break;
@@ -2205,7 +2215,7 @@ wait_again:
 
        /* Each pass through the following loop wakes a follower, if needed. */
        for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
-               if (ACCESS_ONCE(rdp->nocb_head))
+               if (READ_ONCE(rdp->nocb_head))
                        my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
                if (!rdp->nocb_gp_head)
                        continue; /* No CBs, so no need to wake follower. */
@@ -2241,7 +2251,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
                        trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
                                            "FollowerSleep");
                        wait_event_interruptible(rdp->nocb_wq,
-                                                ACCESS_ONCE(rdp->nocb_follower_head));
+                                                READ_ONCE(rdp->nocb_follower_head));
                } else if (firsttime) {
                        /* Don't drown trace log with "Poll"! */
                        firsttime = false;
@@ -2282,10 +2292,10 @@ static int rcu_nocb_kthread(void *arg)
                        nocb_follower_wait(rdp);
 
                /* Pull the ready-to-invoke callbacks onto local list. */
-               list = ACCESS_ONCE(rdp->nocb_follower_head);
+               list = READ_ONCE(rdp->nocb_follower_head);
                BUG_ON(!list);
                trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
-               ACCESS_ONCE(rdp->nocb_follower_head) = NULL;
+               WRITE_ONCE(rdp->nocb_follower_head, NULL);
                tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
 
                /* Each pass through the following loop invokes a callback. */
@@ -2324,7 +2334,7 @@ static int rcu_nocb_kthread(void *arg)
 /* Is a deferred wakeup of rcu_nocb_kthread() required? */
 static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
 {
-       return ACCESS_ONCE(rdp->nocb_defer_wakeup);
+       return READ_ONCE(rdp->nocb_defer_wakeup);
 }
 
 /* Do a deferred wakeup of rcu_nocb_kthread(). */
@@ -2334,8 +2344,8 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
 
        if (!rcu_nocb_need_deferred_wakeup(rdp))
                return;
-       ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup);
-       ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT;
+       ndw = READ_ONCE(rdp->nocb_defer_wakeup);
+       WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT);
        wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
        trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
 }
@@ -2448,7 +2458,7 @@ static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
        t = kthread_run(rcu_nocb_kthread, rdp_spawn,
                        "rcuo%c/%d", rsp->abbr, cpu);
        BUG_ON(IS_ERR(t));
-       ACCESS_ONCE(rdp_spawn->nocb_kthread) = t;
+       WRITE_ONCE(rdp_spawn->nocb_kthread, t);
 }
 
 /*
@@ -2663,7 +2673,7 @@ static void rcu_sysidle_enter(int irq)
 
        /* Record start of fully idle period. */
        j = jiffies;
-       ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j;
+       WRITE_ONCE(rdtp->dynticks_idle_jiffies, j);
        smp_mb__before_atomic();
        atomic_inc(&rdtp->dynticks_idle);
        smp_mb__after_atomic();
@@ -2681,7 +2691,7 @@ static void rcu_sysidle_enter(int irq)
  */
 void rcu_sysidle_force_exit(void)
 {
-       int oldstate = ACCESS_ONCE(full_sysidle_state);
+       int oldstate = READ_ONCE(full_sysidle_state);
        int newoldstate;
 
        /*
@@ -2794,7 +2804,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
        smp_mb(); /* Read counters before timestamps. */
 
        /* Pick up timestamps. */
-       j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies);
+       j = READ_ONCE(rdtp->dynticks_idle_jiffies);
        /* If this CPU entered idle more recently, update maxj timestamp. */
        if (ULONG_CMP_LT(*maxj, j))
                *maxj = j;
@@ -2831,11 +2841,11 @@ static unsigned long rcu_sysidle_delay(void)
 static void rcu_sysidle(unsigned long j)
 {
        /* Check the current state. */
-       switch (ACCESS_ONCE(full_sysidle_state)) {
+       switch (READ_ONCE(full_sysidle_state)) {
        case RCU_SYSIDLE_NOT:
 
                /* First time all are idle, so note a short idle period. */
-               ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT;
+               WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT);
                break;
 
        case RCU_SYSIDLE_SHORT:
@@ -2873,7 +2883,7 @@ static void rcu_sysidle_cancel(void)
 {
        smp_mb();
        if (full_sysidle_state > RCU_SYSIDLE_SHORT)
-               ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
+               WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT);
 }
 
 /*
@@ -2925,7 +2935,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
        smp_mb();  /* grace period precedes setting inuse. */
 
        rshp = container_of(rhp, struct rcu_sysidle_head, rh);
-       ACCESS_ONCE(rshp->inuse) = 0;
+       WRITE_ONCE(rshp->inuse, 0);
 }
 
 /*
@@ -2936,7 +2946,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
 bool rcu_sys_is_idle(void)
 {
        static struct rcu_sysidle_head rsh;
-       int rss = ACCESS_ONCE(full_sysidle_state);
+       int rss = READ_ONCE(full_sysidle_state);
 
        if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
                return false;
@@ -2964,7 +2974,7 @@ bool rcu_sys_is_idle(void)
                        }
                        rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
                        oldrss = rss;
-                       rss = ACCESS_ONCE(full_sysidle_state);
+                       rss = READ_ONCE(full_sysidle_state);
                }
        }
 
@@ -3048,10 +3058,10 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
 #ifdef CONFIG_NO_HZ_FULL
        if (tick_nohz_full_cpu(smp_processor_id()) &&
            (!rcu_gp_in_progress(rsp) ||
-            ULONG_CMP_LT(jiffies, ACCESS_ONCE(rsp->gp_start) + HZ)))
-               return 1;
+            ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ)))
+               return true;
 #endif /* #ifdef CONFIG_NO_HZ_FULL */
-       return 0;
+       return false;
 }
 
 /*
@@ -3077,7 +3087,7 @@ static void rcu_bind_gp_kthread(void)
 static void rcu_dynticks_task_enter(void)
 {
 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
-       ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
+       WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
 }
 
@@ -3085,6 +3095,6 @@ static void rcu_dynticks_task_enter(void)
 static void rcu_dynticks_task_exit(void)
 {
 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
-       ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
+       WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
 }
index f92361e..3ea7ffc 100644 (file)
@@ -277,7 +277,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
        seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
                   rsp->n_force_qs, rsp->n_force_qs_ngp,
                   rsp->n_force_qs - rsp->n_force_qs_ngp,
-                  ACCESS_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen);
+                  READ_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen);
        for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
                if (rnp->level != level) {
                        seq_puts(m, "\n");
@@ -323,8 +323,8 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
        struct rcu_node *rnp = &rsp->node[0];
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
-       completed = ACCESS_ONCE(rsp->completed);
-       gpnum = ACCESS_ONCE(rsp->gpnum);
+       completed = READ_ONCE(rsp->completed);
+       gpnum = READ_ONCE(rsp->gpnum);
        if (completed == gpnum)
                gpage = 0;
        else
index 1f13335..afaecb7 100644 (file)
@@ -150,14 +150,14 @@ void __rcu_read_unlock(void)
                barrier();  /* critical section before exit code. */
                t->rcu_read_lock_nesting = INT_MIN;
                barrier();  /* assign before ->rcu_read_unlock_special load */
-               if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
+               if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
                        rcu_read_unlock_special(t);
                barrier();  /* ->rcu_read_unlock_special load before assign */
                t->rcu_read_lock_nesting = 0;
        }
 #ifdef CONFIG_PROVE_LOCKING
        {
-               int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+               int rrln = READ_ONCE(t->rcu_read_lock_nesting);
 
                WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
        }
@@ -389,17 +389,17 @@ module_param(rcu_cpu_stall_timeout, int, 0644);
 
 int rcu_jiffies_till_stall_check(void)
 {
-       int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
+       int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout);
 
        /*
         * Limit check must be consistent with the Kconfig limits
         * for CONFIG_RCU_CPU_STALL_TIMEOUT.
         */
        if (till_stall_check < 3) {
-               ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
+               WRITE_ONCE(rcu_cpu_stall_timeout, 3);
                till_stall_check = 3;
        } else if (till_stall_check > 300) {
-               ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
+               WRITE_ONCE(rcu_cpu_stall_timeout, 300);
                till_stall_check = 300;
        }
        return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
@@ -550,12 +550,12 @@ static void check_holdout_task(struct task_struct *t,
 {
        int cpu;
 
-       if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
-           t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
-           !ACCESS_ONCE(t->on_rq) ||
+       if (!READ_ONCE(t->rcu_tasks_holdout) ||
+           t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
+           !READ_ONCE(t->on_rq) ||
            (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
             !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
-               ACCESS_ONCE(t->rcu_tasks_holdout) = false;
+               WRITE_ONCE(t->rcu_tasks_holdout, false);
                list_del_init(&t->rcu_tasks_holdout_list);
                put_task_struct(t);
                return;
@@ -639,11 +639,11 @@ static int __noreturn rcu_tasks_kthread(void *arg)
                 */
                rcu_read_lock();
                for_each_process_thread(g, t) {
-                       if (t != current && ACCESS_ONCE(t->on_rq) &&
+                       if (t != current && READ_ONCE(t->on_rq) &&
                            !is_idle_task(t)) {
                                get_task_struct(t);
-                               t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
-                               ACCESS_ONCE(t->rcu_tasks_holdout) = true;
+                               t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
+                               WRITE_ONCE(t->rcu_tasks_holdout, true);
                                list_add(&t->rcu_tasks_holdout_list,
                                         &rcu_tasks_holdouts);
                        }
@@ -672,7 +672,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
                        struct task_struct *t1;
 
                        schedule_timeout_interruptible(HZ);
-                       rtst = ACCESS_ONCE(rcu_task_stall_timeout);
+                       rtst = READ_ONCE(rcu_task_stall_timeout);
                        needreport = rtst > 0 &&
                                     time_after(jiffies, lastreport + rtst);
                        if (needreport)
@@ -728,7 +728,7 @@ static void rcu_spawn_tasks_kthread(void)
        static struct task_struct *rcu_tasks_kthread_ptr;
        struct task_struct *t;
 
-       if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
+       if (READ_ONCE(rcu_tasks_kthread_ptr)) {
                smp_mb(); /* Ensure caller sees full kthread. */
                return;
        }
@@ -740,7 +740,7 @@ static void rcu_spawn_tasks_kthread(void)
        t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
        BUG_ON(IS_ERR(t));
        smp_mb(); /* Ensure others see full kthread. */
-       ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
+       WRITE_ONCE(rcu_tasks_kthread_ptr, t);
        mutex_unlock(&rcu_tasks_kthread_mutex);
 }
 
index 46be870..6768797 100644 (file)
@@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 
-obj-y += core.o proc.o clock.o cputime.o
+obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
 obj-y += wait.o completion.o idle.o
 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
index eae160d..750ed60 100644 (file)
@@ -1,5 +1,3 @@
-#ifdef CONFIG_SCHED_AUTOGROUP
-
 #include "sched.h"
 
 #include <linux/proc_fs.h>
@@ -141,7 +139,7 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 
        p->signal->autogroup = autogroup_kref_get(ag);
 
-       if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
+       if (!READ_ONCE(sysctl_sched_autogroup_enabled))
                goto out;
 
        for_each_thread(p, t)
@@ -249,5 +247,3 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen)
        return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
 }
 #endif /* CONFIG_SCHED_DEBUG */
-
-#endif /* CONFIG_SCHED_AUTOGROUP */
index 8bd0471..890c95f 100644 (file)
@@ -29,7 +29,7 @@ extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
 {
-       int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+       int enabled = READ_ONCE(sysctl_sched_autogroup_enabled);
 
        if (enabled && task_wants_autogroup(p, tg))
                return p->signal->autogroup->tg;
index fe22f75..f89ca9b 100644 (file)
@@ -511,7 +511,7 @@ static bool set_nr_and_not_polling(struct task_struct *p)
 static bool set_nr_if_polling(struct task_struct *p)
 {
        struct thread_info *ti = task_thread_info(p);
-       typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags);
+       typeof(ti->flags) old, val = READ_ONCE(ti->flags);
 
        for (;;) {
                if (!(val & _TIF_POLLING_NRFLAG))
@@ -541,6 +541,52 @@ static bool set_nr_if_polling(struct task_struct *p)
 #endif
 #endif
 
+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+{
+       struct wake_q_node *node = &task->wake_q;
+
+       /*
+        * Atomically grab the task, if ->wake_q is !nil already it means
+        * its already queued (either by us or someone else) and will get the
+        * wakeup due to that.
+        *
+        * This cmpxchg() implies a full barrier, which pairs with the write
+        * barrier implied by the wakeup in wake_up_list().
+        */
+       if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+               return;
+
+       get_task_struct(task);
+
+       /*
+        * The head is context local, there can be no concurrency.
+        */
+       *head->lastp = node;
+       head->lastp = &node->next;
+}
+
+void wake_up_q(struct wake_q_head *head)
+{
+       struct wake_q_node *node = head->first;
+
+       while (node != WAKE_Q_TAIL) {
+               struct task_struct *task;
+
+               task = container_of(node, struct task_struct, wake_q);
+               BUG_ON(!task);
+               /* task can safely be re-inserted now */
+               node = node->next;
+               task->wake_q.next = NULL;
+
+               /*
+                * wake_up_process() implies a wmb() to pair with the queueing
+                * in wake_q_add() so as not to miss wakeups.
+                */
+               wake_up_process(task);
+               put_task_struct(task);
+       }
+}
+
 /*
  * resched_curr - mark rq's current task 'to be rescheduled now'.
  *
@@ -1049,7 +1095,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                if (p->sched_class->migrate_task_rq)
                        p->sched_class->migrate_task_rq(p, new_cpu);
                p->se.nr_migrations++;
-               perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
+               perf_event_task_migrate(p);
        }
 
        __set_task_cpu(p, new_cpu);
@@ -2105,12 +2151,15 @@ void wake_up_new_task(struct task_struct *p)
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 
+static struct static_key preempt_notifier_key = STATIC_KEY_INIT_FALSE;
+
 /**
  * preempt_notifier_register - tell me when current is being preempted & rescheduled
  * @notifier: notifier struct to register
  */
 void preempt_notifier_register(struct preempt_notifier *notifier)
 {
+       static_key_slow_inc(&preempt_notifier_key);
        hlist_add_head(&notifier->link, &current->preempt_notifiers);
 }
 EXPORT_SYMBOL_GPL(preempt_notifier_register);
@@ -2119,15 +2168,16 @@ EXPORT_SYMBOL_GPL(preempt_notifier_register);
  * preempt_notifier_unregister - no longer interested in preemption notifications
  * @notifier: notifier struct to unregister
  *
- * This is safe to call from within a preemption notifier.
+ * This is *not* safe to call from within a preemption notifier.
  */
 void preempt_notifier_unregister(struct preempt_notifier *notifier)
 {
        hlist_del(&notifier->link);
+       static_key_slow_dec(&preempt_notifier_key);
 }
 EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
 
-static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr)
 {
        struct preempt_notifier *notifier;
 
@@ -2135,9 +2185,15 @@ static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
                notifier->ops->sched_in(notifier, raw_smp_processor_id());
 }
 
+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+       if (static_key_false(&preempt_notifier_key))
+               __fire_sched_in_preempt_notifiers(curr);
+}
+
 static void
-fire_sched_out_preempt_notifiers(struct task_struct *curr,
-                                struct task_struct *next)
+__fire_sched_out_preempt_notifiers(struct task_struct *curr,
+                                  struct task_struct *next)
 {
        struct preempt_notifier *notifier;
 
@@ -2145,13 +2201,21 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
                notifier->ops->sched_out(notifier, next);
 }
 
+static __always_inline void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+                                struct task_struct *next)
+{
+       if (static_key_false(&preempt_notifier_key))
+               __fire_sched_out_preempt_notifiers(curr, next);
+}
+
 #else /* !CONFIG_PREEMPT_NOTIFIERS */
 
-static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr)
 {
 }
 
-static void
+static inline void
 fire_sched_out_preempt_notifiers(struct task_struct *curr,
                                 struct task_struct *next)
 {
@@ -2397,9 +2461,9 @@ unsigned long nr_iowait_cpu(int cpu)
 
 void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
 {
-       struct rq *this = this_rq();
-       *nr_waiters = atomic_read(&this->nr_iowait);
-       *load = this->cpu_load[0];
+       struct rq *rq = this_rq();
+       *nr_waiters = atomic_read(&rq->nr_iowait);
+       *load = rq->load.weight;
 }
 
 #ifdef CONFIG_SMP
@@ -2497,6 +2561,7 @@ void scheduler_tick(void)
        update_rq_clock(rq);
        curr->sched_class->task_tick(rq, curr, 0);
        update_cpu_load_active(rq);
+       calc_global_load_tick(rq);
        raw_spin_unlock(&rq->lock);
 
        perf_event_task_tick();
@@ -2525,7 +2590,7 @@ void scheduler_tick(void)
 u64 scheduler_tick_max_deferment(void)
 {
        struct rq *rq = this_rq();
-       unsigned long next, now = ACCESS_ONCE(jiffies);
+       unsigned long next, now = READ_ONCE(jiffies);
 
        next = rq->last_sched_tick + HZ;
 
@@ -2726,9 +2791,7 @@ again:
  *          - return from syscall or exception to user-space
  *          - return from interrupt-handler to user-space
  *
- * WARNING: all callers must re-check need_resched() afterward and reschedule
- * accordingly in case an event triggered the need for rescheduling (such as
- * an interrupt waking up a task) while preemption was disabled in __schedule().
+ * WARNING: must be called with preemption disabled!
  */
 static void __sched __schedule(void)
 {
@@ -2737,7 +2800,6 @@ static void __sched __schedule(void)
        struct rq *rq;
        int cpu;
 
-       preempt_disable();
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
        rcu_note_context_switch();
@@ -2801,8 +2863,6 @@ static void __sched __schedule(void)
                raw_spin_unlock_irq(&rq->lock);
 
        post_schedule(rq);
-
-       sched_preempt_enable_no_resched();
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -2823,7 +2883,9 @@ asmlinkage __visible void __sched schedule(void)
 
        sched_submit_work(tsk);
        do {
+               preempt_disable();
                __schedule();
+               sched_preempt_enable_no_resched();
        } while (need_resched());
 }
 EXPORT_SYMBOL(schedule);
@@ -2862,15 +2924,14 @@ void __sched schedule_preempt_disabled(void)
 static void __sched notrace preempt_schedule_common(void)
 {
        do {
-               __preempt_count_add(PREEMPT_ACTIVE);
+               preempt_active_enter();
                __schedule();
-               __preempt_count_sub(PREEMPT_ACTIVE);
+               preempt_active_exit();
 
                /*
                 * Check again in case we missed a preemption opportunity
                 * between schedule and now.
                 */
-               barrier();
        } while (need_resched());
 }
 
@@ -2894,9 +2955,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
 
-#ifdef CONFIG_CONTEXT_TRACKING
 /**
- * preempt_schedule_context - preempt_schedule called by tracing
+ * preempt_schedule_notrace - preempt_schedule called by tracing
  *
  * The tracing infrastructure uses preempt_enable_notrace to prevent
  * recursion and tracing preempt enabling caused by the tracing
@@ -2909,7 +2969,7 @@ EXPORT_SYMBOL(preempt_schedule);
  * instead of preempt_schedule() to exit user context if needed before
  * calling the scheduler.
  */
-asmlinkage __visible void __sched notrace preempt_schedule_context(void)
+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 {
        enum ctx_state prev_ctx;
 
@@ -2917,7 +2977,13 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void)
                return;
 
        do {
-               __preempt_count_add(PREEMPT_ACTIVE);
+               /*
+                * Use raw __prempt_count() ops that don't call function.
+                * We can't call functions before disabling preemption which
+                * disarm preemption tracing recursions.
+                */
+               __preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET);
+               barrier();
                /*
                 * Needs preempt disabled in case user_exit() is traced
                 * and the tracer calls preempt_enable_notrace() causing
@@ -2927,12 +2993,11 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void)
                __schedule();
                exception_exit(prev_ctx);
 
-               __preempt_count_sub(PREEMPT_ACTIVE);
                barrier();
+               __preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET);
        } while (need_resched());
 }
-EXPORT_SYMBOL_GPL(preempt_schedule_context);
-#endif /* CONFIG_CONTEXT_TRACKING */
+EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
 
 #endif /* CONFIG_PREEMPT */
 
@@ -2952,17 +3017,11 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
        prev_state = exception_enter();
 
        do {
-               __preempt_count_add(PREEMPT_ACTIVE);
+               preempt_active_enter();
                local_irq_enable();
                __schedule();
                local_irq_disable();
-               __preempt_count_sub(PREEMPT_ACTIVE);
-
-               /*
-                * Check again in case we missed a preemption opportunity
-                * between schedule and now.
-                */
-               barrier();
+               preempt_active_exit();
        } while (need_resched());
 
        exception_exit(prev_state);
@@ -3040,7 +3099,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
                if (!dl_prio(p->normal_prio) ||
                    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
                        p->dl.dl_boosted = 1;
-                       p->dl.dl_throttled = 0;
                        enqueue_flag = ENQUEUE_REPLENISH;
                } else
                        p->dl.dl_boosted = 0;
@@ -3300,15 +3358,18 @@ static void __setscheduler_params(struct task_struct *p,
 
 /* Actually do priority change: must hold pi & rq lock. */
 static void __setscheduler(struct rq *rq, struct task_struct *p,
-                          const struct sched_attr *attr)
+                          const struct sched_attr *attr, bool keep_boost)
 {
        __setscheduler_params(p, attr);
 
        /*
-        * If we get here, there was no pi waiters boosting the
-        * task. It is safe to use the normal prio.
+        * Keep a potential priority boosting if called from
+        * sched_setscheduler().
         */
-       p->prio = normal_prio(p);
+       if (keep_boost)
+               p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
+       else
+               p->prio = normal_prio(p);
 
        if (dl_prio(p->prio))
                p->sched_class = &dl_sched_class;
@@ -3408,7 +3469,7 @@ static int __sched_setscheduler(struct task_struct *p,
        int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
                      MAX_RT_PRIO - 1 - attr->sched_priority;
        int retval, oldprio, oldpolicy = -1, queued, running;
-       int policy = attr->sched_policy;
+       int new_effective_prio, policy = attr->sched_policy;
        unsigned long flags;
        const struct sched_class *prev_class;
        struct rq *rq;
@@ -3590,15 +3651,14 @@ change:
        oldprio = p->prio;
 
        /*
-        * Special case for priority boosted tasks.
-        *
-        * If the new priority is lower or equal (user space view)
-        * than the current (boosted) priority, we just store the new
+        * Take priority boosted tasks into account. If the new
+        * effective priority is unchanged, we just store the new
         * normal parameters and do not touch the scheduler class and
         * the runqueue. This will be done when the task deboost
         * itself.
         */
-       if (rt_mutex_check_prio(p, newprio)) {
+       new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+       if (new_effective_prio == oldprio) {
                __setscheduler_params(p, attr);
                task_rq_unlock(rq, p, &flags);
                return 0;
@@ -3612,7 +3672,7 @@ change:
                put_prev_task(rq, p);
 
        prev_class = p->sched_class;
-       __setscheduler(rq, p, attr);
+       __setscheduler(rq, p, attr, true);
 
        if (running)
                p->sched_class->set_curr_task(rq);
@@ -4387,10 +4447,7 @@ long __sched io_schedule_timeout(long timeout)
        long ret;
 
        current->in_iowait = 1;
-       if (old_iowait)
-               blk_schedule_flush_plug(current);
-       else
-               blk_flush_plug(current);
+       blk_schedule_flush_plug(current);
 
        delayacct_blkio_start();
        rq = raw_rq();
@@ -5315,7 +5372,7 @@ static struct notifier_block migration_notifier = {
        .priority = CPU_PRI_MIGRATION,
 };
 
-static void __cpuinit set_cpu_rq_start_time(void)
+static void set_cpu_rq_start_time(void)
 {
        int cpu = smp_processor_id();
        struct rq *rq = cpu_rq(cpu);
@@ -6997,27 +7054,23 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
        unsigned long flags;
        long cpu = (long)hcpu;
        struct dl_bw *dl_b;
+       bool overflow;
+       int cpus;
 
-       switch (action & ~CPU_TASKS_FROZEN) {
+       switch (action) {
        case CPU_DOWN_PREPARE:
-               /* explicitly allow suspend */
-               if (!(action & CPU_TASKS_FROZEN)) {
-                       bool overflow;
-                       int cpus;
-
-                       rcu_read_lock_sched();
-                       dl_b = dl_bw_of(cpu);
+               rcu_read_lock_sched();
+               dl_b = dl_bw_of(cpu);
 
-                       raw_spin_lock_irqsave(&dl_b->lock, flags);
-                       cpus = dl_bw_cpus(cpu);
-                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
-                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+               raw_spin_lock_irqsave(&dl_b->lock, flags);
+               cpus = dl_bw_cpus(cpu);
+               overflow = __dl_overflow(dl_b, cpus, 0, 0);
+               raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 
-                       rcu_read_unlock_sched();
+               rcu_read_unlock_sched();
 
-                       if (overflow)
-                               return notifier_from_errno(-EBUSY);
-               }
+               if (overflow)
+                       return notifier_from_errno(-EBUSY);
                cpuset_update_active_cpus(false);
                break;
        case CPU_DOWN_PREPARE_FROZEN:
@@ -7346,7 +7399,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
        queued = task_on_rq_queued(p);
        if (queued)
                dequeue_task(rq, p, 0);
-       __setscheduler(rq, p, &attr);
+       __setscheduler(rq, p, &attr, false);
        if (queued) {
                enqueue_task(rq, p, 0);
                resched_curr(rq);
@@ -7739,11 +7792,11 @@ static long sched_group_rt_runtime(struct task_group *tg)
        return rt_runtime_us;
 }
 
-static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
+static int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
 {
        u64 rt_runtime, rt_period;
 
-       rt_period = (u64)rt_period_us * NSEC_PER_USEC;
+       rt_period = rt_period_us * NSEC_PER_USEC;
        rt_runtime = tg->rt_bandwidth.rt_runtime;
 
        return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
index 8394b1e..f5a64ff 100644 (file)
@@ -567,7 +567,7 @@ static void cputime_advance(cputime_t *counter, cputime_t new)
 {
        cputime_t old;
 
-       while (new > (old = ACCESS_ONCE(*counter)))
+       while (new > (old = READ_ONCE(*counter)))
                cmpxchg_cputime(counter, old, new);
 }
 
index 5e95145..392e8fb 100644 (file)
@@ -640,7 +640,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 }
 
 static
-int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)
+int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
 {
        return (dl_se->runtime <= 0);
 }
@@ -684,7 +684,7 @@ static void update_curr_dl(struct rq *rq)
        sched_rt_avg_update(rq, delta_exec);
 
        dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
-       if (dl_runtime_exceeded(rq, dl_se)) {
+       if (dl_runtime_exceeded(dl_se)) {
                dl_se->dl_throttled = 1;
                __dequeue_task_dl(rq, curr, 0);
                if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
@@ -995,7 +995,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
        rq = cpu_rq(cpu);
 
        rcu_read_lock();
-       curr = ACCESS_ONCE(rq->curr); /* unlocked access */
+       curr = READ_ONCE(rq->curr); /* unlocked access */
 
        /*
         * If we are dealing with a -deadline task, we must
@@ -1012,7 +1012,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
            (p->nr_cpus_allowed > 1)) {
                int target = find_later_rq(p);
 
-               if (target != -1)
+               if (target != -1 &&
+                               dl_time_before(p->dl.deadline,
+                                       cpu_rq(target)->dl.earliest_dl.curr))
                        cpu = target;
        }
        rcu_read_unlock();
@@ -1230,6 +1232,32 @@ next_node:
        return NULL;
 }
 
+/*
+ * Return the earliest pushable rq's task, which is suitable to be executed
+ * on the CPU, NULL otherwise:
+ */
+static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
+{
+       struct rb_node *next_node = rq->dl.pushable_dl_tasks_leftmost;
+       struct task_struct *p = NULL;
+
+       if (!has_pushable_dl_tasks(rq))
+               return NULL;
+
+next_node:
+       if (next_node) {
+               p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
+
+               if (pick_dl_task(rq, p, cpu))
+                       return p;
+
+               next_node = rb_next(next_node);
+               goto next_node;
+       }
+
+       return NULL;
+}
+
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
 
 static int find_later_rq(struct task_struct *task)
@@ -1333,6 +1361,17 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 
                later_rq = cpu_rq(cpu);
 
+               if (!dl_time_before(task->dl.deadline,
+                                       later_rq->dl.earliest_dl.curr)) {
+                       /*
+                        * Target rq has tasks of equal or earlier deadline,
+                        * retrying does not release any lock and is unlikely
+                        * to yield a different result.
+                        */
+                       later_rq = NULL;
+                       break;
+               }
+
                /* Retry if something changed. */
                if (double_lock_balance(rq, later_rq)) {
                        if (unlikely(task_rq(task) != rq ||
@@ -1514,7 +1553,7 @@ static int pull_dl_task(struct rq *this_rq)
                if (src_rq->dl.dl_nr_running <= 1)
                        goto skip;
 
-               p = pick_next_earliest_dl_task(src_rq, this_cpu);
+               p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
 
                /*
                 * We found a task to be pulled if:
@@ -1659,7 +1698,7 @@ static void rq_offline_dl(struct rq *rq)
        cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
 }
 
-void init_sched_dl_class(void)
+void __init init_sched_dl_class(void)
 {
        unsigned int i;
 
index a245c1f..704683c 100644 (file)
@@ -132,12 +132,14 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
                p->prio);
 #ifdef CONFIG_SCHEDSTATS
        SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-               SPLIT_NS(p->se.vruntime),
+               SPLIT_NS(p->se.statistics.wait_sum),
                SPLIT_NS(p->se.sum_exec_runtime),
                SPLIT_NS(p->se.statistics.sum_sleep_runtime));
 #else
-       SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
-               0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
+       SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
+               0LL, 0L,
+               SPLIT_NS(p->se.sum_exec_runtime),
+               0LL, 0L);
 #endif
 #ifdef CONFIG_NUMA_BALANCING
        SEQ_printf(m, " %d", task_node(p));
@@ -156,7 +158,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
        SEQ_printf(m,
        "\nrunnable tasks:\n"
        "            task   PID         tree-key  switches  prio"
-       "     exec-runtime         sum-exec        sum-sleep\n"
+       "     wait-time             sum-exec        sum-sleep\n"
        "------------------------------------------------------"
        "----------------------------------------------------\n");
 
@@ -582,6 +584,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
        nr_switches = p->nvcsw + p->nivcsw;
 
 #ifdef CONFIG_SCHEDSTATS
+       PN(se.statistics.sum_sleep_runtime);
        PN(se.statistics.wait_start);
        PN(se.statistics.sleep_start);
        PN(se.statistics.block_start);
index ffeaa41..433061d 100644 (file)
@@ -141,9 +141,9 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
  *
  * This idea comes from the SD scheduler of Con Kolivas:
  */
-static int get_update_sysctl_factor(void)
+static unsigned int get_update_sysctl_factor(void)
 {
-       unsigned int cpus = min_t(int, num_online_cpus(), 8);
+       unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
        unsigned int factor;
 
        switch (sysctl_sched_tunable_scaling) {
@@ -576,7 +576,7 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
                loff_t *ppos)
 {
        int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-       int factor = get_update_sysctl_factor();
+       unsigned int factor = get_update_sysctl_factor();
 
        if (ret || !write)
                return ret;
@@ -834,7 +834,7 @@ static unsigned int task_nr_scan_windows(struct task_struct *p)
 
 static unsigned int task_scan_min(struct task_struct *p)
 {
-       unsigned int scan_size = ACCESS_ONCE(sysctl_numa_balancing_scan_size);
+       unsigned int scan_size = READ_ONCE(sysctl_numa_balancing_scan_size);
        unsigned int scan, floor;
        unsigned int windows = 1;
 
@@ -1198,11 +1198,9 @@ static void task_numa_assign(struct task_numa_env *env,
 static bool load_too_imbalanced(long src_load, long dst_load,
                                struct task_numa_env *env)
 {
+       long imb, old_imb;
+       long orig_src_load, orig_dst_load;
        long src_capacity, dst_capacity;
-       long orig_src_load;
-       long load_a, load_b;
-       long moved_load;
-       long imb;
 
        /*
         * The load is corrected for the CPU capacity available on each node.
@@ -1215,39 +1213,30 @@ static bool load_too_imbalanced(long src_load, long dst_load,
        dst_capacity = env->dst_stats.compute_capacity;
 
        /* We care about the slope of the imbalance, not the direction. */
-       load_a = dst_load;
-       load_b = src_load;
-       if (load_a < load_b)
-               swap(load_a, load_b);
+       if (dst_load < src_load)
+               swap(dst_load, src_load);
 
        /* Is the difference below the threshold? */
-       imb = load_a * src_capacity * 100 -
-               load_b * dst_capacity * env->imbalance_pct;
+       imb = dst_load * src_capacity * 100 -
+             src_load * dst_capacity * env->imbalance_pct;
        if (imb <= 0)
                return false;
 
        /*
         * The imbalance is above the allowed threshold.
-        * Allow a move that brings us closer to a balanced situation,
-        * without moving things past the point of balance.
+        * Compare it with the old imbalance.
         */
        orig_src_load = env->src_stats.load;
+       orig_dst_load = env->dst_stats.load;
 
-       /*
-        * In a task swap, there will be one load moving from src to dst,
-        * and another moving back. This is the net sum of both moves.
-        * A simple task move will always have a positive value.
-        * Allow the move if it brings the system closer to a balanced
-        * situation, without crossing over the balance point.
-        */
-       moved_load = orig_src_load - src_load;
+       if (orig_dst_load < orig_src_load)
+               swap(orig_dst_load, orig_src_load);
 
-       if (moved_load > 0)
-               /* Moving src -> dst. Did we overshoot balance? */
-               return src_load * dst_capacity < dst_load * src_capacity;
-       else
-               /* Moving dst -> src. Did we overshoot balance? */
-               return dst_load * src_capacity < src_load * dst_capacity;
+       old_imb = orig_dst_load * src_capacity * 100 -
+                 orig_src_load * dst_capacity * env->imbalance_pct;
+
+       /* Would this change make things worse? */
+       return (imb > old_imb);
 }
 
 /*
@@ -1409,6 +1398,30 @@ static void task_numa_find_cpu(struct task_numa_env *env,
        }
 }
 
+/* Only move tasks to a NUMA node less busy than the current node. */
+static bool numa_has_capacity(struct task_numa_env *env)
+{
+       struct numa_stats *src = &env->src_stats;
+       struct numa_stats *dst = &env->dst_stats;
+
+       if (src->has_free_capacity && !dst->has_free_capacity)
+               return false;
+
+       /*
+        * Only consider a task move if the source has a higher load
+        * than the destination, corrected for CPU capacity on each node.
+        *
+        *      src->load                dst->load
+        * --------------------- vs ---------------------
+        * src->compute_capacity    dst->compute_capacity
+        */
+       if (src->load * dst->compute_capacity >
+           dst->load * src->compute_capacity)
+               return true;
+
+       return false;
+}
+
 static int task_numa_migrate(struct task_struct *p)
 {
        struct task_numa_env env = {
@@ -1463,7 +1476,8 @@ static int task_numa_migrate(struct task_struct *p)
        update_numa_stats(&env.dst_stats, env.dst_nid);
 
        /* Try to find a spot on the preferred nid. */
-       task_numa_find_cpu(&env, taskimp, groupimp);
+       if (numa_has_capacity(&env))
+               task_numa_find_cpu(&env, taskimp, groupimp);
 
        /*
         * Look at other nodes in these cases:
@@ -1494,7 +1508,8 @@ static int task_numa_migrate(struct task_struct *p)
                        env.dist = dist;
                        env.dst_nid = nid;
                        update_numa_stats(&env.dst_stats, env.dst_nid);
-                       task_numa_find_cpu(&env, taskimp, groupimp);
+                       if (numa_has_capacity(&env))
+                               task_numa_find_cpu(&env, taskimp, groupimp);
                }
        }
 
@@ -1794,7 +1809,12 @@ static void task_numa_placement(struct task_struct *p)
        u64 runtime, period;
        spinlock_t *group_lock = NULL;
 
-       seq = ACCESS_ONCE(p->mm->numa_scan_seq);
+       /*
+        * The p->mm->numa_scan_seq field gets updated without
+        * exclusive access. Use READ_ONCE() here to ensure
+        * that the field is read in a single access:
+        */
+       seq = READ_ONCE(p->mm->numa_scan_seq);
        if (p->numa_scan_seq == seq)
                return;
        p->numa_scan_seq = seq;
@@ -1938,7 +1958,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
        }
 
        rcu_read_lock();
-       tsk = ACCESS_ONCE(cpu_rq(cpu)->curr);
+       tsk = READ_ONCE(cpu_rq(cpu)->curr);
 
        if (!cpupid_match_pid(tsk, cpupid))
                goto no_join;
@@ -2107,7 +2127,15 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 
 static void reset_ptenuma_scan(struct task_struct *p)
 {
-       ACCESS_ONCE(p->mm->numa_scan_seq)++;
+       /*
+        * We only did a read acquisition of the mmap sem, so
+        * p->mm->numa_scan_seq is written to without exclusive access
+        * and the update is not guaranteed to be atomic. That's not
+        * much of an issue though, since this is just used for
+        * statistical sampling. Use READ_ONCE/WRITE_ONCE, which are not
+        * expensive, to avoid any form of compiler optimizations:
+        */
+       WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
        p->mm->numa_scan_offset = 0;
 }
 
@@ -2181,7 +2209,7 @@ void task_numa_work(struct callback_head *work)
        }
        for (; vma; vma = vma->vm_next) {
                if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
-                       is_vm_hugetlb_page(vma)) {
+                       is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) {
                        continue;
                }
 
@@ -4323,6 +4351,189 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
+
+/*
+ * per rq 'load' arrray crap; XXX kill this.
+ */
+
+/*
+ * The exact cpuload at various idx values, calculated at every tick would be
+ * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ *
+ * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
+ * on nth tick when cpu may be busy, then we have:
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ *
+ * decay_load_missed() below does efficient calculation of
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * The calculation is approximated on a 128 point scale.
+ * degrade_zero_ticks is the number of ticks after which load at any
+ * particular idx is approximated to be zero.
+ * degrade_factor is a precomputed table, a row for each load idx.
+ * Each column corresponds to degradation factor for a power of two ticks,
+ * based on 128 point scale.
+ * Example:
+ * row 2, col 3 (=12) says that the degradation at load idx 2 after
+ * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
+ *
+ * With this power of 2 load factors, we can degrade the load n times
+ * by looking at 1 bits in n and doing as many mult/shift instead of
+ * n mult/shifts needed by the exact degradation.
+ */
+#define DEGRADE_SHIFT          7
+static const unsigned char
+               degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const unsigned char
+               degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+                                       {0, 0, 0, 0, 0, 0, 0, 0},
+                                       {64, 32, 8, 0, 0, 0, 0, 0},
+                                       {96, 72, 40, 12, 1, 0, 0},
+                                       {112, 98, 75, 43, 15, 1, 0},
+                                       {120, 112, 98, 76, 45, 16, 2} };
+
+/*
+ * Update cpu_load for any missed ticks, due to tickless idle. The backlog
+ * would be when CPU is idle and so we just decay the old load without
+ * adding any new load.
+ */
+static unsigned long
+decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
+{
+       int j = 0;
+
+       if (!missed_updates)
+               return load;
+
+       if (missed_updates >= degrade_zero_ticks[idx])
+               return 0;
+
+       if (idx == 1)
+               return load >> missed_updates;
+
+       while (missed_updates) {
+               if (missed_updates % 2)
+                       load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
+
+               missed_updates >>= 1;
+               j++;
+       }
+       return load;
+}
+
+/*
+ * Update rq->cpu_load[] statistics. This function is usually called every
+ * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+ * every tick. We fix it up based on jiffies.
+ */
+static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
+                             unsigned long pending_updates)
+{
+       int i, scale;
+
+       this_rq->nr_load_updates++;
+
+       /* Update our load: */
+       this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+       for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+               unsigned long old_load, new_load;
+
+               /* scale is effectively 1 << i now, and >> i divides by scale */
+
+               old_load = this_rq->cpu_load[i];
+               old_load = decay_load_missed(old_load, pending_updates - 1, i);
+               new_load = this_load;
+               /*
+                * Round up the averaging division if load is increasing. This
+                * prevents us from getting stuck on 9 if the load is 10, for
+                * example.
+                */
+               if (new_load > old_load)
+                       new_load += scale - 1;
+
+               this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
+       }
+
+       sched_avg_update(this_rq);
+}
+
+#ifdef CONFIG_NO_HZ_COMMON
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
+/*
+ * Called from nohz_idle_balance() to update the load ratings before doing the
+ * idle balance.
+ */
+static void update_idle_cpu_load(struct rq *this_rq)
+{
+       unsigned long curr_jiffies = READ_ONCE(jiffies);
+       unsigned long load = this_rq->cfs.runnable_load_avg;
+       unsigned long pending_updates;
+
+       /*
+        * bail if there's load or we're actually up-to-date.
+        */
+       if (load || curr_jiffies == this_rq->last_load_update_tick)
+               return;
+
+       pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+       this_rq->last_load_update_tick = curr_jiffies;
+
+       __update_cpu_load(this_rq, load, pending_updates);
+}
+
+/*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+       struct rq *this_rq = this_rq();
+       unsigned long curr_jiffies = READ_ONCE(jiffies);
+       unsigned long pending_updates;
+
+       if (curr_jiffies == this_rq->last_load_update_tick)
+               return;
+
+       raw_spin_lock(&this_rq->lock);
+       pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+       if (pending_updates) {
+               this_rq->last_load_update_tick = curr_jiffies;
+               /*
+                * We were idle, this means load 0, the current load might be
+                * !0 due to remote wakeups and the sort.
+                */
+               __update_cpu_load(this_rq, 0, pending_updates);
+       }
+       raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ */
+
+/*
+ * Called from scheduler_tick()
+ */
+void update_cpu_load_active(struct rq *this_rq)
+{
+       unsigned long load = this_rq->cfs.runnable_load_avg;
+       /*
+        * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
+        */
+       this_rq->last_load_update_tick = jiffies;
+       __update_cpu_load(this_rq, load, 1);
+}
+
 /* Used instead of source_load when we know the type == 0 */
 static unsigned long weighted_cpuload(const int cpu)
 {
@@ -4375,7 +4586,7 @@ static unsigned long capacity_orig_of(int cpu)
 static unsigned long cpu_avg_load_per_task(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
-       unsigned long nr_running = ACCESS_ONCE(rq->cfs.h_nr_running);
+       unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running);
        unsigned long load_avg = rq->cfs.runnable_load_avg;
 
        if (nr_running)
@@ -5126,18 +5337,21 @@ again:
                 * entity, update_curr() will update its vruntime, otherwise
                 * forget we've ever seen it.
                 */
-               if (curr && curr->on_rq)
-                       update_curr(cfs_rq);
-               else
-                       curr = NULL;
+               if (curr) {
+                       if (curr->on_rq)
+                               update_curr(cfs_rq);
+                       else
+                               curr = NULL;
 
-               /*
-                * This call to check_cfs_rq_runtime() will do the throttle and
-                * dequeue its entity in the parent(s). Therefore the 'simple'
-                * nr_running test will indeed be correct.
-                */
-               if (unlikely(check_cfs_rq_runtime(cfs_rq)))
-                       goto simple;
+                       /*
+                        * This call to check_cfs_rq_runtime() will do the
+                        * throttle and dequeue its entity in the parent(s).
+                        * Therefore the 'simple' nr_running test will indeed
+                        * be correct.
+                        */
+                       if (unlikely(check_cfs_rq_runtime(cfs_rq)))
+                               goto simple;
+               }
 
                se = pick_next_entity(cfs_rq, curr);
                cfs_rq = group_cfs_rq(se);
@@ -5467,10 +5681,15 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 }
 
 #ifdef CONFIG_NUMA_BALANCING
-/* Returns true if the destination node has incurred more faults */
+/*
+ * Returns true if the destination node is the preferred node.
+ * Needs to match fbq_classify_rq(): if there is a runnable task
+ * that is not on its preferred node, we should identify it.
+ */
 static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 {
        struct numa_group *numa_group = rcu_dereference(p->numa_group);
+       unsigned long src_faults, dst_faults;
        int src_nid, dst_nid;
 
        if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults ||
@@ -5484,29 +5703,30 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
        if (src_nid == dst_nid)
                return false;
 
-       if (numa_group) {
-               /* Task is already in the group's interleave set. */
-               if (node_isset(src_nid, numa_group->active_nodes))
-                       return false;
-
-               /* Task is moving into the group's interleave set. */
-               if (node_isset(dst_nid, numa_group->active_nodes))
-                       return true;
-
-               return group_faults(p, dst_nid) > group_faults(p, src_nid);
-       }
-
        /* Encourage migration to the preferred node. */
        if (dst_nid == p->numa_preferred_nid)
                return true;
 
-       return task_faults(p, dst_nid) > task_faults(p, src_nid);
+       /* Migrating away from the preferred node is bad. */
+       if (src_nid == p->numa_preferred_nid)
+               return false;
+
+       if (numa_group) {
+               src_faults = group_faults(p, src_nid);
+               dst_faults = group_faults(p, dst_nid);
+       } else {
+               src_faults = task_faults(p, src_nid);
+               dst_faults = task_faults(p, dst_nid);
+       }
+
+       return dst_faults > src_faults;
 }
 
 
 static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 {
        struct numa_group *numa_group = rcu_dereference(p->numa_group);
+       unsigned long src_faults, dst_faults;
        int src_nid, dst_nid;
 
        if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER))
@@ -5521,23 +5741,23 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
        if (src_nid == dst_nid)
                return false;
 
-       if (numa_group) {
-               /* Task is moving within/into the group's interleave set. */
-               if (node_isset(dst_nid, numa_group->active_nodes))
-                       return false;
+       /* Migrating away from the preferred node is bad. */
+       if (src_nid == p->numa_preferred_nid)
+               return true;
 
-               /* Task is moving out of the group's interleave set. */
-               if (node_isset(src_nid, numa_group->active_nodes))
-                       return true;
+       /* Encourage migration to the preferred node. */
+       if (dst_nid == p->numa_preferred_nid)
+               return false;
 
-               return group_faults(p, dst_nid) < group_faults(p, src_nid);
+       if (numa_group) {
+               src_faults = group_faults(p, src_nid);
+               dst_faults = group_faults(p, dst_nid);
+       } else {
+               src_faults = task_faults(p, src_nid);
+               dst_faults = task_faults(p, dst_nid);
        }
 
-       /* Migrating away from the preferred node is always bad. */
-       if (src_nid == p->numa_preferred_nid)
-               return true;
-
-       return task_faults(p, dst_nid) < task_faults(p, src_nid);
+       return dst_faults < src_faults;
 }
 
 #else
@@ -6037,8 +6257,8 @@ static unsigned long scale_rt_capacity(int cpu)
         * Since we're reading these variables without serialization make sure
         * we read them once before doing sanity checks on them.
         */
-       age_stamp = ACCESS_ONCE(rq->age_stamp);
-       avg = ACCESS_ONCE(rq->rt_avg);
+       age_stamp = READ_ONCE(rq->age_stamp);
+       avg = READ_ONCE(rq->rt_avg);
        delta = __rq_clock_broken(rq) - age_stamp;
 
        if (unlikely(delta < 0))
similarity index 62%
rename from kernel/sched/proc.c
rename to kernel/sched/loadavg.c
index 8ecd552..ef71590 100644 (file)
@@ -1,7 +1,9 @@
 /*
- *  kernel/sched/proc.c
+ * kernel/sched/loadavg.c
  *
- *  Kernel load calculations, forked from sched/core.c
+ * This file contains the magic bits required to compute the global loadavg
+ * figure. Its a silly number but people think its important. We go through
+ * great pains to make it work on big machines and tickless kernels.
  */
 
 #include <linux/export.h>
@@ -81,7 +83,7 @@ long calc_load_fold_active(struct rq *this_rq)
        long nr_active, delta = 0;
 
        nr_active = this_rq->nr_running;
-       nr_active += (long) this_rq->nr_uninterruptible;
+       nr_active += (long)this_rq->nr_uninterruptible;
 
        if (nr_active != this_rq->calc_load_active) {
                delta = nr_active - this_rq->calc_load_active;
@@ -186,6 +188,7 @@ void calc_load_enter_idle(void)
        delta = calc_load_fold_active(this_rq);
        if (delta) {
                int idx = calc_load_write_idx();
+
                atomic_long_add(delta, &calc_load_idle[idx]);
        }
 }
@@ -241,18 +244,20 @@ fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
 {
        unsigned long result = 1UL << frac_bits;
 
-       if (n) for (;;) {
-               if (n & 1) {
-                       result *= x;
-                       result += 1UL << (frac_bits - 1);
-                       result >>= frac_bits;
+       if (n) {
+               for (;;) {
+                       if (n & 1) {
+                               result *= x;
+                               result += 1UL << (frac_bits - 1);
+                               result >>= frac_bits;
+                       }
+                       n >>= 1;
+                       if (!n)
+                               break;
+                       x *= x;
+                       x += 1UL << (frac_bits - 1);
+                       x >>= frac_bits;
                }
-               n >>= 1;
-               if (!n)
-                       break;
-               x *= x;
-               x += 1UL << (frac_bits - 1);
-               x >>= frac_bits;
        }
 
        return result;
@@ -285,7 +290,6 @@ static unsigned long
 calc_load_n(unsigned long load, unsigned long exp,
            unsigned long active, unsigned int n)
 {
-
        return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
 }
 
@@ -339,6 +343,8 @@ static inline void calc_global_nohz(void) { }
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
  * CPUs have updated calc_load_tasks.
+ *
+ * Called from the global timer code.
  */
 void calc_global_load(unsigned long ticks)
 {
@@ -370,10 +376,10 @@ void calc_global_load(unsigned long ticks)
 }
 
 /*
- * Called from update_cpu_load() to periodically update this CPU's
+ * Called from scheduler_tick() to periodically update this CPU's
  * active count.
  */
-static void calc_load_account_active(struct rq *this_rq)
+void calc_global_load_tick(struct rq *this_rq)
 {
        long delta;
 
@@ -386,199 +392,3 @@ static void calc_load_account_active(struct rq *this_rq)
 
        this_rq->calc_load_update += LOAD_FREQ;
 }
-
-/*
- * End of global load-average stuff
- */
-
-/*
- * The exact cpuload at various idx values, calculated at every tick would be
- * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
- *
- * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
- * on nth tick when cpu may be busy, then we have:
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
- *
- * decay_load_missed() below does efficient calculation of
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
- *
- * The calculation is approximated on a 128 point scale.
- * degrade_zero_ticks is the number of ticks after which load at any
- * particular idx is approximated to be zero.
- * degrade_factor is a precomputed table, a row for each load idx.
- * Each column corresponds to degradation factor for a power of two ticks,
- * based on 128 point scale.
- * Example:
- * row 2, col 3 (=12) says that the degradation at load idx 2 after
- * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
- *
- * With this power of 2 load factors, we can degrade the load n times
- * by looking at 1 bits in n and doing as many mult/shift instead of
- * n mult/shifts needed by the exact degradation.
- */
-#define DEGRADE_SHIFT          7
-static const unsigned char
-               degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
-static const unsigned char
-               degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
-                                       {0, 0, 0, 0, 0, 0, 0, 0},
-                                       {64, 32, 8, 0, 0, 0, 0, 0},
-                                       {96, 72, 40, 12, 1, 0, 0},
-                                       {112, 98, 75, 43, 15, 1, 0},
-                                       {120, 112, 98, 76, 45, 16, 2} };
-
-/*
- * Update cpu_load for any missed ticks, due to tickless idle. The backlog
- * would be when CPU is idle and so we just decay the old load without
- * adding any new load.
- */
-static unsigned long
-decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
-{
-       int j = 0;
-
-       if (!missed_updates)
-               return load;
-
-       if (missed_updates >= degrade_zero_ticks[idx])
-               return 0;
-
-       if (idx == 1)
-               return load >> missed_updates;
-
-       while (missed_updates) {
-               if (missed_updates % 2)
-                       load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
-
-               missed_updates >>= 1;
-               j++;
-       }
-       return load;
-}
-
-/*
- * Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC). With tickless idle this will not be called
- * every tick. We fix it up based on jiffies.
- */
-static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
-                             unsigned long pending_updates)
-{
-       int i, scale;
-
-       this_rq->nr_load_updates++;
-
-       /* Update our load: */
-       this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
-       for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
-               unsigned long old_load, new_load;
-
-               /* scale is effectively 1 << i now, and >> i divides by scale */
-
-               old_load = this_rq->cpu_load[i];
-               old_load = decay_load_missed(old_load, pending_updates - 1, i);
-               new_load = this_load;
-               /*
-                * Round up the averaging division if load is increasing. This
-                * prevents us from getting stuck on 9 if the load is 10, for
-                * example.
-                */
-               if (new_load > old_load)
-                       new_load += scale - 1;
-
-               this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
-       }
-
-       sched_avg_update(this_rq);
-}
-
-#ifdef CONFIG_SMP
-static inline unsigned long get_rq_runnable_load(struct rq *rq)
-{
-       return rq->cfs.runnable_load_avg;
-}
-#else
-static inline unsigned long get_rq_runnable_load(struct rq *rq)
-{
-       return rq->load.weight;
-}
-#endif
-
-#ifdef CONFIG_NO_HZ_COMMON
-/*
- * There is no sane way to deal with nohz on smp when using jiffies because the
- * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
- * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
- *
- * Therefore we cannot use the delta approach from the regular tick since that
- * would seriously skew the load calculation. However we'll make do for those
- * updates happening while idle (nohz_idle_balance) or coming out of idle
- * (tick_nohz_idle_exit).
- *
- * This means we might still be one tick off for nohz periods.
- */
-
-/*
- * Called from nohz_idle_balance() to update the load ratings before doing the
- * idle balance.
- */
-void update_idle_cpu_load(struct rq *this_rq)
-{
-       unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
-       unsigned long load = get_rq_runnable_load(this_rq);
-       unsigned long pending_updates;
-
-       /*
-        * bail if there's load or we're actually up-to-date.
-        */
-       if (load || curr_jiffies == this_rq->last_load_update_tick)
-               return;
-
-       pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-       this_rq->last_load_update_tick = curr_jiffies;
-
-       __update_cpu_load(this_rq, load, pending_updates);
-}
-
-/*
- * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
- */
-void update_cpu_load_nohz(void)
-{
-       struct rq *this_rq = this_rq();
-       unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
-       unsigned long pending_updates;
-
-       if (curr_jiffies == this_rq->last_load_update_tick)
-               return;
-
-       raw_spin_lock(&this_rq->lock);
-       pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-       if (pending_updates) {
-               this_rq->last_load_update_tick = curr_jiffies;
-               /*
-                * We were idle, this means load 0, the current load might be
-                * !0 due to remote wakeups and the sort.
-                */
-               __update_cpu_load(this_rq, 0, pending_updates);
-       }
-       raw_spin_unlock(&this_rq->lock);
-}
-#endif /* CONFIG_NO_HZ */
-
-/*
- * Called from scheduler_tick()
- */
-void update_cpu_load_active(struct rq *this_rq)
-{
-       unsigned long load = get_rq_runnable_load(this_rq);
-       /*
-        * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
-        */
-       this_rq->last_load_update_tick = jiffies;
-       __update_cpu_load(this_rq, load, 1);
-
-       calc_load_account_active(this_rq);
-}
index 575da76..560d2fa 100644 (file)
@@ -1323,7 +1323,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
        rq = cpu_rq(cpu);
 
        rcu_read_lock();
-       curr = ACCESS_ONCE(rq->curr); /* unlocked access */
+       curr = READ_ONCE(rq->curr); /* unlocked access */
 
        /*
         * If the current task on @p's runqueue is an RT task, then
index e0e1299..d62b288 100644 (file)
@@ -26,8 +26,14 @@ extern __read_mostly int scheduler_running;
 extern unsigned long calc_load_update;
 extern atomic_long_t calc_load_tasks;
 
+extern void calc_global_load_tick(struct rq *this_rq);
 extern long calc_load_fold_active(struct rq *this_rq);
+
+#ifdef CONFIG_SMP
 extern void update_cpu_load_active(struct rq *this_rq);
+#else
+static inline void update_cpu_load_active(struct rq *this_rq) { }
+#endif
 
 /*
  * Helpers for converting nanosecond timing to jiffy resolution
@@ -707,7 +713,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
 static inline u64 __rq_clock_broken(struct rq *rq)
 {
-       return ACCESS_ONCE(rq->clock);
+       return READ_ONCE(rq->clock);
 }
 
 static inline u64 rq_clock(struct rq *rq)
@@ -1284,7 +1290,6 @@ extern void update_max_interval(void);
 extern void init_sched_dl_class(void);
 extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
-extern void init_sched_dl_class(void);
 
 extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);
@@ -1298,8 +1303,6 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
 
 unsigned long to_ratio(u64 period, u64 runtime);
 
-extern void update_idle_cpu_load(struct rq *this_rq);
-
 extern void init_task_runnable_average(struct task_struct *p);
 
 static inline void add_nr_running(struct rq *rq, unsigned count)
index 4ab7043..077ebbd 100644 (file)
@@ -174,7 +174,8 @@ static inline bool cputimer_running(struct task_struct *tsk)
 {
        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 
-       if (!cputimer->running)
+       /* Check if cputimer isn't running. This is accessed without locking. */
+       if (!READ_ONCE(cputimer->running))
                return false;
 
        /*
@@ -215,9 +216,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
        if (!cputimer_running(tsk))
                return;
 
-       raw_spin_lock(&cputimer->lock);
-       cputimer->cputime.utime += cputime;
-       raw_spin_unlock(&cputimer->lock);
+       atomic64_add(cputime, &cputimer->cputime_atomic.utime);
 }
 
 /**
@@ -238,9 +237,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
        if (!cputimer_running(tsk))
                return;
 
-       raw_spin_lock(&cputimer->lock);
-       cputimer->cputime.stime += cputime;
-       raw_spin_unlock(&cputimer->lock);
+       atomic64_add(cputime, &cputimer->cputime_atomic.stime);
 }
 
 /**
@@ -261,7 +258,5 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
        if (!cputimer_running(tsk))
                return;
 
-       raw_spin_lock(&cputimer->lock);
-       cputimer->cputime.sum_exec_runtime += ns;
-       raw_spin_unlock(&cputimer->lock);
+       atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime);
 }
index 852143a..052e026 100644 (file)
@@ -341,7 +341,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
         * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
         * an event.
         */
-       set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
+       smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
 
        return timeout;
 }
@@ -354,7 +354,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
         * doesn't imply write barrier and the users expects write
         * barrier semantics on wakeup functions.  The following
         * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
-        * and is paired with set_mb() in wait_woken().
+        * and is paired with smp_store_mb() in wait_woken().
         */
        smp_wmb(); /* C */
        wait->flags |= WQ_FLAG_WOKEN;
@@ -601,7 +601,7 @@ EXPORT_SYMBOL(bit_wait_io);
 
 __sched int bit_wait_timeout(struct wait_bit_key *word)
 {
-       unsigned long now = ACCESS_ONCE(jiffies);
+       unsigned long now = READ_ONCE(jiffies);
        if (signal_pending_state(current->state, current))
                return 1;
        if (time_after_eq(now, word->timeout))
@@ -613,7 +613,7 @@ EXPORT_SYMBOL_GPL(bit_wait_timeout);
 
 __sched int bit_wait_io_timeout(struct wait_bit_key *word)
 {
-       unsigned long now = ACCESS_ONCE(jiffies);
+       unsigned long now = READ_ONCE(jiffies);
        if (signal_pending_state(current->state, current))
                return 1;
        if (time_after_eq(now, word->timeout))
index d51c5dd..f19833b 100644 (file)
@@ -245,7 +245,7 @@ static inline void print_dropped_signal(int sig)
  * RETURNS:
  * %true if @mask is set, %false if made noop because @task was dying.
  */
-bool task_set_jobctl_pending(struct task_struct *task, unsigned int mask)
+bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask)
 {
        BUG_ON(mask & ~(JOBCTL_PENDING_MASK | JOBCTL_STOP_CONSUME |
                        JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING));
@@ -297,7 +297,7 @@ void task_clear_jobctl_trapping(struct task_struct *task)
  * CONTEXT:
  * Must be called with @task->sighand->siglock held.
  */
-void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask)
+void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask)
 {
        BUG_ON(mask & ~JOBCTL_PENDING_MASK);
 
@@ -2000,7 +2000,7 @@ static bool do_signal_stop(int signr)
        struct signal_struct *sig = current->signal;
 
        if (!(current->jobctl & JOBCTL_STOP_PENDING)) {
-               unsigned int gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
+               unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
                struct task_struct *t;
 
                /* signr will be recorded in task->jobctl for retries */
index 695f0c6..fd643d8 100644 (file)
@@ -211,25 +211,6 @@ static int multi_cpu_stop(void *data)
        return err;
 }
 
-struct irq_cpu_stop_queue_work_info {
-       int cpu1;
-       int cpu2;
-       struct cpu_stop_work *work1;
-       struct cpu_stop_work *work2;
-};
-
-/*
- * This function is always run with irqs and preemption disabled.
- * This guarantees that both work1 and work2 get queued, before
- * our local migrate thread gets the chance to preempt us.
- */
-static void irq_cpu_stop_queue_work(void *arg)
-{
-       struct irq_cpu_stop_queue_work_info *info = arg;
-       cpu_stop_queue_work(info->cpu1, info->work1);
-       cpu_stop_queue_work(info->cpu2, info->work2);
-}
-
 /**
  * stop_two_cpus - stops two cpus
  * @cpu1: the cpu to stop
@@ -245,7 +226,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 {
        struct cpu_stop_done done;
        struct cpu_stop_work work1, work2;
-       struct irq_cpu_stop_queue_work_info call_args;
        struct multi_stop_data msdata;
 
        preempt_disable();
@@ -262,13 +242,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
                .done = &done
        };
 
-       call_args = (struct irq_cpu_stop_queue_work_info){
-               .cpu1 = cpu1,
-               .cpu2 = cpu2,
-               .work1 = &work1,
-               .work2 = &work2,
-       };
-
        cpu_stop_init_done(&done, 2);
        set_state(&msdata, MULTI_STOP_PREPARE);
 
@@ -285,16 +258,11 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
                return -ENOENT;
        }
 
-       lg_local_lock(&stop_cpus_lock);
-       /*
-        * Queuing needs to be done by the lowest numbered CPU, to ensure
-        * that works are always queued in the same order on every CPU.
-        * This prevents deadlocks.
-        */
-       smp_call_function_single(min(cpu1, cpu2),
-                                &irq_cpu_stop_queue_work,
-                                &call_args, 1);
-       lg_local_unlock(&stop_cpus_lock);
+       lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
+       cpu_stop_queue_work(cpu1, &work1);
+       cpu_stop_queue_work(cpu2, &work2);
+       lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
+
        preempt_enable();
 
        wait_for_completion(&done.completion);
index 11dc22a..637a094 100644 (file)
@@ -117,11 +117,7 @@ static int __clockevents_set_state(struct clock_event_device *dev,
        /* Transition with new state-specific callbacks */
        switch (state) {
        case CLOCK_EVT_STATE_DETACHED:
-               /*
-                * This is an internal state, which is guaranteed to go from
-                * SHUTDOWN to DETACHED. No driver interaction required.
-                */
-               return 0;
+               /* The clockevent device is getting replaced. Shut it down. */
 
        case CLOCK_EVT_STATE_SHUTDOWN:
                return dev->set_state_shutdown(dev);
index 76d4bd9..93ef719 100644 (file)
@@ -266,21 +266,23 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 /*
  * Divide a ktime value by a nanosecond value
  */
-u64 __ktime_divns(const ktime_t kt, s64 div)
+s64 __ktime_divns(const ktime_t kt, s64 div)
 {
-       u64 dclc;
        int sft = 0;
+       s64 dclc;
+       u64 tmp;
 
        dclc = ktime_to_ns(kt);
+       tmp = dclc < 0 ? -dclc : dclc;
+
        /* Make sure the divisor is less than 2^32: */
        while (div >> 32) {
                sft++;
                div >>= 1;
        }
-       dclc >>= sft;
-       do_div(dclc, (unsigned long) div);
-
-       return dclc;
+       tmp >>= sft;
+       do_div(tmp, (unsigned long) div);
+       return dclc < 0 ? -tmp : tmp;
 }
 EXPORT_SYMBOL_GPL(__ktime_divns);
 #endif /* BITS_PER_LONG >= 64 */
index 0075da7..892e3da 100644 (file)
@@ -196,39 +196,62 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
        return 0;
 }
 
-static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
+/*
+ * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg
+ * to avoid race conditions with concurrent updates to cputime.
+ */
+static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
 {
-       if (b->utime > a->utime)
-               a->utime = b->utime;
+       u64 curr_cputime;
+retry:
+       curr_cputime = atomic64_read(cputime);
+       if (sum_cputime > curr_cputime) {
+               if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime)
+                       goto retry;
+       }
+}
 
-       if (b->stime > a->stime)
-               a->stime = b->stime;
+static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum)
+{
+       __update_gt_cputime(&cputime_atomic->utime, sum->utime);
+       __update_gt_cputime(&cputime_atomic->stime, sum->stime);
+       __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
+}
 
-       if (b->sum_exec_runtime > a->sum_exec_runtime)
-               a->sum_exec_runtime = b->sum_exec_runtime;
+/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */
+static inline void sample_cputime_atomic(struct task_cputime *times,
+                                        struct task_cputime_atomic *atomic_times)
+{
+       times->utime = atomic64_read(&atomic_times->utime);
+       times->stime = atomic64_read(&atomic_times->stime);
+       times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);
 }
 
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 {
        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
        struct task_cputime sum;
-       unsigned long flags;
 
-       if (!cputimer->running) {
+       /* Check if cputimer isn't running. This is accessed without locking. */
+       if (!READ_ONCE(cputimer->running)) {
                /*
                 * The POSIX timer interface allows for absolute time expiry
                 * values through the TIMER_ABSTIME flag, therefore we have
-                * to synchronize the timer to the clock every time we start
-                * it.
+                * to synchronize the timer to the clock every time we start it.
                 */
                thread_group_cputime(tsk, &sum);
-               raw_spin_lock_irqsave(&cputimer->lock, flags);
-               cputimer->running = 1;
-               update_gt_cputime(&cputimer->cputime, &sum);
-       } else
-               raw_spin_lock_irqsave(&cputimer->lock, flags);
-       *times = cputimer->cputime;
-       raw_spin_unlock_irqrestore(&cputimer->lock, flags);
+               update_gt_cputime(&cputimer->cputime_atomic, &sum);
+
+               /*
+                * We're setting cputimer->running without a lock. Ensure
+                * this only gets written to in one operation. We set
+                * running after update_gt_cputime() as a small optimization,
+                * but barriers are not required because update_gt_cputime()
+                * can handle concurrent updates.
+                */
+               WRITE_ONCE(cputimer->running, 1);
+       }
+       sample_cputime_atomic(times, &cputimer->cputime_atomic);
 }
 
 /*
@@ -582,7 +605,8 @@ bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
        if (!task_cputime_zero(&tsk->cputime_expires))
                return false;
 
-       if (tsk->signal->cputimer.running)
+       /* Check if cputimer is running. This is accessed without locking. */
+       if (READ_ONCE(tsk->signal->cputimer.running))
                return false;
 
        return true;
@@ -852,10 +876,10 @@ static void check_thread_timers(struct task_struct *tsk,
        /*
         * Check for the special case thread timers.
         */
-       soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
+       soft = READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
        if (soft != RLIM_INFINITY) {
                unsigned long hard =
-                       ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
+                       READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
 
                if (hard != RLIM_INFINITY &&
                    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
@@ -882,14 +906,12 @@ static void check_thread_timers(struct task_struct *tsk,
        }
 }
 
-static void stop_process_timers(struct signal_struct *sig)
+static inline void stop_process_timers(struct signal_struct *sig)
 {
        struct thread_group_cputimer *cputimer = &sig->cputimer;
-       unsigned long flags;
 
-       raw_spin_lock_irqsave(&cputimer->lock, flags);
-       cputimer->running = 0;
-       raw_spin_unlock_irqrestore(&cputimer->lock, flags);
+       /* Turn off cputimer->running. This is done without locking. */
+       WRITE_ONCE(cputimer->running, 0);
 }
 
 static u32 onecputick;
@@ -958,11 +980,11 @@ static void check_process_timers(struct task_struct *tsk,
                         SIGPROF);
        check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
                         SIGVTALRM);
-       soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+       soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
        if (soft != RLIM_INFINITY) {
                unsigned long psecs = cputime_to_secs(ptime);
                unsigned long hard =
-                       ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
+                       READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
                cputime_t x;
                if (psecs >= hard) {
                        /*
@@ -1111,12 +1133,11 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
        }
 
        sig = tsk->signal;
-       if (sig->cputimer.running) {
+       /* Check if cputimer is running. This is accessed without locking. */
+       if (READ_ONCE(sig->cputimer.running)) {
                struct task_cputime group_sample;
 
-               raw_spin_lock(&sig->cputimer.lock);
-               group_sample = sig->cputimer.cputime;
-               raw_spin_unlock(&sig->cputimer.lock);
+               sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
 
                if (task_cputime_expired(&group_sample, &sig->cputime_expires))
                        return 1;
@@ -1157,7 +1178,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
         * If there are any active process wide timers (POSIX 1.b, itimers,
         * RLIMIT_CPU) cputimer must be running.
         */
-       if (tsk->signal->cputimer.running)
+       if (READ_ONCE(tsk->signal->cputimer.running))
                check_process_timers(tsk, &firing);
 
        /*
index dd70993..3e48406 100644 (file)
@@ -409,7 +409,7 @@ static void (*torture_shutdown_hook)(void);
  */
 void torture_shutdown_absorb(const char *title)
 {
-       while (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
+       while (READ_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
                pr_notice("torture thread %s parking due to system shutdown\n",
                          title);
                schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
@@ -480,9 +480,9 @@ static int torture_shutdown_notify(struct notifier_block *unused1,
                                   unsigned long unused2, void *unused3)
 {
        mutex_lock(&fullstop_mutex);
-       if (ACCESS_ONCE(fullstop) == FULLSTOP_DONTSTOP) {
+       if (READ_ONCE(fullstop) == FULLSTOP_DONTSTOP) {
                VERBOSE_TOROUT_STRING("Unscheduled system shutdown detected");
-               ACCESS_ONCE(fullstop) = FULLSTOP_SHUTDOWN;
+               WRITE_ONCE(fullstop, FULLSTOP_SHUTDOWN);
        } else {
                pr_warn("Concurrent rmmod and shutdown illegal!\n");
        }
@@ -523,13 +523,13 @@ static int stutter;
  */
 void stutter_wait(const char *title)
 {
-       while (ACCESS_ONCE(stutter_pause_test) ||
-              (torture_runnable && !ACCESS_ONCE(*torture_runnable))) {
+       while (READ_ONCE(stutter_pause_test) ||
+              (torture_runnable && !READ_ONCE(*torture_runnable))) {
                if (stutter_pause_test)
-                       if (ACCESS_ONCE(stutter_pause_test) == 1)
+                       if (READ_ONCE(stutter_pause_test) == 1)
                                schedule_timeout_interruptible(1);
                        else
-                               while (ACCESS_ONCE(stutter_pause_test))
+                               while (READ_ONCE(stutter_pause_test))
                                        cond_resched();
                else
                        schedule_timeout_interruptible(round_jiffies_relative(HZ));
@@ -549,14 +549,14 @@ static int torture_stutter(void *arg)
                if (!torture_must_stop()) {
                        if (stutter > 1) {
                                schedule_timeout_interruptible(stutter - 1);
-                               ACCESS_ONCE(stutter_pause_test) = 2;
+                               WRITE_ONCE(stutter_pause_test, 2);
                        }
                        schedule_timeout_interruptible(1);
-                       ACCESS_ONCE(stutter_pause_test) = 1;
+                       WRITE_ONCE(stutter_pause_test, 1);
                }
                if (!torture_must_stop())
                        schedule_timeout_interruptible(stutter);
-               ACCESS_ONCE(stutter_pause_test) = 0;
+               WRITE_ONCE(stutter_pause_test, 0);
                torture_shutdown_absorb("torture_stutter");
        } while (!torture_must_stop());
        torture_kthread_stopping("torture_stutter");
@@ -642,13 +642,13 @@ EXPORT_SYMBOL_GPL(torture_init_end);
 bool torture_cleanup_begin(void)
 {
        mutex_lock(&fullstop_mutex);
-       if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
+       if (READ_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
                pr_warn("Concurrent rmmod and shutdown illegal!\n");
                mutex_unlock(&fullstop_mutex);
                schedule_timeout_uninterruptible(10);
                return true;
        }
-       ACCESS_ONCE(fullstop) = FULLSTOP_RMMOD;
+       WRITE_ONCE(fullstop, FULLSTOP_RMMOD);
        mutex_unlock(&fullstop_mutex);
        torture_shutdown_cleanup();
        torture_shuffle_cleanup();
@@ -681,7 +681,7 @@ EXPORT_SYMBOL_GPL(torture_must_stop);
  */
 bool torture_must_stop_irq(void)
 {
-       return ACCESS_ONCE(fullstop) != FULLSTOP_DONTSTOP;
+       return READ_ONCE(fullstop) != FULLSTOP_DONTSTOP;
 }
 EXPORT_SYMBOL_GPL(torture_must_stop_irq);
 
index 13d945c..1b28df2 100644 (file)
@@ -450,7 +450,7 @@ static int __init ring_buffer_benchmark_init(void)
 
        if (producer_fifo >= 0) {
                struct sched_param param = {
-                       .sched_priority = consumer_fifo
+                       .sched_priority = producer_fifo
                };
                sched_setscheduler(producer, SCHED_FIFO, &param);
        } else
index ced69da..7f2e97c 100644 (file)
@@ -1369,19 +1369,26 @@ static int check_preds(struct filter_parse_state *ps)
 {
        int n_normal_preds = 0, n_logical_preds = 0;
        struct postfix_elt *elt;
+       int cnt = 0;
 
        list_for_each_entry(elt, &ps->postfix, list) {
-               if (elt->op == OP_NONE)
+               if (elt->op == OP_NONE) {
+                       cnt++;
                        continue;
+               }
 
                if (elt->op == OP_AND || elt->op == OP_OR) {
                        n_logical_preds++;
+                       cnt--;
                        continue;
                }
+               if (elt->op != OP_NOT)
+                       cnt--;
                n_normal_preds++;
+               WARN_ON_ONCE(cnt < 0);
        }
 
-       if (!n_normal_preds || n_logical_preds >= n_normal_preds) {
+       if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) {
                parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
                return -EINVAL;
        }
index 692bf71..25a086b 100644 (file)
@@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
 EXPORT_SYMBOL(ftrace_print_hex_seq);
 
 const char *
-ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len,
+ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count,
                       size_t el_size)
 {
        const char *ret = trace_seq_buffer_ptr(p);
        const char *prefix = "";
        void *ptr = (void *)buf;
+       size_t buf_len = count * el_size;
 
        trace_seq_putc(p, '{');
 
index 2316f50..581a68a 100644 (file)
@@ -41,6 +41,8 @@
 #define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
 #define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
 
+static DEFINE_MUTEX(watchdog_proc_mutex);
+
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
 #else
@@ -608,26 +610,36 @@ void watchdog_nmi_enable_all(void)
 {
        int cpu;
 
-       if (!watchdog_user_enabled)
-               return;
+       mutex_lock(&watchdog_proc_mutex);
+
+       if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+               goto unlock;
 
        get_online_cpus();
        for_each_online_cpu(cpu)
                watchdog_nmi_enable(cpu);
        put_online_cpus();
+
+unlock:
+       mutex_unlock(&watchdog_proc_mutex);
 }
 
 void watchdog_nmi_disable_all(void)
 {
        int cpu;
 
+       mutex_lock(&watchdog_proc_mutex);
+
        if (!watchdog_running)
-               return;
+               goto unlock;
 
        get_online_cpus();
        for_each_online_cpu(cpu)
                watchdog_nmi_disable(cpu);
        put_online_cpus();
+
+unlock:
+       mutex_unlock(&watchdog_proc_mutex);
 }
 #else
 static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
@@ -744,8 +756,6 @@ static int proc_watchdog_update(void)
 
 }
 
-static DEFINE_MUTEX(watchdog_proc_mutex);
-
 /*
  * common function for watchdog, nmi_watchdog and soft_watchdog parameter
  *
index 1767057..b908048 100644 (file)
@@ -1233,6 +1233,7 @@ config RCU_TORTURE_TEST
        depends on DEBUG_KERNEL
        select TORTURE_TEST
        select SRCU
+       select TASKS_RCU
        default n
        help
          This option provides a kernel module that runs torture tests
@@ -1261,12 +1262,38 @@ config RCU_TORTURE_TEST_RUNNABLE
          Say N here if you want the RCU torture tests to start only
          after being manually enabled via /proc.
 
+config RCU_TORTURE_TEST_SLOW_PREINIT
+       bool "Slow down RCU grace-period pre-initialization to expose races"
+       depends on RCU_TORTURE_TEST
+       help
+         This option delays grace-period pre-initialization (the
+         propagation of CPU-hotplug changes up the rcu_node combining
+         tree) for a few jiffies between initializing each pair of
+         consecutive rcu_node structures.  This helps to expose races
+         involving grace-period pre-initialization, in other words, it
+         makes your kernel less stable.  It can also greatly increase
+         grace-period latency, especially on systems with large numbers
+         of CPUs.  This is useful when torture-testing RCU, but in
+         almost no other circumstance.
+
+         Say Y here if you want your system to crash and hang more often.
+         Say N if you want a sane system.
+
+config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
+       int "How much to slow down RCU grace-period pre-initialization"
+       range 0 5
+       default 3
+       depends on RCU_TORTURE_TEST_SLOW_PREINIT
+       help
+         This option specifies the number of jiffies to wait between
+         each rcu_node structure pre-initialization step.
+
 config RCU_TORTURE_TEST_SLOW_INIT
        bool "Slow down RCU grace-period initialization to expose races"
        depends on RCU_TORTURE_TEST
        help
-         This option makes grace-period initialization block for a
-         few jiffies between initializing each pair of consecutive
+         This option delays grace-period initialization for a few
+         jiffies between initializing each pair of consecutive
          rcu_node structures.  This helps to expose races involving
          grace-period initialization, in other words, it makes your
          kernel less stable.  It can also greatly increase grace-period
@@ -1281,10 +1308,35 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY
        int "How much to slow down RCU grace-period initialization"
        range 0 5
        default 3
+       depends on RCU_TORTURE_TEST_SLOW_INIT
        help
          This option specifies the number of jiffies to wait between
          each rcu_node structure initialization.
 
+config RCU_TORTURE_TEST_SLOW_CLEANUP
+       bool "Slow down RCU grace-period cleanup to expose races"
+       depends on RCU_TORTURE_TEST
+       help
+         This option delays grace-period cleanup for a few jiffies
+         between cleaning up each pair of consecutive rcu_node
+         structures.  This helps to expose races involving grace-period
+         cleanup, in other words, it makes your kernel less stable.
+         It can also greatly increase grace-period latency, especially
+         on systems with large numbers of CPUs.  This is useful when
+         torture-testing RCU, but in almost no other circumstance.
+
+         Say Y here if you want your system to crash and hang more often.
+         Say N if you want a sane system.
+
+config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
+       int "How much to slow down RCU grace-period cleanup"
+       range 0 5
+       default 3
+       depends on RCU_TORTURE_TEST_SLOW_CLEANUP
+       help
+         This option specifies the number of jiffies to wait between
+         each rcu_node structure cleanup operation.
+
 config RCU_CPU_STALL_TIMEOUT
        int "RCU CPU stall timeout in seconds"
        depends on RCU_STALL_COMMON
@@ -1321,6 +1373,17 @@ config RCU_TRACE
          Say Y here if you want to enable RCU tracing
          Say N if you are unsure.
 
+config RCU_EQS_DEBUG
+       bool "Use this when adding any sort of NO_HZ support to your arch"
+       depends on DEBUG_KERNEL
+       help
+         This option provides consistency checks in RCU's handling of
+         NO_HZ.  These checks have proven quite helpful in detecting
+         bugs in arch-specific NO_HZ code.
+
+         Say N here if you need ultimate kernel/user switch latencies
+         Say Y if you are unsure
+
 endmenu # "RCU Debugging"
 
 config DEBUG_BLOCK_EXT_DEVT
index 4f134d8..f610b2a 100644 (file)
@@ -191,7 +191,7 @@ int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
        /* Update distances based on topology */
        for_each_cpu(cpu, update_mask) {
                if (cpu_rmap_copy_neigh(rmap, cpu,
-                                       topology_thread_cpumask(cpu), 1))
+                                       topology_sibling_cpumask(cpu), 1))
                        continue;
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        topology_core_cpumask(cpu), 2))
index 830dd5d..5a70f61 100644 (file)
 int cpumask_next_and(int n, const struct cpumask *src1p,
                     const struct cpumask *src2p)
 {
-       struct cpumask tmp;
-
-       if (cpumask_and(&tmp, src1p, src2p))
-               return cpumask_next(n, &tmp);
-       return nr_cpu_ids;
+       while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
+               if (cpumask_test_cpu(n, src2p))
+                       break;
+       return n;
 }
 EXPORT_SYMBOL(cpumask_next_and);
 
@@ -139,64 +138,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 #endif
 
 /**
- * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
- *
+ * cpumask_local_spread - select the i'th cpu with local numa cpu's first
  * @i: index number
- * @numa_node: local numa_node
- * @dstp: cpumask with the relevant cpu bit set according to the policy
+ * @node: local numa_node
  *
- * This function sets the cpumask according to a numa aware policy.
- * cpumask could be used as an affinity hint for the IRQ related to a
- * queue. When the policy is to spread queues across cores - local cores
- * first.
+ * This function selects an online CPU according to a numa aware policy;
+ * local cpus are returned first, followed by non-local ones, then it
+ * wraps around.
  *
- * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
- * the cpu bit and need to re-call the function.
+ * It's not very efficient, but useful for setup.
  */
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-       cpumask_var_t mask;
        int cpu;
-       int ret = 0;
-
-       if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
-               return -ENOMEM;
 
+       /* Wrap: we always want a cpu. */
        i %= num_online_cpus();
 
-       if (numa_node == -1 || !cpumask_of_node(numa_node)) {
-               /* Use all online cpu's for non numa aware system */
-               cpumask_copy(mask, cpu_online_mask);
+       if (node == -1) {
+               for_each_cpu(cpu, cpu_online_mask)
+                       if (i-- == 0)
+                               return cpu;
        } else {
-               int n;
-
-               cpumask_and(mask,
-                           cpumask_of_node(numa_node), cpu_online_mask);
-
-               n = cpumask_weight(mask);
-               if (i >= n) {
-                       i -= n;
-
-                       /* If index > number of local cpu's, mask out local
-                        * cpu's
-                        */
-                       cpumask_andnot(mask, cpu_online_mask, mask);
+               /* NUMA first. */
+               for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
+                       if (i-- == 0)
+                               return cpu;
+
+               for_each_cpu(cpu, cpu_online_mask) {
+                       /* Skip NUMA nodes, done above. */
+                       if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
+                               continue;
+
+                       if (i-- == 0)
+                               return cpu;
                }
        }
-
-       for_each_cpu(cpu, mask) {
-               if (--i < 0)
-                       goto out;
-       }
-
-       ret = -EAGAIN;
-
-out:
-       free_cpumask_var(mask);
-
-       if (!ret)
-               cpumask_set_cpu(cpu, dstp);
-
-       return ret;
+       BUG();
 }
-EXPORT_SYMBOL(cpumask_set_cpu_local_first);
+EXPORT_SYMBOL(cpumask_local_spread);
index aac5114..a89d041 100644 (file)
@@ -639,7 +639,7 @@ do { \
        **************  MIPS  *****************
        ***************************************/
 #if defined(__mips__) && W_TYPE_SIZE == 32
-#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4
+#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
 #define umul_ppmm(w1, w0, u, v)                        \
 do {                                           \
        UDItype __ll = (UDItype)(u) * (v);      \
@@ -671,7 +671,7 @@ do {                                                \
        **************  MIPS/64  **************
        ***************************************/
 #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4
+#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
 #define umul_ppmm(w1, w0, u, v) \
 do {                                                                   \
        typedef unsigned int __ll_UTItype __attribute__((mode(TI)));    \
index 48144cd..f051d69 100644 (file)
@@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
  * Compare counter against given value.
  * Return 1 if greater, 0 if equal and -1 if less
  */
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
 {
        s64     count;
 
        count = percpu_counter_read(fbc);
        /* Check to see if rough count will be sufficient for comparison */
-       if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+       if (abs(count - rhs) > (batch * num_online_cpus())) {
                if (count > rhs)
                        return 1;
                else
@@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
        else
                return 0;
 }
-EXPORT_SYMBOL(percpu_counter_compare);
+EXPORT_SYMBOL(__percpu_counter_compare);
 
 static int __init percpu_counter_startup(void)
 {
index 3d2aa27..061550d 100644 (file)
@@ -33,7 +33,7 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/rcupdate.h>
-#include <linux/preempt_mask.h>                /* in_interrupt() */
+#include <linux/preempt.h>             /* in_interrupt() */
 
 
 /*
index b28df40..8609378 100644 (file)
@@ -14,6 +14,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/atomic.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/log2.h>
@@ -25,6 +26,7 @@
 #include <linux/random.h>
 #include <linux/rhashtable.h>
 #include <linux/err.h>
+#include <linux/export.h>
 
 #define HASH_DEFAULT_SIZE      64UL
 #define HASH_MIN_SIZE          4U
@@ -446,6 +448,10 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
        if (key && rhashtable_lookup_fast(ht, key, ht->p))
                goto exit;
 
+       err = -E2BIG;
+       if (unlikely(rht_grow_above_max(ht, tbl)))
+               goto exit;
+
        err = -EAGAIN;
        if (rhashtable_check_elasticity(ht, tbl, hash) ||
            rht_grow_above_100(ht, tbl))
@@ -738,6 +744,12 @@ int rhashtable_init(struct rhashtable *ht,
        if (params->max_size)
                ht->p.max_size = rounddown_pow_of_two(params->max_size);
 
+       if (params->insecure_max_entries)
+               ht->p.insecure_max_entries =
+                       rounddown_pow_of_two(params->insecure_max_entries);
+       else
+               ht->p.insecure_max_entries = ht->p.max_size * 2;
+
        ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
 
        /* The maximum (not average) chain length grows with the
index a28df52..3a5f2b3 100644 (file)
@@ -57,7 +57,8 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
                        return res + find_zero(data) + 1 - align;
                }
                res += sizeof(unsigned long);
-               if (unlikely(max < sizeof(unsigned long)))
+               /* We already handled 'unsigned long' bytes. Did we do it all ? */
+               if (unlikely(max <= sizeof(unsigned long)))
                        break;
                max -= sizeof(unsigned long);
                if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
@@ -84,13 +85,21 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
  * @str: The string to measure.
  * @count: Maximum count (including NUL character)
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
  * Returns the size of the string INCLUDING the terminating NUL.
- * If the string is too long, returns 'count+1'.
+ * If the string is too long, returns a number larger than @count. User
+ * has to check the return value against "> count".
  * On exception (or invalid count), returns 0.
+ *
+ * NOTE! You should basically never use this function. There is
+ * almost never any valid case for using the length of a user space
+ * string, since the string can be changed at any time by other
+ * threads. Use "strncpy_from_user()" instead to get a stable copy
+ * of the string.
  */
 long strnlen_user(const char __user *str, long count)
 {
@@ -113,7 +122,8 @@ EXPORT_SYMBOL(strnlen_user);
  * strlen_user: - Get the size of a user string INCLUDING final NUL.
  * @str: The string to measure.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
index 4abda07..3c365ab 100644 (file)
@@ -537,8 +537,9 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
  * Allocates bounce buffer and returns its kernel virtual address.
  */
 
-phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-                      enum dma_data_direction dir)
+static phys_addr_t
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+          enum dma_data_direction dir)
 {
        dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
 
index 6dc4580..000e7b3 100644 (file)
@@ -359,23 +359,6 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
        flush_delayed_work(&bdi->wb.dwork);
 }
 
-/*
- * Called when the device behind @bdi has been removed or ejected.
- *
- * We can't really do much here except for reducing the dirty ratio at
- * the moment.  In the future we should be able to set a flag so that
- * the filesystem can handle errors at mark_inode_dirty time instead
- * of only at writeback time.
- */
-void bdi_unregister(struct backing_dev_info *bdi)
-{
-       if (WARN_ON_ONCE(!bdi->dev))
-               return;
-
-       bdi_set_min_ratio(bdi, 0);
-}
-EXPORT_SYMBOL(bdi_unregister);
-
 static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
 {
        memset(wb, 0, sizeof(*wb));
@@ -443,6 +426,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
        int i;
 
        bdi_wb_shutdown(bdi);
+       bdi_set_min_ratio(bdi, 0);
 
        WARN_ON(!list_empty(&bdi->work_list));
        WARN_ON(delayed_work_pending(&bdi->wb.dwork));
index 5405aff..f0fe4f2 100644 (file)
 #define BYTES_PER_POINTER      sizeof(void *)
 
 /* GFP bitmask for kmemleak internal allocations */
-#define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
+#define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC | \
+                                          __GFP_NOACCOUNT)) | \
                                 __GFP_NORETRY | __GFP_NOMEMALLOC | \
                                 __GFP_NOWARN)
 
index 14c2f20..a04225d 100644 (file)
@@ -2323,6 +2323,8 @@ done_restock:
        css_get_many(&memcg->css, batch);
        if (batch > nr_pages)
                refill_stock(memcg, batch - nr_pages);
+       if (!(gfp_mask & __GFP_WAIT))
+               goto done;
        /*
         * If the hierarchy is above the normal consumption range,
         * make the charging task trim their excess contribution.
@@ -5833,9 +5835,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
        if (!mem_cgroup_is_root(memcg))
                page_counter_uncharge(&memcg->memory, 1);
 
-       /* XXX: caller holds IRQ-safe mapping->tree_lock */
-       VM_BUG_ON(!irqs_disabled());
-
+       /* Caller disabled preemption with mapping->tree_lock */
        mem_cgroup_charge_statistics(memcg, page, -1);
        memcg_check_events(memcg, page);
 }
index 22e037e..17734c3 100644 (file)
@@ -3737,7 +3737,7 @@ void print_vma_addr(char *prefix, unsigned long ip)
 }
 
 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
-void might_fault(void)
+void __might_fault(const char *file, int line)
 {
        /*
         * Some code (nfs/sunrpc) uses socket ops on kernel memory while
@@ -3747,21 +3747,15 @@ void might_fault(void)
         */
        if (segment_eq(get_fs(), KERNEL_DS))
                return;
-
-       /*
-        * it would be nicer only to annotate paths which are not under
-        * pagefault_disable, however that requires a larger audit and
-        * providing helpers like get_user_atomic.
-        */
-       if (in_atomic())
+       if (pagefault_disabled())
                return;
-
-       __might_sleep(__FILE__, __LINE__, 0);
-
+       __might_sleep(file, line, 0);
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
        if (current->mm)
                might_lock_read(&current->mm->mmap_sem);
+#endif
 }
-EXPORT_SYMBOL(might_fault);
+EXPORT_SYMBOL(__might_fault);
 #endif
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
index 457bde5..9e88f74 100644 (file)
@@ -1969,8 +1969,10 @@ void try_offline_node(int nid)
                 * wait_table may be allocated from boot memory,
                 * here only free if it's allocated by vmalloc.
                 */
-               if (is_vmalloc_addr(zone->wait_table))
+               if (is_vmalloc_addr(zone->wait_table)) {
                        vfree(zone->wait_table);
+                       zone->wait_table = NULL;
+               }
        }
 }
 EXPORT_SYMBOL(try_offline_node);
index ede2629..7477432 100644 (file)
@@ -2518,7 +2518,7 @@ static void __init check_numabalancing_enable(void)
        if (numabalancing_override)
                set_numabalancing_state(numabalancing_override == 1);
 
-       if (nr_node_ids > 1 && !numabalancing_override) {
+       if (num_online_nodes() > 1 && !numabalancing_override) {
                pr_info("%s automatic NUMA balancing. "
                        "Configure with numa_balancing= or the "
                        "kernel.numa_balancing sysctl",
index 5daf556..eb59f7e 100644 (file)
@@ -580,7 +580,7 @@ static long long pos_ratio_polynom(unsigned long setpoint,
        long x;
 
        x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
-                   limit - setpoint + 1);
+                     (limit - setpoint) | 1);
        pos_ratio = x;
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -807,7 +807,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
         * scale global setpoint to bdi's:
         *      bdi_setpoint = setpoint * bdi_thresh / thresh
         */
-       x = div_u64((u64)bdi_thresh << 16, thresh + 1);
+       x = div_u64((u64)bdi_thresh << 16, thresh | 1);
        bdi_setpoint = setpoint * (u64)x >> 16;
        /*
         * Use span=(8*write_bw) in single bdi case as indicated by
@@ -822,7 +822,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 
        if (bdi_dirty < x_intercept - span / 4) {
                pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
-                                   x_intercept - bdi_setpoint + 1);
+                                     (x_intercept - bdi_setpoint) | 1);
        } else
                pos_ratio /= 4;
 
index 755a42c..303c908 100644 (file)
@@ -101,7 +101,8 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
                        buddy_idx = __find_buddy_index(page_idx, order);
                        buddy = page + (buddy_idx - page_idx);
 
-                       if (!is_migrate_isolate_page(buddy)) {
+                       if (pfn_valid_within(page_to_pfn(buddy)) &&
+                           !is_migrate_isolate_page(buddy)) {
                                __isolate_free_page(page, order);
                                kernel_map_pages(page, (1 << order), 1);
                                set_page_refcounted(page);
index de98137..3759099 100644 (file)
@@ -2451,6 +2451,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
                        return -ENOMEM;
                }
                inode->i_op = &shmem_short_symlink_operations;
+               inode->i_link = info->symlink;
        } else {
                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
                if (error) {
@@ -2474,30 +2475,23 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
        return 0;
 }
 
-static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
-{
-       nd_set_link(nd, SHMEM_I(d_inode(dentry))->symlink);
-       return NULL;
-}
-
-static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *shmem_follow_link(struct dentry *dentry, void **cookie)
 {
        struct page *page = NULL;
        int error = shmem_getpage(d_inode(dentry), 0, &page, SGP_READ, NULL);
-       nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
-       if (page)
-               unlock_page(page);
-       return page;
+       if (error)
+               return ERR_PTR(error);
+       unlock_page(page);
+       *cookie = page;
+       return kmap(page);
 }
 
-static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+static void shmem_put_link(struct inode *unused, void *cookie)
 {
-       if (!IS_ERR(nd_get_link(nd))) {
-               struct page *page = cookie;
-               kunmap(page);
-               mark_page_accessed(page);
-               page_cache_release(page);
-       }
+       struct page *page = cookie;
+       kunmap(page);
+       mark_page_accessed(page);
+       page_cache_release(page);
 }
 
 #ifdef CONFIG_TMPFS_XATTR
@@ -2642,7 +2636,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
 static const struct inode_operations shmem_short_symlink_operations = {
        .readlink       = generic_readlink,
-       .follow_link    = shmem_follow_short_symlink,
+       .follow_link    = simple_follow_link,
 #ifdef CONFIG_TMPFS_XATTR
        .setxattr       = shmem_setxattr,
        .getxattr       = shmem_getxattr,
@@ -3401,7 +3395,13 @@ int shmem_zero_setup(struct vm_area_struct *vma)
        struct file *file;
        loff_t size = vma->vm_end - vma->vm_start;
 
-       file = shmem_file_setup("dev/zero", size, vma->vm_flags);
+       /*
+        * Cloning a new file under mmap_sem leads to a lock ordering conflict
+        * between XFS directory reading and selinux: since this file is only
+        * accessible to the user through its mapping, use S_PRIVATE flag to
+        * bypass file security, in the same way as shmem_kernel_file_setup().
+        */
+       file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
        if (IS_ERR(file))
                return PTR_ERR(file);
 
index 08bd7a3..a8b5e74 100644 (file)
@@ -289,7 +289,8 @@ static int create_handle_cache(struct zs_pool *pool)
 
 static void destroy_handle_cache(struct zs_pool *pool)
 {
-       kmem_cache_destroy(pool->handle_cachep);
+       if (pool->handle_cachep)
+               kmem_cache_destroy(pool->handle_cachep);
 }
 
 static unsigned long alloc_handle(struct zs_pool *pool)
index 98a30a5..59555f0 100644 (file)
@@ -443,7 +443,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
        case NETDEV_UP:
                /* Put all VLANs for this dev in the up state too.  */
                vlan_group_for_each_dev(grp, i, vlandev) {
-                       flgs = vlandev->flags;
+                       flgs = dev_get_flags(vlandev);
                        if (flgs & IFF_UP)
                                continue;
 
index 476709b..c4802f3 100644 (file)
@@ -1557,7 +1557,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 {
        BT_DBG("%s %p", hdev->name, hdev);
 
-       if (!hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+       if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+           test_bit(HCI_UP, &hdev->flags)) {
                /* Execute vendor specific shutdown routine */
                if (hdev->shutdown)
                        hdev->shutdown(hdev);
@@ -2853,9 +2854,11 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status,
                         * state. If we were running both LE and BR/EDR inquiry
                         * simultaneously, and BR/EDR inquiry is already
                         * finished, stop discovery, otherwise BR/EDR inquiry
-                        * will stop discovery when finished.
+                        * will stop discovery when finished. If we will resolve
+                        * remote device name, do not change discovery state.
                         */
-                       if (!test_bit(HCI_INQUIRY, &hdev->flags))
+                       if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
+                           hdev->discovery.state != DISCOVERY_RESOLVING)
                                hci_discovery_set_state(hdev,
                                                        DISCOVERY_STOPPED);
                } else {
index e0670d7..659fb96 100644 (file)
@@ -796,9 +796,11 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
        int err = 0;
 
        if (ndm->ndm_flags & NTF_USE) {
+               local_bh_disable();
                rcu_read_lock();
                br_fdb_update(p->br, p, addr, vid, true);
                rcu_read_unlock();
+               local_bh_enable();
        } else {
                spin_lock_bh(&p->br->hash_lock);
                err = fdb_add_entry(p, addr, ndm->ndm_state,
index 4b6722f..ff667e1 100644 (file)
@@ -1072,7 +1072,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 
                err = br_ip6_multicast_add_group(br, port, &grec->grec_mca,
                                                 vid);
-               if (!err)
+               if (err)
                        break;
        }
 
@@ -1167,6 +1167,9 @@ static void br_multicast_add_router(struct net_bridge *br,
        struct net_bridge_port *p;
        struct hlist_node *slot = NULL;
 
+       if (!hlist_unhashed(&port->rlist))
+               return;
+
        hlist_for_each_entry(p, &br->router_list, rlist) {
                if ((unsigned long) port >= (unsigned long) p)
                        break;
@@ -1194,12 +1197,8 @@ static void br_multicast_mark_router(struct net_bridge *br,
        if (port->multicast_router != 1)
                return;
 
-       if (!hlist_unhashed(&port->rlist))
-               goto timer;
-
        br_multicast_add_router(br, port);
 
-timer:
        mod_timer(&port->multicast_router_timer,
                  now + br->multicast_querier_interval);
 }
@@ -1822,7 +1821,7 @@ static void br_multicast_query_expired(struct net_bridge *br,
        if (query->startup_sent < br->multicast_startup_query_count)
                query->startup_sent++;
 
-       RCU_INIT_POINTER(querier, NULL);
+       RCU_INIT_POINTER(querier->port, NULL);
        br_multicast_send_query(br, NULL, query);
        spin_unlock(&br->multicast_lock);
 }
index ab55e24..60ddfbe 100644 (file)
 #include <net/route.h>
 #include <net/netfilter/br_netfilter.h>
 
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
 #include <asm/uaccess.h>
 #include "br_private.h"
 #ifdef CONFIG_SYSCTL
@@ -350,24 +346,15 @@ free_skb:
        return 0;
 }
 
-static bool dnat_took_place(const struct sk_buff *skb)
+static bool daddr_was_changed(const struct sk_buff *skb,
+                             const struct nf_bridge_info *nf_bridge)
 {
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-       enum ip_conntrack_info ctinfo;
-       struct nf_conn *ct;
-
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct || nf_ct_is_untracked(ct))
-               return false;
-
-       return test_bit(IPS_DST_NAT_BIT, &ct->status);
-#else
-       return false;
-#endif
+       return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
 }
 
 /* This requires some explaining. If DNAT has taken place,
  * we will need to fix up the destination Ethernet address.
+ * This is also true when SNAT takes place (for the reply direction).
  *
  * There are two cases to consider:
  * 1. The packet was DNAT'ed to a device in the same bridge
@@ -421,7 +408,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
                nf_bridge->pkt_otherhost = false;
        }
        nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
-       if (dnat_took_place(skb)) {
+       if (daddr_was_changed(skb, nf_bridge)) {
                if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
                        struct in_device *in_dev = __in_dev_get_rcu(dev);
 
@@ -632,6 +619,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
                                      struct sk_buff *skb,
                                      const struct nf_hook_state *state)
 {
+       struct nf_bridge_info *nf_bridge;
        struct net_bridge_port *p;
        struct net_bridge *br;
        __u32 len = nf_bridge_encap_header_len(skb);
@@ -669,6 +657,9 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
        if (!setup_pre_routing(skb))
                return NF_DROP;
 
+       nf_bridge = nf_bridge_info_get(skb);
+       nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
+
        skb->protocol = htons(ETH_P_IP);
 
        NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb,
index 4fcaa67..7caf7fa 100644 (file)
@@ -97,7 +97,9 @@ static void br_forward_delay_timer_expired(unsigned long arg)
                netif_carrier_on(br->dev);
        }
        br_log_state(p);
+       rcu_read_lock();
        br_ifinfo_notify(RTM_NEWLINK, p);
+       rcu_read_unlock();
        spin_unlock(&br->lock);
 }
 
index 4ec0c80..112ad78 100644 (file)
@@ -330,6 +330,10 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
                release_sock(sk);
                timeo = schedule_timeout(timeo);
                lock_sock(sk);
+
+               if (sock_flag(sk, SOCK_DEAD))
+                       break;
+
                clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        }
 
@@ -373,6 +377,10 @@ static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg,
                struct sk_buff *skb;
 
                lock_sock(sk);
+               if (sock_flag(sk, SOCK_DEAD)) {
+                       err = -ECONNRESET;
+                       goto unlock;
+               }
                skb = skb_dequeue(&sk->sk_receive_queue);
                caif_check_flow_release(sk);
 
index 41a4abc..c4ec923 100644 (file)
@@ -1306,8 +1306,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
                if (list_empty(&req->r_osd_item))
                        req->r_osd = NULL;
        }
-
-       list_del_init(&req->r_req_lru_item); /* can be on notarget */
        ceph_osdc_put_request(req);
 }
 
@@ -2017,20 +2015,29 @@ static void kick_requests(struct ceph_osd_client *osdc, bool force_resend,
                err = __map_request(osdc, req,
                                    force_resend || force_resend_writes);
                dout("__map_request returned %d\n", err);
-               if (err == 0)
-                       continue;  /* no change and no osd was specified */
                if (err < 0)
                        continue;  /* hrm! */
-               if (req->r_osd == NULL) {
-                       dout("tid %llu maps to no valid osd\n", req->r_tid);
-                       needmap++;  /* request a newer map */
-                       continue;
-               }
+               if (req->r_osd == NULL || err > 0) {
+                       if (req->r_osd == NULL) {
+                               dout("lingering %p tid %llu maps to no osd\n",
+                                    req, req->r_tid);
+                               /*
+                                * A homeless lingering request makes
+                                * no sense, as it's job is to keep
+                                * a particular OSD connection open.
+                                * Request a newer map and kick the
+                                * request, knowing that it won't be
+                                * resent until we actually get a map
+                                * that can tell us where to send it.
+                                */
+                               needmap++;
+                       }
 
-               dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
-                    req->r_osd ? req->r_osd->o_osd : -1);
-               __register_request(osdc, req);
-               __unregister_linger_request(osdc, req);
+                       dout("kicking lingering %p tid %llu osd%d\n", req,
+                            req->r_tid, req->r_osd ? req->r_osd->o_osd : -1);
+                       __register_request(osdc, req);
+                       __unregister_linger_request(osdc, req);
+               }
        }
        reset_changed_osds(osdc);
        mutex_unlock(&osdc->request_mutex);
index c7ba038..aa82f9a 100644 (file)
@@ -1718,15 +1718,8 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-       if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-               if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
-                       atomic_long_inc(&dev->rx_dropped);
-                       kfree_skb(skb);
-                       return NET_RX_DROP;
-               }
-       }
-
-       if (unlikely(!is_skb_forwardable(dev, skb))) {
+       if (skb_orphan_frags(skb, GFP_ATOMIC) ||
+           unlikely(!is_skb_forwardable(dev, skb))) {
                atomic_long_inc(&dev->rx_dropped);
                kfree_skb(skb);
                return NET_RX_DROP;
@@ -5209,7 +5202,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
        if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
                return -EBUSY;
 
-       if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
+       if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
                return -EEXIST;
 
        if (master && netdev_master_upper_dev_get(dev))
index 78fc04a..572af00 100644 (file)
@@ -601,7 +601,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
        }
 
        err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
-                           RTM_GETNSID, net, peer, -1);
+                           RTM_NEWNSID, net, peer, -1);
        if (err < 0)
                goto err_out;
 
index 666e092..8de3682 100644 (file)
@@ -2416,6 +2416,9 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
 {
        struct sk_buff *skb;
 
+       if (dev->reg_state != NETREG_REGISTERED)
+               return;
+
        skb = rtmsg_ifinfo_build_skb(type, dev, change, flags);
        if (skb)
                rtmsg_ifinfo_send(skb, dev, flags);
index 3cfff2a..41ec022 100644 (file)
@@ -4398,7 +4398,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 
                while (order) {
                        if (npages >= 1 << order) {
-                               page = alloc_pages(gfp_mask |
+                               page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
                                                   __GFP_COMP |
                                                   __GFP_NOWARN |
                                                   __GFP_NORETRY,
index e891bcf..dc30dc5 100644 (file)
@@ -354,15 +354,12 @@ void sk_clear_memalloc(struct sock *sk)
 
        /*
         * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
-        * progress of swapping. However, if SOCK_MEMALLOC is cleared while
-        * it has rmem allocations there is a risk that the user of the
-        * socket cannot make forward progress due to exceeding the rmem
-        * limits. By rights, sk_clear_memalloc() should only be called
-        * on sockets being torn down but warn and reset the accounting if
-        * that assumption breaks.
+        * progress of swapping. SOCK_MEMALLOC may be cleared while
+        * it has rmem allocations due to the last swapfile being deactivated
+        * but there is a risk that the socket is unusable due to exceeding
+        * the rmem limits. Reclaim the reserves and obey rmem limits again.
         */
-       if (WARN_ON(sk->sk_forward_alloc))
-               sk_mem_reclaim(sk);
+       sk_mem_reclaim(sk);
 }
 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
 
@@ -1474,8 +1471,8 @@ void sk_release_kernel(struct sock *sk)
                return;
 
        sock_hold(sk);
-       sock_net_set(sk, get_net(&init_net));
        sock_release(sk->sk_socket);
+       sock_net_set(sk, get_net(&init_net));
        sock_put(sk);
 }
 EXPORT_SYMBOL(sk_release_kernel);
@@ -1883,7 +1880,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
 
        pfrag->offset = 0;
        if (SKB_FRAG_PAGE_ORDER) {
-               pfrag->page = alloc_pages(gfp | __GFP_COMP |
+               pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
                                          __GFP_NOWARN | __GFP_NORETRY,
                                          SKB_FRAG_PAGE_ORDER);
                if (likely(pfrag->page)) {
index e6f6cc3..392e29a 100644 (file)
@@ -359,7 +359,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
         */
        ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL);
        if (ds == NULL)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        ds->dst = dst;
        ds->index = index;
@@ -370,7 +370,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 
        ret = dsa_switch_setup_one(ds, parent);
        if (ret)
-               return NULL;
+               return ERR_PTR(ret);
 
        return ds;
 }
index 05dab29..4adfd4d 100644 (file)
@@ -3,7 +3,9 @@ obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o
 obj-y += 6lowpan/
 
 ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \
-                header_ops.o sysfs.o nl802154.o
+                header_ops.o sysfs.o nl802154.o trace.o
 ieee802154_socket-y := socket.o
 
+CFLAGS_trace.o := -I$(src)
+
 ccflags-y += -D__CHECK_ENDIAN__
index 1b9d25f..346c666 100644 (file)
@@ -175,6 +175,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
        int rc = -ENOBUFS;
        struct net_device *dev;
        int type = __IEEE802154_DEV_INVALID;
+       unsigned char name_assign_type;
 
        pr_debug("%s\n", __func__);
 
@@ -190,8 +191,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
                if (devname[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1]
                                != '\0')
                        return -EINVAL; /* phy name should be null-terminated */
+               name_assign_type = NET_NAME_USER;
        } else  {
                devname = "wpan%d";
+               name_assign_type = NET_NAME_ENUM;
        }
 
        if (strlen(devname) >= IFNAMSIZ)
@@ -221,7 +224,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
        }
 
        dev = rdev_add_virtual_intf_deprecated(wpan_phy_to_rdev(phy), devname,
-                                              type);
+                                              name_assign_type, type);
        if (IS_ERR(dev)) {
                rc = PTR_ERR(dev);
                goto nla_put_failure;
index a4daf91..f3c12f6 100644 (file)
@@ -589,7 +589,7 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info)
 
        return rdev_add_virtual_intf(rdev,
                                     nla_data(info->attrs[NL802154_ATTR_IFNAME]),
-                                    type, extended_addr);
+                                    NET_NAME_USER, type, extended_addr);
 }
 
 static int nl802154_del_interface(struct sk_buff *skb, struct genl_info *info)
index 7c46732..7b5a9dd 100644 (file)
@@ -4,13 +4,16 @@
 #include <net/cfg802154.h>
 
 #include "core.h"
+#include "trace.h"
 
 static inline struct net_device *
 rdev_add_virtual_intf_deprecated(struct cfg802154_registered_device *rdev,
-                                const char *name, int type)
+                                const char *name,
+                                unsigned char name_assign_type,
+                                int type)
 {
        return rdev->ops->add_virtual_intf_deprecated(&rdev->wpan_phy, name,
-                                                     type);
+                                                     name_assign_type, type);
 }
 
 static inline void
@@ -22,75 +25,131 @@ rdev_del_virtual_intf_deprecated(struct cfg802154_registered_device *rdev,
 
 static inline int
 rdev_add_virtual_intf(struct cfg802154_registered_device *rdev, char *name,
+                     unsigned char name_assign_type,
                      enum nl802154_iftype type, __le64 extended_addr)
 {
-       return rdev->ops->add_virtual_intf(&rdev->wpan_phy, name, type,
+       int ret;
+
+       trace_802154_rdev_add_virtual_intf(&rdev->wpan_phy, name, type,
                                           extended_addr);
+       ret = rdev->ops->add_virtual_intf(&rdev->wpan_phy, name,
+                                         name_assign_type, type,
+                                         extended_addr);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 static inline int
 rdev_del_virtual_intf(struct cfg802154_registered_device *rdev,
                      struct wpan_dev *wpan_dev)
 {
-       return rdev->ops->del_virtual_intf(&rdev->wpan_phy, wpan_dev);
+       int ret;
+
+       trace_802154_rdev_del_virtual_intf(&rdev->wpan_phy, wpan_dev);
+       ret = rdev->ops->del_virtual_intf(&rdev->wpan_phy, wpan_dev);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 static inline int
 rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel)
 {
-       return rdev->ops->set_channel(&rdev->wpan_phy, page, channel);
+       int ret;
+
+       trace_802154_rdev_set_channel(&rdev->wpan_phy, page, channel);
+       ret = rdev->ops->set_channel(&rdev->wpan_phy, page, channel);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 static inline int
 rdev_set_cca_mode(struct cfg802154_registered_device *rdev,
                  const struct wpan_phy_cca *cca)
 {
-       return rdev->ops->set_cca_mode(&rdev->wpan_phy, cca);
+       int ret;
+
+       trace_802154_rdev_set_cca_mode(&rdev->wpan_phy, cca);
+       ret = rdev->ops->set_cca_mode(&rdev->wpan_phy, cca);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 static inline int
 rdev_set_pan_id(struct cfg802154_registered_device *rdev,
                struct wpan_dev *wpan_dev, __le16 pan_id)
 {
-       return rdev->ops->set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id);
+       int ret;
+
+       trace_802154_rdev_set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id);
+       ret = rdev->ops->set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 static inline int
 rdev_set_short_addr(struct cfg802154_registered_device *rdev,
                    struct wpan_dev *wpan_dev, __le16 short_addr)
 {
-       return rdev->ops->set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr);
+       int ret;
+
+       trace_802154_rdev_set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr);
+       ret = rdev->ops->set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 static inline int
 rdev_set_backoff_exponent(struct cfg802154_registered_device *rdev,
                          struct wpan_dev *wpan_dev, u8 min_be, u8 max_be)
 {
-       return rdev->ops->set_backoff_exponent(&rdev->wpan_phy, wpan_dev,
+       int ret;
+
+       trace_802154_rdev_set_backoff_exponent(&rdev->wpan_phy, wpan_dev,
                                               min_be, max_be);
+       ret = rdev->ops->set_backoff_exponent(&rdev->wpan_phy, wpan_dev,
+                                             min_be, max_be);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 static inline int
 rdev_set_max_csma_backoffs(struct cfg802154_registered_device *rdev,
                           struct wpan_dev *wpan_dev, u8 max_csma_backoffs)
 {
-       return rdev->ops->set_max_csma_backoffs(&rdev->wpan_phy, wpan_dev,
-                                               max_csma_backoffs);
+       int ret;
+
+       trace_802154_rdev_set_csma_backoffs(&rdev->wpan_phy, wpan_dev,
+                                           max_csma_backoffs);
+       ret = rdev->ops->set_max_csma_backoffs(&rdev->wpan_phy, wpan_dev,
+                                              max_csma_backoffs);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 static inline int
 rdev_set_max_frame_retries(struct cfg802154_registered_device *rdev,
                           struct wpan_dev *wpan_dev, s8 max_frame_retries)
 {
-       return rdev->ops->set_max_frame_retries(&rdev->wpan_phy, wpan_dev,
+       int ret;
+
+       trace_802154_rdev_set_max_frame_retries(&rdev->wpan_phy, wpan_dev,
                                                max_frame_retries);
+       ret = rdev->ops->set_max_frame_retries(&rdev->wpan_phy, wpan_dev,
+                                              max_frame_retries);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 static inline int
 rdev_set_lbt_mode(struct cfg802154_registered_device *rdev,
                  struct wpan_dev *wpan_dev, bool mode)
 {
-       return rdev->ops->set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode);
+       int ret;
+
+       trace_802154_rdev_set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode);
+       ret = rdev->ops->set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode);
+       trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+       return ret;
 }
 
 #endif /* __CFG802154_RDEV_OPS */
diff --git a/net/ieee802154/trace.c b/net/ieee802154/trace.c
new file mode 100644 (file)
index 0000000..95f997f
--- /dev/null
@@ -0,0 +1,7 @@
+#include <linux/module.h>
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#endif
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
new file mode 100644 (file)
index 0000000..5ac25eb
--- /dev/null
@@ -0,0 +1,247 @@
+/* Based on net/wireless/tracing.h */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cfg802154
+
+#if !defined(__RDEV_CFG802154_OPS_TRACE) || defined(TRACE_HEADER_MULTI_READ)
+#define __RDEV_CFG802154_OPS_TRACE
+
+#include <linux/tracepoint.h>
+
+#include <net/cfg802154.h>
+
+#define MAXNAME                32
+#define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME)
+#define WPAN_PHY_ASSIGN        strlcpy(__entry->wpan_phy_name,  \
+                               wpan_phy_name(wpan_phy), \
+                               MAXNAME)
+#define WPAN_PHY_PR_FMT        "%s"
+#define WPAN_PHY_PR_ARG        __entry->wpan_phy_name
+
+#define WPAN_DEV_ENTRY __field(u32, identifier)
+#define WPAN_DEV_ASSIGN        (__entry->identifier) = (!IS_ERR_OR_NULL(wpan_dev) \
+                                        ? wpan_dev->identifier : 0)
+#define WPAN_DEV_PR_FMT        "wpan_dev(%u)"
+#define WPAN_DEV_PR_ARG        (__entry->identifier)
+
+#define WPAN_CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \
+                       __field(enum nl802154_cca_opts, cca_opt)
+#define WPAN_CCA_ASSIGN \
+       do {                                     \
+               (__entry->cca_mode) = cca->mode; \
+               (__entry->cca_opt) = cca->opt;   \
+       } while (0)
+#define WPAN_CCA_PR_FMT        "cca_mode: %d, cca_opt: %d"
+#define WPAN_CCA_PR_ARG __entry->cca_mode, __entry->cca_opt
+
+#define BOOL_TO_STR(bo) (bo) ? "true" : "false"
+
+/*************************************************************
+ *                     rdev->ops traces                     *
+ *************************************************************/
+
+TRACE_EVENT(802154_rdev_add_virtual_intf,
+       TP_PROTO(struct wpan_phy *wpan_phy, char *name,
+                enum nl802154_iftype type, __le64 extended_addr),
+       TP_ARGS(wpan_phy, name, type, extended_addr),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               __string(vir_intf_name, name ? name : "<noname>")
+               __field(enum nl802154_iftype, type)
+               __field(__le64, extended_addr)
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               __assign_str(vir_intf_name, name ? name : "<noname>");
+               __entry->type = type;
+               __entry->extended_addr = extended_addr;
+       ),
+       TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, ea %llx",
+                 WPAN_PHY_PR_ARG, __get_str(vir_intf_name), __entry->type,
+                 __le64_to_cpu(__entry->extended_addr))
+);
+
+TRACE_EVENT(802154_rdev_del_virtual_intf,
+       TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev),
+       TP_ARGS(wpan_phy, wpan_dev),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               WPAN_DEV_ENTRY
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               WPAN_DEV_ASSIGN;
+       ),
+       TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT, WPAN_PHY_PR_ARG,
+                 WPAN_DEV_PR_ARG)
+);
+
+TRACE_EVENT(802154_rdev_set_channel,
+       TP_PROTO(struct wpan_phy *wpan_phy, u8 page, u8 channel),
+       TP_ARGS(wpan_phy, page, channel),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               __field(u8, page)
+               __field(u8, channel)
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               __entry->page = page;
+               __entry->channel = channel;
+       ),
+       TP_printk(WPAN_PHY_PR_FMT ", page: %d, channel: %d", WPAN_PHY_PR_ARG,
+                 __entry->page, __entry->channel)
+);
+
+TRACE_EVENT(802154_rdev_set_cca_mode,
+       TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca),
+       TP_ARGS(wpan_phy, cca),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               WPAN_CCA_ENTRY
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               WPAN_CCA_ASSIGN;
+       ),
+       TP_printk(WPAN_PHY_PR_FMT ", " WPAN_CCA_PR_FMT, WPAN_PHY_PR_ARG,
+                 WPAN_CCA_PR_ARG)
+);
+
+DECLARE_EVENT_CLASS(802154_le16_template,
+       TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+                __le16 le16arg),
+       TP_ARGS(wpan_phy, wpan_dev, le16arg),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               WPAN_DEV_ENTRY
+               __field(__le16, le16arg)
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               WPAN_DEV_ASSIGN;
+               __entry->le16arg = le16arg;
+       ),
+       TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", pan id: 0x%04x",
+                 WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG,
+                 __le16_to_cpu(__entry->le16arg))
+);
+
+DEFINE_EVENT(802154_le16_template, 802154_rdev_set_pan_id,
+       TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+                __le16 le16arg),
+       TP_ARGS(wpan_phy, wpan_dev, le16arg)
+);
+
+DEFINE_EVENT_PRINT(802154_le16_template, 802154_rdev_set_short_addr,
+       TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+                __le16 le16arg),
+       TP_ARGS(wpan_phy, wpan_dev, le16arg),
+       TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", sa: 0x%04x",
+                 WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG,
+                 __le16_to_cpu(__entry->le16arg))
+);
+
+TRACE_EVENT(802154_rdev_set_backoff_exponent,
+       TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+                u8 min_be, u8 max_be),
+       TP_ARGS(wpan_phy, wpan_dev, min_be, max_be),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               WPAN_DEV_ENTRY
+               __field(u8, min_be)
+               __field(u8, max_be)
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               WPAN_DEV_ASSIGN;
+               __entry->min_be = min_be;
+               __entry->max_be = max_be;
+       ),
+
+       TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
+                 ", min be: %d, max_be: %d", WPAN_PHY_PR_ARG,
+                 WPAN_DEV_PR_ARG, __entry->min_be, __entry->max_be)
+);
+
+TRACE_EVENT(802154_rdev_set_csma_backoffs,
+       TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+                u8 max_csma_backoffs),
+       TP_ARGS(wpan_phy, wpan_dev, max_csma_backoffs),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               WPAN_DEV_ENTRY
+               __field(u8, max_csma_backoffs)
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               WPAN_DEV_ASSIGN;
+               __entry->max_csma_backoffs = max_csma_backoffs;
+       ),
+
+       TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
+                 ", max csma backoffs: %d", WPAN_PHY_PR_ARG,
+                 WPAN_DEV_PR_ARG, __entry->max_csma_backoffs)
+);
+
+TRACE_EVENT(802154_rdev_set_max_frame_retries,
+       TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+                s8 max_frame_retries),
+       TP_ARGS(wpan_phy, wpan_dev, max_frame_retries),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               WPAN_DEV_ENTRY
+               __field(s8, max_frame_retries)
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               WPAN_DEV_ASSIGN;
+               __entry->max_frame_retries = max_frame_retries;
+       ),
+
+       TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
+                 ", max frame retries: %d", WPAN_PHY_PR_ARG,
+                 WPAN_DEV_PR_ARG, __entry->max_frame_retries)
+);
+
+TRACE_EVENT(802154_rdev_set_lbt_mode,
+       TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+                bool mode),
+       TP_ARGS(wpan_phy, wpan_dev, mode),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               WPAN_DEV_ENTRY
+               __field(bool, mode)
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               WPAN_DEV_ASSIGN;
+               __entry->mode = mode;
+       ),
+       TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
+               ", lbt mode: %s", WPAN_PHY_PR_ARG,
+               WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->mode))
+);
+
+TRACE_EVENT(802154_rdev_return_int,
+       TP_PROTO(struct wpan_phy *wpan_phy, int ret),
+       TP_ARGS(wpan_phy, ret),
+       TP_STRUCT__entry(
+               WPAN_PHY_ENTRY
+               __field(int, ret)
+       ),
+       TP_fast_assign(
+               WPAN_PHY_ASSIGN;
+               __entry->ret = ret;
+       ),
+       TP_printk(WPAN_PHY_PR_FMT ", returned: %d", WPAN_PHY_PR_ARG,
+                 __entry->ret)
+);
+
+#endif /* !__RDEV_CFG802154_OPS_TRACE || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
index 421a80b..30b544f 100644 (file)
@@ -256,7 +256,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
        aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
        aead_givcrypt_set_assoc(req, asg, assoclen);
        aead_givcrypt_set_giv(req, esph->enc_data,
-                             XFRM_SKB_CB(skb)->seq.output.low);
+                             XFRM_SKB_CB(skb)->seq.output.low +
+                             ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
 
        ESP_SKB_CB(skb)->tmp = tmp;
        err = crypto_aead_givencrypt(req);
index e13fcc6..09b62e1 100644 (file)
@@ -1164,6 +1164,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
                        state = fa->fa_state;
                        new_fa->fa_state = state & ~FA_S_ACCESSED;
                        new_fa->fa_slen = fa->fa_slen;
+                       new_fa->tb_id = tb->tb_id;
 
                        err = netdev_switch_fib_ipv4_add(key, plen, fi,
                                                         new_fa->fa_tos,
@@ -1764,7 +1765,7 @@ void fib_table_flush_external(struct fib_table *tb)
                        /* record local slen */
                        slen = fa->fa_slen;
 
-                       if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL))
+                       if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD))
                                continue;
 
                        netdev_switch_fib_ipv4_del(n->key,
index bb77ebd..4d32262 100644 (file)
@@ -224,14 +224,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
        handler->idiag_get_info(sk, r, info);
 
        if (sk->sk_state < TCP_TIME_WAIT) {
-               int err = 0;
+               union tcp_cc_info info;
+               size_t sz = 0;
+               int attr;
 
                rcu_read_lock();
                ca_ops = READ_ONCE(icsk->icsk_ca_ops);
                if (ca_ops && ca_ops->get_info)
-                       err = ca_ops->get_info(sk, ext, skb);
+                       sz = ca_ops->get_info(sk, ext, &attr, &info);
                rcu_read_unlock();
-               if (err < 0)
+               if (sz && nla_put(skb, attr, sz, &info) < 0)
                        goto errout;
        }
 
index 9f7269f..0c15208 100644 (file)
@@ -65,7 +65,6 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
                        goto drop;
 
                XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
-               skb->mark = be32_to_cpu(tunnel->parms.i_key);
 
                return xfrm_input(skb, nexthdr, spi, encap_type);
        }
@@ -91,6 +90,8 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
        struct pcpu_sw_netstats *tstats;
        struct xfrm_state *x;
        struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
+       u32 orig_mark = skb->mark;
+       int ret;
 
        if (!tunnel)
                return 1;
@@ -107,7 +108,11 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
        x = xfrm_input_state(skb);
        family = x->inner_mode->afinfo->family;
 
-       if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+       skb->mark = be32_to_cpu(tunnel->parms.i_key);
+       ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
+       skb->mark = orig_mark;
+
+       if (!ret)
                return -EPERM;
 
        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
@@ -216,8 +221,6 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
        memset(&fl, 0, sizeof(fl));
 
-       skb->mark = be32_to_cpu(tunnel->parms.o_key);
-
        switch (skb->protocol) {
        case htons(ETH_P_IP):
                xfrm_decode_session(skb, &fl, AF_INET);
@@ -233,6 +236,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
                return NETDEV_TX_OK;
        }
 
+       /* override mark with tunnel output key */
+       fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key);
+
        return vti_xmit(skb, dev, &fl);
 }
 
index 13bfe84..a612007 100644 (file)
@@ -1075,6 +1075,9 @@ static int do_replace(struct net *net, const void __user *user,
        /* overflow check */
        if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
                return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
+
        tmp.name[sizeof(tmp.name)-1] = 0;
 
        newinfo = xt_alloc_table_info(tmp.size);
@@ -1499,6 +1502,9 @@ static int compat_do_replace(struct net *net, void __user *user,
                return -ENOMEM;
        if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
                return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
+
        tmp.name[sizeof(tmp.name)-1] = 0;
 
        newinfo = xt_alloc_table_info(tmp.size);
index c69db7f..2d0e265 100644 (file)
@@ -1262,6 +1262,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
        /* overflow check */
        if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
                return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
+
        tmp.name[sizeof(tmp.name)-1] = 0;
 
        newinfo = xt_alloc_table_info(tmp.size);
@@ -1809,6 +1812,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
                return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
+
        tmp.name[sizeof(tmp.name)-1] = 0;
 
        newinfo = xt_alloc_table_info(tmp.size);
index bff62fc..f45f2a1 100644 (file)
@@ -902,6 +902,10 @@ static int ip_error(struct sk_buff *skb)
        bool send;
        int code;
 
+       /* IP on this device is disabled. */
+       if (!in_dev)
+               goto out;
+
        net = dev_net(rt->dst.dev);
        if (!IN_DEV_FORWARD(in_dev)) {
                switch (rt->dst.error) {
index 8c5cd9e..f1377f2 100644 (file)
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/poll.h>
+#include <linux/inet_diag.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/skbuff.h>
@@ -401,6 +402,7 @@ void tcp_init_sock(struct sock *sk)
        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
        tp->snd_cwnd_clamp = ~0;
        tp->mss_cache = TCP_MSS_DEFAULT;
+       u64_stats_init(&tp->syncp);
 
        tp->reordering = sysctl_tcp_reordering;
        tcp_enable_early_retrans(tp);
@@ -2592,11 +2594,12 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
 #endif
 
 /* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(const struct sock *sk, struct tcp_info *info)
+void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
        const struct tcp_sock *tp = tcp_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now = tcp_time_stamp;
+       unsigned int start;
        u32 rate;
 
        memset(info, 0, sizeof(*info));
@@ -2663,6 +2666,12 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 
        rate = READ_ONCE(sk->sk_max_pacing_rate);
        info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+
+       do {
+               start = u64_stats_fetch_begin_irq(&tp->syncp);
+               info->tcpi_bytes_acked = tp->bytes_acked;
+               info->tcpi_bytes_received = tp->bytes_received;
+       } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
@@ -2734,6 +2743,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                        return -EFAULT;
                return 0;
        }
+       case TCP_CC_INFO: {
+               const struct tcp_congestion_ops *ca_ops;
+               union tcp_cc_info info;
+               size_t sz = 0;
+               int attr;
+
+               if (get_user(len, optlen))
+                       return -EFAULT;
+
+               ca_ops = icsk->icsk_ca_ops;
+               if (ca_ops && ca_ops->get_info)
+                       sz = ca_ops->get_info(sk, ~0U, &attr, &info);
+
+               len = min_t(unsigned int, len, sz);
+               if (put_user(len, optlen))
+                       return -EFAULT;
+               if (copy_to_user(optval, &info, len))
+                       return -EFAULT;
+               return 0;
+       }
        case TCP_QUICKACK:
                val = !icsk->icsk_ack.pingpong;
                break;
index 7a5ae50..84be008 100644 (file)
@@ -187,6 +187,7 @@ static void tcp_reinit_congestion_control(struct sock *sk,
 
        tcp_cleanup_congestion_control(sk);
        icsk->icsk_ca_ops = ca;
+       icsk->icsk_ca_setsockopt = 1;
 
        if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
                icsk->icsk_ca_ops->init(sk);
@@ -335,8 +336,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
        rcu_read_lock();
        ca = __tcp_ca_find_autoload(name);
        /* No change asking for existing value */
-       if (ca == icsk->icsk_ca_ops)
+       if (ca == icsk->icsk_ca_ops) {
+               icsk->icsk_ca_setsockopt = 1;
                goto out;
+       }
        if (!ca)
                err = -ENOENT;
        else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
index 4376016..4c41c12 100644 (file)
@@ -277,7 +277,8 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
        }
 }
 
-static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
+                            union tcp_cc_info *info)
 {
        const struct dctcp *ca = inet_csk_ca(sk);
 
@@ -286,18 +287,17 @@ static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
         */
        if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) ||
            ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-               struct tcp_dctcp_info info;
-
-               memset(&info, 0, sizeof(info));
+               memset(info, 0, sizeof(struct tcp_dctcp_info));
                if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) {
-                       info.dctcp_enabled = 1;
-                       info.dctcp_ce_state = (u16) ca->ce_state;
-                       info.dctcp_alpha = ca->dctcp_alpha;
-                       info.dctcp_ab_ecn = ca->acked_bytes_ecn;
-                       info.dctcp_ab_tot = ca->acked_bytes_total;
+                       info->dctcp.dctcp_enabled = 1;
+                       info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
+                       info->dctcp.dctcp_alpha = ca->dctcp_alpha;
+                       info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
+                       info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
                }
 
-               return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
+               *attr = INET_DIAG_DCTCPINFO;
+               return sizeof(*info);
        }
        return 0;
 }
index e3d87ac..46b087a 100644 (file)
@@ -206,6 +206,11 @@ static bool tcp_fastopen_create_child(struct sock *sk,
                        skb_set_owner_r(skb2, child);
                        __skb_queue_tail(&child->sk_receive_queue, skb2);
                        tp->syn_data_acked = 1;
+
+                       /* u64_stats_update_begin(&tp->syncp) not needed here,
+                        * as we certainly are not changing upper 32bit value (0)
+                        */
+                       tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
                } else {
                        end_seq = TCP_SKB_CB(skb)->seq + 1;
                }
index 67476f0..f71002e 100644 (file)
@@ -300,24 +300,25 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
+                               union tcp_cc_info *info)
 {
        const struct illinois *ca = inet_csk_ca(sk);
 
        if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-               struct tcpvegas_info info = {
-                       .tcpv_enabled = 1,
-                       .tcpv_rttcnt = ca->cnt_rtt,
-                       .tcpv_minrtt = ca->base_rtt,
-               };
+               info->vegas.tcpv_enabled = 1;
+               info->vegas.tcpv_rttcnt = ca->cnt_rtt;
+               info->vegas.tcpv_minrtt = ca->base_rtt;
+               info->vegas.tcpv_rtt = 0;
 
-               if (info.tcpv_rttcnt > 0) {
+               if (info->vegas.tcpv_rttcnt > 0) {
                        u64 t = ca->sum_rtt;
 
-                       do_div(t, info.tcpv_rttcnt);
-                       info.tcpv_rtt = t;
+                       do_div(t, info->vegas.tcpv_rttcnt);
+                       info->vegas.tcpv_rtt = t;
                }
-               return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+               *attr = INET_DIAG_VEGASINFO;
+               return sizeof(struct tcpvegas_info);
        }
        return 0;
 }
index 3a4d9b3..c9ab964 100644 (file)
@@ -1820,14 +1820,12 @@ advance_sp:
        for (j = 0; j < used_sacks; j++)
                tp->recv_sack_cache[i++] = sp[j];
 
-       tcp_mark_lost_retrans(sk);
-
-       tcp_verify_left_out(tp);
-
        if ((state.reord < tp->fackets_out) &&
            ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
                tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
 
+       tcp_mark_lost_retrans(sk);
+       tcp_verify_left_out(tp);
 out:
 
 #if FASTRETRANS_DEBUG > 0
@@ -2700,16 +2698,21 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
        struct tcp_sock *tp = tcp_sk(sk);
        bool recovered = !before(tp->snd_una, tp->high_seq);
 
+       if ((flag & FLAG_SND_UNA_ADVANCED) &&
+           tcp_try_undo_loss(sk, false))
+               return;
+
        if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
                /* Step 3.b. A timeout is spurious if not all data are
                 * lost, i.e., never-retransmitted data are (s)acked.
                 */
-               if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
+               if ((flag & FLAG_ORIG_SACK_ACKED) &&
+                   tcp_try_undo_loss(sk, true))
                        return;
 
-               if (after(tp->snd_nxt, tp->high_seq) &&
-                   (flag & FLAG_DATA_SACKED || is_dupack)) {
-                       tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+               if (after(tp->snd_nxt, tp->high_seq)) {
+                       if (flag & FLAG_DATA_SACKED || is_dupack)
+                               tp->frto = 0; /* Step 3.a. loss was real */
                } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
                        tp->high_seq = tp->snd_nxt;
                        __tcp_push_pending_frames(sk, tcp_current_mss(sk),
@@ -2734,8 +2737,6 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
                else if (flag & FLAG_SND_UNA_ADVANCED)
                        tcp_reset_reno_sack(tp);
        }
-       if (tcp_try_undo_loss(sk, false))
-               return;
        tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3280,6 +3281,28 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
                (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }
 
+/* If we update tp->snd_una, also update tp->bytes_acked */
+static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
+{
+       u32 delta = ack - tp->snd_una;
+
+       u64_stats_update_begin(&tp->syncp);
+       tp->bytes_acked += delta;
+       u64_stats_update_end(&tp->syncp);
+       tp->snd_una = ack;
+}
+
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+       u32 delta = seq - tp->rcv_nxt;
+
+       u64_stats_update_begin(&tp->syncp);
+       tp->bytes_received += delta;
+       u64_stats_update_end(&tp->syncp);
+       tp->rcv_nxt = seq;
+}
+
 /* Update our send window.
  *
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@@ -3315,7 +3338,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
                }
        }
 
-       tp->snd_una = ack;
+       tcp_snd_una_update(tp, ack);
 
        return flag;
 }
@@ -3497,7 +3520,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
                 * Note, we use the fact that SND.UNA>=SND.WL2.
                 */
                tcp_update_wl(tp, ack_seq);
-               tp->snd_una = ack;
+               tcp_snd_una_update(tp, ack);
                flag |= FLAG_WIN_UPDATE;
 
                tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
@@ -4236,7 +4259,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
                tail = skb_peek_tail(&sk->sk_receive_queue);
                eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
-               tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+               tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                if (!eaten)
                        __skb_queue_tail(&sk->sk_receive_queue, skb);
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4404,7 +4427,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
        __skb_pull(skb, hdrlen);
        eaten = (tail &&
                 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
-       tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+       tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
        if (!eaten) {
                __skb_queue_tail(&sk->sk_receive_queue, skb);
                skb_set_owner_r(skb, sk);
@@ -4497,7 +4520,7 @@ queue_and_out:
 
                        eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
                }
-               tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+               tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                if (skb->len)
                        tcp_event_data_recv(sk, skb);
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -5245,7 +5268,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                        tcp_rcv_rtt_measure_ts(sk, skb);
 
                                        __skb_pull(skb, tcp_header_len);
-                                       tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+                                       tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                                        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
                                        eaten = 1;
                                }
index e5d7649..17e7339 100644 (file)
@@ -300,7 +300,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
                        tw->tw_v6_daddr = sk->sk_v6_daddr;
                        tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
                        tw->tw_tclass = np->tclass;
-                       tw->tw_flowlabel = np->flow_label >> 12;
+                       tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
                        tw->tw_ipv6only = sk->sk_ipv6only;
                }
 #endif
@@ -420,7 +420,10 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
                rcu_read_unlock();
        }
 
-       if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner))
+       /* If no valid choice made yet, assign current system default ca. */
+       if (!ca_got_dst &&
+           (!icsk->icsk_ca_setsockopt ||
+            !try_module_get(icsk->icsk_ca_ops->owner)))
                tcp_assign_congestion_control(sk);
 
        tcp_set_ca_state(sk, TCP_CA_Open);
index c71a1b8..a6cea1d 100644 (file)
@@ -286,18 +286,19 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
+                         union tcp_cc_info *info)
 {
        const struct vegas *ca = inet_csk_ca(sk);
+
        if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-               struct tcpvegas_info info = {
-                       .tcpv_enabled = ca->doing_vegas_now,
-                       .tcpv_rttcnt = ca->cntRTT,
-                       .tcpv_rtt = ca->baseRTT,
-                       .tcpv_minrtt = ca->minRTT,
-               };
-
-               return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+               info->vegas.tcpv_enabled = ca->doing_vegas_now,
+               info->vegas.tcpv_rttcnt = ca->cntRTT,
+               info->vegas.tcpv_rtt = ca->baseRTT,
+               info->vegas.tcpv_minrtt = ca->minRTT,
+
+               *attr = INET_DIAG_VEGASINFO;
+               return sizeof(struct tcpvegas_info);
        }
        return 0;
 }
index e8a6b33..ef9da53 100644 (file)
@@ -19,6 +19,7 @@ void tcp_vegas_init(struct sock *sk);
 void tcp_vegas_state(struct sock *sk, u8 ca_state);
 void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
 void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
-int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
+                         union tcp_cc_info *info);
 
 #endif /* __TCP_VEGAS_H */
index b3c57cc..c10732e 100644 (file)
@@ -256,18 +256,19 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-static int tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr,
+                               union tcp_cc_info *info)
 {
        const struct westwood *ca = inet_csk_ca(sk);
 
        if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-               struct tcpvegas_info info = {
-                       .tcpv_enabled = 1,
-                       .tcpv_rtt = jiffies_to_usecs(ca->rtt),
-                       .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
-               };
+               info->vegas.tcpv_enabled = 1;
+               info->vegas.tcpv_rttcnt = 0;
+               info->vegas.tcpv_rtt    = jiffies_to_usecs(ca->rtt),
+               info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
 
-               return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+               *attr = INET_DIAG_VEGASINFO;
+               return sizeof(struct tcpvegas_info);
        }
        return 0;
 }
index d10b7e0..83aa604 100644 (file)
@@ -90,6 +90,7 @@
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/igmp.h>
+#include <linux/inetdevice.h>
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
@@ -1345,10 +1346,8 @@ csum_copy_err:
        }
        unlock_sock_fast(sk, slow);
 
-       if (noblock)
-               return -EAGAIN;
-
-       /* starting over for a new packet */
+       /* starting over for a new packet, but check if we need to yield */
+       cond_resched();
        msg->msg_flags &= ~MSG_TRUNC;
        goto try_again;
 }
@@ -1962,6 +1961,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
        struct sock *sk;
        struct dst_entry *dst;
        int dif = skb->dev->ifindex;
+       int ours;
 
        /* validate the packet */
        if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
@@ -1971,14 +1971,24 @@ void udp_v4_early_demux(struct sk_buff *skb)
        uh = udp_hdr(skb);
 
        if (skb->pkt_type == PACKET_BROADCAST ||
-           skb->pkt_type == PACKET_MULTICAST)
+           skb->pkt_type == PACKET_MULTICAST) {
+               struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+
+               if (!in_dev)
+                       return;
+
+               ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+                                      iph->protocol);
+               if (!ours)
+                       return;
                sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
                                                   uh->source, iph->saddr, dif);
-       else if (skb->pkt_type == PACKET_HOST)
+       } else if (skb->pkt_type == PACKET_HOST) {
                sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
                                             uh->source, iph->saddr, dif);
-       else
+       } else {
                return;
+       }
 
        if (!sk)
                return;
index d873cee..ca09bf4 100644 (file)
@@ -133,6 +133,14 @@ static void snmp6_free_dev(struct inet6_dev *idev)
        free_percpu(idev->stats.ipv6);
 }
 
+static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
+{
+       struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
+
+       snmp6_free_dev(idev);
+       kfree(idev);
+}
+
 /* Nobody refers to this device, we may destroy it. */
 
 void in6_dev_finish_destroy(struct inet6_dev *idev)
@@ -151,7 +159,6 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
                pr_warn("Freeing alive inet6 device %p\n", idev);
                return;
        }
-       snmp6_free_dev(idev);
-       kfree_rcu(idev, rcu);
+       call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
 }
 EXPORT_SYMBOL(in6_dev_finish_destroy);
index 31f1b5d..7c07ce3 100644 (file)
@@ -248,7 +248,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
        aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
        aead_givcrypt_set_assoc(req, asg, assoclen);
        aead_givcrypt_set_giv(req, esph->enc_data,
-                             XFRM_SKB_CB(skb)->seq.output.low);
+                             XFRM_SKB_CB(skb)->seq.output.low +
+                             ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
 
        ESP_SKB_CB(skb)->tmp = tmp;
        err = crypto_aead_givencrypt(req);
index 96dbfff..bde57b1 100644 (file)
@@ -693,6 +693,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 {
        struct rt6_info *iter = NULL;
        struct rt6_info **ins;
+       struct rt6_info **fallback_ins = NULL;
        int replace = (info->nlh &&
                       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
        int add = (!info->nlh ||
@@ -716,8 +717,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
                            (info->nlh->nlmsg_flags & NLM_F_EXCL))
                                return -EEXIST;
                        if (replace) {
-                               found++;
-                               break;
+                               if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
+                                       found++;
+                                       break;
+                               }
+                               if (rt_can_ecmp)
+                                       fallback_ins = fallback_ins ?: ins;
+                               goto next_iter;
                        }
 
                        if (iter->dst.dev == rt->dst.dev &&
@@ -753,9 +759,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
                if (iter->rt6i_metric > rt->rt6i_metric)
                        break;
 
+next_iter:
                ins = &iter->dst.rt6_next;
        }
 
+       if (fallback_ins && !found) {
+               /* No ECMP-able route found, replace first non-ECMP one */
+               ins = fallback_ins;
+               iter = *ins;
+               found++;
+       }
+
        /* Reset round-robin state, if necessary */
        if (ins == &fn->leaf)
                fn->rr_ptr = NULL;
@@ -815,6 +829,8 @@ add:
                }
 
        } else {
+               int nsiblings;
+
                if (!found) {
                        if (add)
                                goto add;
@@ -835,8 +851,27 @@ add:
                        info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
                        fn->fn_flags |= RTN_RTINFO;
                }
+               nsiblings = iter->rt6i_nsiblings;
                fib6_purge_rt(iter, fn, info->nl_net);
                rt6_release(iter);
+
+               if (nsiblings) {
+                       /* Replacing an ECMP route, remove all siblings */
+                       ins = &rt->dst.rt6_next;
+                       iter = *ins;
+                       while (iter) {
+                               if (rt6_qualify_for_ecmp(iter)) {
+                                       *ins = iter->dst.rt6_next;
+                                       fib6_purge_rt(iter, fn, info->nl_net);
+                                       rt6_release(iter);
+                                       nsiblings--;
+                               } else {
+                                       ins = &iter->dst.rt6_next;
+                               }
+                               iter = *ins;
+                       }
+                       WARN_ON(nsiblings != 0);
+               }
        }
 
        return 0;
index 7fde1f2..bc09cb9 100644 (file)
@@ -886,22 +886,45 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 #endif
        int err;
 
-       if (!*dst)
-               *dst = ip6_route_output(net, sk, fl6);
-
-       err = (*dst)->error;
-       if (err)
-               goto out_err_release;
+       /* The correct way to handle this would be to do
+        * ip6_route_get_saddr, and then ip6_route_output; however,
+        * the route-specific preferred source forces the
+        * ip6_route_output call _before_ ip6_route_get_saddr.
+        *
+        * In source specific routing (no src=any default route),
+        * ip6_route_output will fail given src=any saddr, though, so
+        * that's why we try it again later.
+        */
+       if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+               struct rt6_info *rt;
+               bool had_dst = *dst != NULL;
 
-       if (ipv6_addr_any(&fl6->saddr)) {
-               struct rt6_info *rt = (struct rt6_info *) *dst;
+               if (!had_dst)
+                       *dst = ip6_route_output(net, sk, fl6);
+               rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
                err = ip6_route_get_saddr(net, rt, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                if (err)
                        goto out_err_release;
+
+               /* If we had an erroneous initial result, pretend it
+                * never existed and let the SA-enabled version take
+                * over.
+                */
+               if (!had_dst && (*dst)->error) {
+                       dst_release(*dst);
+                       *dst = NULL;
+               }
        }
 
+       if (!*dst)
+               *dst = ip6_route_output(net, sk, fl6);
+
+       err = (*dst)->error;
+       if (err)
+               goto out_err_release;
+
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
@@ -1277,8 +1300,10 @@ emsgsize:
 
        /* If this is the first and only packet and device
         * supports checksum offloading, let's use it.
+        * Use transhdrlen, same as IPv4, because partial
+        * sums only work when transhdrlen is set.
         */
-       if (!skb && sk->sk_protocol == IPPROTO_UDP &&
+       if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
            length + fragheaderlen < mtu &&
            rt->dst.dev->features & NETIF_F_V6_CSUM &&
            !exthdrlen)
index ed9d681..0224c03 100644 (file)
@@ -322,7 +322,6 @@ static int vti6_rcv(struct sk_buff *skb)
                }
 
                XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
-               skb->mark = be32_to_cpu(t->parms.i_key);
 
                rcu_read_unlock();
 
@@ -342,6 +341,8 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
        struct pcpu_sw_netstats *tstats;
        struct xfrm_state *x;
        struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+       u32 orig_mark = skb->mark;
+       int ret;
 
        if (!t)
                return 1;
@@ -358,7 +359,11 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
        x = xfrm_input_state(skb);
        family = x->inner_mode->afinfo->family;
 
-       if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+       skb->mark = be32_to_cpu(t->parms.i_key);
+       ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
+       skb->mark = orig_mark;
+
+       if (!ret)
                return -EPERM;
 
        skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
@@ -430,6 +435,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
        struct net_device *tdev;
        struct xfrm_state *x;
        int err = -1;
+       int mtu;
 
        if (!dst)
                goto tx_err_link_failure;
@@ -463,6 +469,19 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
        skb_dst_set(skb, dst);
        skb->dev = skb_dst(skb)->dev;
 
+       mtu = dst_mtu(dst);
+       if (!skb->ignore_df && skb->len > mtu) {
+               skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
+
+               if (skb->protocol == htons(ETH_P_IPV6))
+                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+               else
+                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+                                 htonl(mtu));
+
+               return -EMSGSIZE;
+       }
+
        err = dst_output(skb);
        if (net_xmit_eval(err) == 0) {
                struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
@@ -495,7 +514,6 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
        int ret;
 
        memset(&fl, 0, sizeof(fl));
-       skb->mark = be32_to_cpu(t->parms.o_key);
 
        switch (skb->protocol) {
        case htons(ETH_P_IPV6):
@@ -516,6 +534,9 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
                goto tx_err;
        }
 
+       /* override mark with tunnel output key */
+       fl.flowi_mark = be32_to_cpu(t->parms.o_key);
+
        ret = vti6_xmit(skb, dev, &fl);
        if (ret < 0)
                goto tx_err;
index 1a732a1..62f5b0d 100644 (file)
@@ -1275,6 +1275,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
        /* overflow check */
        if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
                return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
+
        tmp.name[sizeof(tmp.name)-1] = 0;
 
        newinfo = xt_alloc_table_info(tmp.size);
@@ -1822,6 +1825,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
                return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
+
        tmp.name[sizeof(tmp.name)-1] = 0;
 
        newinfo = xt_alloc_table_info(tmp.size);
index 5c48293..c73ae50 100644 (file)
@@ -2245,9 +2245,10 @@ int ip6_route_get_saddr(struct net *net,
                        unsigned int prefs,
                        struct in6_addr *saddr)
 {
-       struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
+       struct inet6_dev *idev =
+               rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
        int err = 0;
-       if (rt->rt6i_prefsrc.plen)
+       if (rt && rt->rt6i_prefsrc.plen)
                *saddr = rt->rt6i_prefsrc.addr;
        else
                err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
@@ -2503,9 +2504,9 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add)
        int attrlen;
        int err = 0, last_err = 0;
 
+       remaining = cfg->fc_mp_len;
 beginning:
        rtnh = (struct rtnexthop *)cfg->fc_mp;
-       remaining = cfg->fc_mp_len;
 
        /* Parse a Multipath Entry */
        while (rtnh_ok(rtnh, remaining)) {
@@ -2535,15 +2536,19 @@ beginning:
                                 * next hops that have been already added.
                                 */
                                add = 0;
+                               remaining = cfg->fc_mp_len - remaining;
                                goto beginning;
                        }
                }
                /* Because each route is added like a single route we remove
-                * this flag after the first nexthop (if there is a collision,
-                * we have already fail to add the first nexthop:
-                * fib6_add_rt2node() has reject it).
+                * these flags after the first nexthop: if there is a collision,
+                * we have already failed to add the first nexthop:
+                * fib6_add_rt2node() has rejected it; when replacing, old
+                * nexthops have been replaced by first new, the rest should
+                * be added to it.
                 */
-               cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
+               cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
+                                                    NLM_F_REPLACE);
                rtnh = rtnh_next(rtnh, &remaining);
        }
 
index b6575d6..3adffb3 100644 (file)
@@ -914,7 +914,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
-                       tw->tw_tclass, (tw->tw_flowlabel << 12));
+                       tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
 
        inet_twsk_put(tw);
 }
index 3477c91..e51fc3e 100644 (file)
@@ -525,10 +525,8 @@ csum_copy_err:
        }
        unlock_sock_fast(sk, slow);
 
-       if (noblock)
-               return -EAGAIN;
-
-       /* starting over for a new packet */
+       /* starting over for a new packet, but check if we need to yield */
+       cond_resched();
        msg->msg_flags &= ~MSG_TRUNC;
        goto try_again;
 }
@@ -731,7 +729,9 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
            (inet->inet_dport && inet->inet_dport != rmt_port) ||
            (!ipv6_addr_any(&sk->sk_v6_daddr) &&
                    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
-           (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+           (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) ||
+           (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
+                   !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
                return false;
        if (!inet6_mc_check(sk, loc_addr, rmt_addr))
                return false;
index 265e427..ff347a0 100644 (file)
@@ -2495,51 +2495,22 @@ static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local,
                                           struct ieee80211_roc_work *new_roc,
                                           struct ieee80211_roc_work *cur_roc)
 {
-       unsigned long j = jiffies;
-       unsigned long cur_roc_end = cur_roc->hw_start_time +
-                                   msecs_to_jiffies(cur_roc->duration);
-       struct ieee80211_roc_work *next_roc;
-       int new_dur;
+       unsigned long now = jiffies;
+       unsigned long remaining = cur_roc->hw_start_time +
+                                 msecs_to_jiffies(cur_roc->duration) -
+                                 now;
 
        if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun))
                return false;
 
-       if (time_after(j + IEEE80211_ROC_MIN_LEFT, cur_roc_end))
+       /* if it doesn't fit entirely, schedule a new one */
+       if (new_roc->duration > jiffies_to_msecs(remaining))
                return false;
 
        ieee80211_handle_roc_started(new_roc);
 
-       new_dur = new_roc->duration - jiffies_to_msecs(cur_roc_end - j);
-
-       /* cur_roc is long enough - add new_roc to the dependents list. */
-       if (new_dur <= 0) {
-               list_add_tail(&new_roc->list, &cur_roc->dependents);
-               return true;
-       }
-
-       new_roc->duration = new_dur;
-
-       /*
-        * if cur_roc was already coalesced before, we might
-        * want to extend the next roc instead of adding
-        * a new one.
-        */
-       next_roc = list_entry(cur_roc->list.next,
-                             struct ieee80211_roc_work, list);
-       if (&next_roc->list != &local->roc_list &&
-           next_roc->chan == new_roc->chan &&
-           next_roc->sdata == new_roc->sdata &&
-           !WARN_ON(next_roc->started)) {
-               list_add_tail(&new_roc->list, &next_roc->dependents);
-               next_roc->duration = max(next_roc->duration,
-                                        new_roc->duration);
-               next_roc->type = max(next_roc->type, new_roc->type);
-               return true;
-       }
-
-       /* add right after cur_roc */
-       list_add(&new_roc->list, &cur_roc->list);
-
+       /* add to dependents so we send the expired event properly */
+       list_add_tail(&new_roc->list, &cur_roc->dependents);
        return true;
 }
 
@@ -2652,17 +2623,9 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
                         * In the offloaded ROC case, if it hasn't begun, add
                         * this new one to the dependent list to be handled
                         * when the master one begins. If it has begun,
-                        * check that there's still a minimum time left and
-                        * if so, start this one, transmitting the frame, but
-                        * add it to the list directly after this one with
-                        * a reduced time so we'll ask the driver to execute
-                        * it right after finishing the previous one, in the
-                        * hope that it'll also be executed right afterwards,
-                        * effectively extending the old one.
-                        * If there's no minimum time left, just add it to the
-                        * normal list.
-                        * TODO: the ROC type is ignored here, assuming that it
-                        * is better to immediately use the current ROC.
+                        * check if it fits entirely within the existing one,
+                        * in which case it will just be dependent as well.
+                        * Otherwise, schedule it by itself.
                         */
                        if (!tmp->hw_begun) {
                                list_add_tail(&roc->list, &tmp->dependents);
index ab46ab4..c0a9187 100644 (file)
@@ -205,6 +205,8 @@ enum ieee80211_packet_rx_flags {
  * @IEEE80211_RX_CMNTR: received on cooked monitor already
  * @IEEE80211_RX_BEACON_REPORTED: This frame was already reported
  *     to cfg80211_report_obss_beacon().
+ * @IEEE80211_RX_REORDER_TIMER: this frame is released by the
+ *     reorder buffer timeout timer, not the normal RX path
  *
  * These flags are used across handling multiple interfaces
  * for a single frame.
@@ -212,6 +214,7 @@ enum ieee80211_packet_rx_flags {
 enum ieee80211_rx_flags {
        IEEE80211_RX_CMNTR              = BIT(0),
        IEEE80211_RX_BEACON_REPORTED    = BIT(1),
+       IEEE80211_RX_REORDER_TIMER      = BIT(2),
 };
 
 struct ieee80211_rx_data {
@@ -325,12 +328,6 @@ struct mesh_preq_queue {
        u8 flags;
 };
 
-#if HZ/100 == 0
-#define IEEE80211_ROC_MIN_LEFT 1
-#else
-#define IEEE80211_ROC_MIN_LEFT (HZ/100)
-#endif
-
 struct ieee80211_roc_work {
        struct list_head list;
        struct list_head dependents;
index b4ac596..84cef60 100644 (file)
@@ -522,6 +522,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
                memcpy(sdata->vif.hw_queue, master->vif.hw_queue,
                       sizeof(sdata->vif.hw_queue));
                sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef;
+
+               mutex_lock(&local->key_mtx);
+               sdata->crypto_tx_tailroom_needed_cnt +=
+                       master->crypto_tx_tailroom_needed_cnt;
+               mutex_unlock(&local->key_mtx);
+
                break;
                }
        case NL80211_IFTYPE_AP:
@@ -819,13 +825,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
         * (because if we remove a STA after ops->remove_interface()
         * the driver will have removed the vif info already!)
         *
-        * This is relevant only in WDS mode, in all other modes we've
-        * already removed all stations when disconnecting or similar,
-        * so warn otherwise.
+        * In WDS mode a station must exist here and be flushed, for
+        * AP_VLANs stations may exist since there's nothing else that
+        * would have removed them, but in other modes there shouldn't
+        * be any stations.
         */
        flushed = sta_info_flush(sdata);
-       WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) ||
-                    (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1));
+       WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+                    ((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) ||
+                     (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)));
 
        /* don't count this interface for promisc/allmulti while it is down */
        if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
index 2291cd7..a907f2d 100644 (file)
@@ -58,6 +58,22 @@ static void assert_key_lock(struct ieee80211_local *local)
        lockdep_assert_held(&local->key_mtx);
 }
 
+static void
+update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta)
+{
+       struct ieee80211_sub_if_data *vlan;
+
+       if (sdata->vif.type != NL80211_IFTYPE_AP)
+               return;
+
+       mutex_lock(&sdata->local->mtx);
+
+       list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+               vlan->crypto_tx_tailroom_needed_cnt += delta;
+
+       mutex_unlock(&sdata->local->mtx);
+}
+
 static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
 {
        /*
@@ -79,6 +95,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
         * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net
         */
 
+       update_vlan_tailroom_need_count(sdata, 1);
+
        if (!sdata->crypto_tx_tailroom_needed_cnt++) {
                /*
                 * Flush all XMIT packets currently using HW encryption or no
@@ -88,6 +106,15 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
        }
 }
 
+static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata,
+                                        int delta)
+{
+       WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta);
+
+       update_vlan_tailroom_need_count(sdata, -delta);
+       sdata->crypto_tx_tailroom_needed_cnt -= delta;
+}
+
 static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 {
        struct ieee80211_sub_if_data *sdata;
@@ -144,7 +171,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 
                if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
                      (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
-                       sdata->crypto_tx_tailroom_needed_cnt--;
+                       decrease_tailroom_need_count(sdata, 1);
 
                WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
                        (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV));
@@ -541,7 +568,7 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key,
                        schedule_delayed_work(&sdata->dec_tailroom_needed_wk,
                                              HZ/2);
                } else {
-                       sdata->crypto_tx_tailroom_needed_cnt--;
+                       decrease_tailroom_need_count(sdata, 1);
                }
        }
 
@@ -631,6 +658,7 @@ void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
 void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
 {
        struct ieee80211_key *key;
+       struct ieee80211_sub_if_data *vlan;
 
        ASSERT_RTNL();
 
@@ -639,7 +667,14 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
 
        mutex_lock(&sdata->local->key_mtx);
 
-       sdata->crypto_tx_tailroom_needed_cnt = 0;
+       WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
+                    sdata->crypto_tx_tailroom_pending_dec);
+
+       if (sdata->vif.type == NL80211_IFTYPE_AP) {
+               list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+                       WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt ||
+                                    vlan->crypto_tx_tailroom_pending_dec);
+       }
 
        list_for_each_entry(key, &sdata->key_list, list) {
                increment_tailroom_need_count(sdata);
@@ -649,6 +684,22 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
        mutex_unlock(&sdata->local->key_mtx);
 }
 
+void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata)
+{
+       struct ieee80211_sub_if_data *vlan;
+
+       mutex_lock(&sdata->local->key_mtx);
+
+       sdata->crypto_tx_tailroom_needed_cnt = 0;
+
+       if (sdata->vif.type == NL80211_IFTYPE_AP) {
+               list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+                       vlan->crypto_tx_tailroom_needed_cnt = 0;
+       }
+
+       mutex_unlock(&sdata->local->key_mtx);
+}
+
 void ieee80211_iter_keys(struct ieee80211_hw *hw,
                         struct ieee80211_vif *vif,
                         void (*iter)(struct ieee80211_hw *hw,
@@ -688,8 +739,8 @@ static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata,
 {
        struct ieee80211_key *key, *tmp;
 
-       sdata->crypto_tx_tailroom_needed_cnt -=
-               sdata->crypto_tx_tailroom_pending_dec;
+       decrease_tailroom_need_count(sdata,
+                                    sdata->crypto_tx_tailroom_pending_dec);
        sdata->crypto_tx_tailroom_pending_dec = 0;
 
        ieee80211_debugfs_key_remove_mgmt_default(sdata);
@@ -709,6 +760,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
 {
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_sub_if_data *vlan;
+       struct ieee80211_sub_if_data *master;
        struct ieee80211_key *key, *tmp;
        LIST_HEAD(keys);
 
@@ -728,8 +780,20 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
        list_for_each_entry_safe(key, tmp, &keys, list)
                __ieee80211_key_destroy(key, false);
 
-       WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
-                    sdata->crypto_tx_tailroom_pending_dec);
+       if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+               if (sdata->bss) {
+                       master = container_of(sdata->bss,
+                                             struct ieee80211_sub_if_data,
+                                             u.ap);
+
+                       WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt !=
+                                    master->crypto_tx_tailroom_needed_cnt);
+               }
+       } else {
+               WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
+                            sdata->crypto_tx_tailroom_pending_dec);
+       }
+
        if (sdata->vif.type == NL80211_IFTYPE_AP) {
                list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
                        WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt ||
@@ -793,8 +857,8 @@ void ieee80211_delayed_tailroom_dec(struct work_struct *wk)
         */
 
        mutex_lock(&sdata->local->key_mtx);
-       sdata->crypto_tx_tailroom_needed_cnt -=
-               sdata->crypto_tx_tailroom_pending_dec;
+       decrease_tailroom_need_count(sdata,
+                                    sdata->crypto_tx_tailroom_pending_dec);
        sdata->crypto_tx_tailroom_pending_dec = 0;
        mutex_unlock(&sdata->local->key_mtx);
 }
index c5a3183..96557dd 100644 (file)
@@ -161,6 +161,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
 void ieee80211_free_sta_keys(struct ieee80211_local *local,
                             struct sta_info *sta);
 void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
+void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata);
 
 #define key_mtx_dereference(local, ref) \
        rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
index 260eed4..5793f75 100644 (file)
@@ -2121,7 +2121,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
                /* deliver to local stack */
                skb->protocol = eth_type_trans(skb, dev);
                memset(skb->cb, 0, sizeof(skb->cb));
-               if (rx->local->napi)
+               if (!(rx->flags & IEEE80211_RX_REORDER_TIMER) &&
+                   rx->local->napi)
                        napi_gro_receive(rx->local->napi, skb);
                else
                        netif_receive_skb(skb);
@@ -3231,7 +3232,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
                /* This is OK -- must be QoS data frame */
                .security_idx = tid,
                .seqno_idx = tid,
-               .flags = 0,
+               .flags = IEEE80211_RX_REORDER_TIMER,
        };
        struct tid_ampdu_rx *tid_agg_rx;
 
index 12971b7..2880f2a 100644 (file)
@@ -66,6 +66,7 @@
 
 static const struct rhashtable_params sta_rht_params = {
        .nelem_hint = 3, /* start small */
+       .automatic_shrinking = true,
        .head_offset = offsetof(struct sta_info, hash_node),
        .key_offset = offsetof(struct sta_info, sta.addr),
        .key_len = ETH_ALEN,
@@ -157,8 +158,24 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
                              const u8 *addr)
 {
        struct ieee80211_local *local = sdata->local;
+       struct sta_info *sta;
+       struct rhash_head *tmp;
+       const struct bucket_table *tbl;
+
+       rcu_read_lock();
+       tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
 
-       return rhashtable_lookup_fast(&local->sta_hash, addr, sta_rht_params);
+       for_each_sta_info(local, tbl, addr, sta, tmp) {
+               if (sta->sdata == sdata) {
+                       rcu_read_unlock();
+                       /* this is safe as the caller must already hold
+                        * another rcu read section or the mutex
+                        */
+                       return sta;
+               }
+       }
+       rcu_read_unlock();
+       return NULL;
 }
 
 /*
index 79412f1..b864ebc 100644 (file)
@@ -2023,6 +2023,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 
        /* add back keys */
        list_for_each_entry(sdata, &local->interfaces, list)
+               ieee80211_reset_crypto_tx_tailroom(sdata);
+
+       list_for_each_entry(sdata, &local->interfaces, list)
                if (ieee80211_sdata_running(sdata))
                        ieee80211_enable_keys(sdata);
 
index a4220e9..efa3f48 100644 (file)
@@ -98,8 +98,7 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local,
 
        hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
 
-       if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN ||
-                   skb_headroom(skb) < IEEE80211_WEP_IV_LEN))
+       if (WARN_ON(skb_headroom(skb) < IEEE80211_WEP_IV_LEN))
                return NULL;
 
        hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -167,6 +166,9 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local,
        size_t len;
        u8 rc4key[3 + WLAN_KEY_LEN_WEP104];
 
+       if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN))
+               return -1;
+
        iv = ieee80211_wep_add_iv(local, skb, keylen, keyidx);
        if (!iv)
                return -1;
index 5d9f68c..70be9c7 100644 (file)
 
 static struct net_device *
 ieee802154_add_iface_deprecated(struct wpan_phy *wpan_phy,
-                               const char *name, int type)
+                               const char *name,
+                               unsigned char name_assign_type, int type)
 {
        struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
        struct net_device *dev;
 
        rtnl_lock();
-       dev = ieee802154_if_add(local, name, type,
+       dev = ieee802154_if_add(local, name, name_assign_type, type,
                                cpu_to_le64(0x0000000000000000ULL));
        rtnl_unlock();
 
@@ -45,12 +46,14 @@ static void ieee802154_del_iface_deprecated(struct wpan_phy *wpan_phy,
 
 static int
 ieee802154_add_iface(struct wpan_phy *phy, const char *name,
+                    unsigned char name_assign_type,
                     enum nl802154_iftype type, __le64 extended_addr)
 {
        struct ieee802154_local *local = wpan_phy_priv(phy);
        struct net_device *err;
 
-       err = ieee802154_if_add(local, name, type, extended_addr);
+       err = ieee802154_if_add(local, name, name_assign_type, type,
+                               extended_addr);
        return PTR_ERR_OR_ZERO(err);
 }
 
index bebd70f..127ba18 100644 (file)
@@ -182,7 +182,8 @@ void ieee802154_iface_exit(void);
 void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata);
 struct net_device *
 ieee802154_if_add(struct ieee802154_local *local, const char *name,
-                 enum nl802154_iftype type, __le64 extended_addr);
+                 unsigned char name_assign_type, enum nl802154_iftype type,
+                 __le64 extended_addr);
 void ieee802154_remove_interfaces(struct ieee802154_local *local);
 
 #endif /* __IEEE802154_I_H */
index 38b56f9..91b75ab 100644 (file)
@@ -522,7 +522,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
 
 struct net_device *
 ieee802154_if_add(struct ieee802154_local *local, const char *name,
-                 enum nl802154_iftype type, __le64 extended_addr)
+                 unsigned char name_assign_type, enum nl802154_iftype type,
+                 __le64 extended_addr)
 {
        struct net_device *ndev = NULL;
        struct ieee802154_sub_if_data *sdata = NULL;
@@ -531,7 +532,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
        ASSERT_RTNL();
 
        ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, name,
-                           NET_NAME_UNKNOWN, ieee802154_if_setup);
+                           name_assign_type, ieee802154_if_setup);
        if (!ndev)
                return ERR_PTR(-ENOMEM);
 
index dcf7395..5b2be12 100644 (file)
@@ -134,7 +134,7 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template)
        for (i = 0; i < ARRAY_SIZE(key->tfm); i++) {
                key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0,
                                                CRYPTO_ALG_ASYNC);
-               if (!key->tfm[i])
+               if (IS_ERR(key->tfm[i]))
                        goto err_tfm;
                if (crypto_aead_setkey(key->tfm[i], template->key,
                                       IEEE802154_LLSEC_KEY_SIZE))
@@ -144,7 +144,7 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template)
        }
 
        key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
-       if (!key->tfm0)
+       if (IS_ERR(key->tfm0))
                goto err_tfm;
 
        if (crypto_blkcipher_setkey(key->tfm0, template->key,
index 8500378..08cb32d 100644 (file)
@@ -161,18 +161,21 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
 
        rtnl_lock();
 
-       dev = ieee802154_if_add(local, "wpan%d", NL802154_IFTYPE_NODE,
+       dev = ieee802154_if_add(local, "wpan%d", NET_NAME_ENUM,
+                               NL802154_IFTYPE_NODE,
                                cpu_to_le64(0x0000000000000000ULL));
        if (IS_ERR(dev)) {
                rtnl_unlock();
                rc = PTR_ERR(dev);
-               goto out_wq;
+               goto out_phy;
        }
 
        rtnl_unlock();
 
        return 0;
 
+out_phy:
+       wpan_phy_unregister(local->phy);
 out_wq:
        destroy_workqueue(local->workqueue);
 out:
index 954810c..1f93a59 100644 (file)
@@ -541,7 +541,7 @@ static void mpls_ifdown(struct net_device *dev)
 
        RCU_INIT_POINTER(dev->mpls_ptr, NULL);
 
-       kfree(mdev);
+       kfree_rcu(mdev, rcu);
 }
 
 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
@@ -564,6 +564,17 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
        case NETDEV_UNREGISTER:
                mpls_ifdown(dev);
                break;
+       case NETDEV_CHANGENAME:
+               mdev = mpls_dev_get(dev);
+               if (mdev) {
+                       int err;
+
+                       mpls_dev_sysctl_unregister(mdev);
+                       err = mpls_dev_sysctl_register(dev, mdev);
+                       if (err)
+                               return notifier_from_errno(err);
+               }
+               break;
        }
        return NOTIFY_OK;
 }
@@ -647,7 +658,7 @@ int nla_get_labels(const struct nlattr *nla,
                        return -EINVAL;
 
                switch (dec.label) {
-               case LABEL_IMPLICIT_NULL:
+               case MPLS_LABEL_IMPLNULL:
                        /* RFC3032: This is a label that an LSR may
                         * assign and distribute, but which never
                         * actually appears in the encapsulation.
@@ -935,7 +946,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
        }
 
        /* In case the predefined labels need to be populated */
-       if (limit > LABEL_IPV4_EXPLICIT_NULL) {
+       if (limit > MPLS_LABEL_IPV4NULL) {
                struct net_device *lo = net->loopback_dev;
                rt0 = mpls_rt_alloc(lo->addr_len);
                if (!rt0)
@@ -945,7 +956,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
                rt0->rt_via_table = NEIGH_LINK_TABLE;
                memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
        }
-       if (limit > LABEL_IPV6_EXPLICIT_NULL) {
+       if (limit > MPLS_LABEL_IPV6NULL) {
                struct net_device *lo = net->loopback_dev;
                rt2 = mpls_rt_alloc(lo->addr_len);
                if (!rt2)
@@ -973,15 +984,15 @@ static int resize_platform_label_table(struct net *net, size_t limit)
        memcpy(labels, old, cp_size);
 
        /* If needed set the predefined labels */
-       if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) &&
-           (limit > LABEL_IPV6_EXPLICIT_NULL)) {
-               RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2);
+       if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
+           (limit > MPLS_LABEL_IPV6NULL)) {
+               RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
                rt2 = NULL;
        }
 
-       if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) &&
-           (limit > LABEL_IPV4_EXPLICIT_NULL)) {
-               RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0);
+       if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
+           (limit > MPLS_LABEL_IPV4NULL)) {
+               RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
                rt0 = NULL;
        }
 
index 693877d..8cabeb5 100644 (file)
@@ -1,16 +1,6 @@
 #ifndef MPLS_INTERNAL_H
 #define MPLS_INTERNAL_H
 
-#define LABEL_IPV4_EXPLICIT_NULL       0 /* RFC3032 */
-#define LABEL_ROUTER_ALERT_LABEL       1 /* RFC3032 */
-#define LABEL_IPV6_EXPLICIT_NULL       2 /* RFC3032 */
-#define LABEL_IMPLICIT_NULL            3 /* RFC3032 */
-#define LABEL_ENTROPY_INDICATOR                7 /* RFC6790 */
-#define LABEL_GAL                      13 /* RFC5586 */
-#define LABEL_OAM_ALERT                        14 /* RFC3429 */
-#define LABEL_EXTENSION                        15 /* RFC7274 */
-
-
 struct mpls_shim_hdr {
        __be32 label_stack_entry;
 };
@@ -26,6 +16,7 @@ struct mpls_dev {
        int                     input_enabled;
 
        struct ctl_table_header *sysctl;
+       struct rcu_head         rcu;
 };
 
 struct sk_buff;
index f70e34a..a0f3e6a 100644 (file)
@@ -863,6 +863,7 @@ config NETFILTER_XT_TARGET_TPROXY
        depends on NETFILTER_XTABLES
        depends on NETFILTER_ADVANCED
        depends on (IPV6 || IPV6=n)
+       depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
        depends on IP_NF_MANGLE
        select NF_DEFRAG_IPV4
        select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
@@ -1356,6 +1357,7 @@ config NETFILTER_XT_MATCH_SOCKET
        depends on NETFILTER_ADVANCED
        depends on !NF_CONNTRACK || NF_CONNTRACK
        depends on (IPV6 || IPV6=n)
+       depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
        select NF_DEFRAG_IPV4
        select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
        help
index 4953267..285eae3 100644 (file)
@@ -3823,6 +3823,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
        cancel_work_sync(&ipvs->defense_work.work);
        unregister_net_sysctl_table(ipvs->sysctl_hdr);
        ip_vs_stop_estimator(net, &ipvs->tot_stats);
+
+       if (!net_eq(net, &init_net))
+               kfree(ipvs->sysctl_tbl);
 }
 
 #else
index 5caa0c4..70383de 100644 (file)
@@ -202,7 +202,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *     sES -> sES      :-)
  *     sFW -> sCW      Normal close request answered by ACK.
  *     sCW -> sCW
- *     sLA -> sTW      Last ACK detected.
+ *     sLA -> sTW      Last ACK detected (RFC5961 challenged)
  *     sTW -> sTW      Retransmitted last ACK. Remain in the same state.
  *     sCL -> sCL
  */
@@ -261,7 +261,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *     sES -> sES      :-)
  *     sFW -> sCW      Normal close request answered by ACK.
  *     sCW -> sCW
- *     sLA -> sTW      Last ACK detected.
+ *     sLA -> sTW      Last ACK detected (RFC5961 challenged)
  *     sTW -> sTW      Retransmitted last ACK.
  *     sCL -> sCL
  */
@@ -906,6 +906,7 @@ static int tcp_packet(struct nf_conn *ct,
                                        1 : ct->proto.tcp.last_win;
                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
                                ct->proto.tcp.last_wscale;
+                       ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
                        ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
                                ct->proto.tcp.last_flags;
                        memset(&ct->proto.tcp.seen[dir], 0,
@@ -923,7 +924,9 @@ static int tcp_packet(struct nf_conn *ct,
                 * may be in sync but we are not. In that case, we annotate
                 * the TCP options and let the packet go through. If it is a
                 * valid SYN packet, the server will reply with a SYN/ACK, and
-                * then we'll get in sync. Otherwise, the server ignores it. */
+                * then we'll get in sync. Otherwise, the server potentially
+                * responds with a challenge ACK if implementing RFC5961.
+                */
                if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
                        struct ip_ct_tcp_state seen = {};
 
@@ -939,6 +942,13 @@ static int tcp_packet(struct nf_conn *ct,
                                ct->proto.tcp.last_flags |=
                                        IP_CT_TCP_FLAG_SACK_PERM;
                        }
+                       /* Mark the potential for RFC5961 challenge ACK,
+                        * this pose a special problem for LAST_ACK state
+                        * as ACK is intrepretated as ACKing last FIN.
+                        */
+                       if (old_state == TCP_CONNTRACK_LAST_ACK)
+                               ct->proto.tcp.last_flags |=
+                                       IP_CT_EXP_CHALLENGE_ACK;
                }
                spin_unlock_bh(&ct->lock);
                if (LOG_INVALID(net, IPPROTO_TCP))
@@ -970,6 +980,25 @@ static int tcp_packet(struct nf_conn *ct,
                        nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
                                  "nf_ct_tcp: invalid state ");
                return -NF_ACCEPT;
+       case TCP_CONNTRACK_TIME_WAIT:
+               /* RFC5961 compliance cause stack to send "challenge-ACK"
+                * e.g. in response to spurious SYNs.  Conntrack MUST
+                * not believe this ACK is acking last FIN.
+                */
+               if (old_state == TCP_CONNTRACK_LAST_ACK &&
+                   index == TCP_ACK_SET &&
+                   ct->proto.tcp.last_dir != dir &&
+                   ct->proto.tcp.last_index == TCP_SYN_SET &&
+                   (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
+                       /* Detected RFC5961 challenge ACK */
+                       ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
+                       spin_unlock_bh(&ct->lock);
+                       if (LOG_INVALID(net, IPPROTO_TCP))
+                               nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
+                                     "nf_ct_tcp: challenge-ACK ignored ");
+                       return NF_ACCEPT; /* Don't change state */
+               }
+               break;
        case TCP_CONNTRACK_CLOSE:
                if (index == TCP_RST_SET
                    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
index ad9d11f..34ded09 100644 (file)
@@ -4472,9 +4472,9 @@ EXPORT_SYMBOL_GPL(nft_data_init);
  */
 void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
 {
-       switch (type) {
-       case NFT_DATA_VALUE:
+       if (type < NFT_DATA_VERDICT)
                return;
+       switch (type) {
        case NFT_DATA_VERDICT:
                return nft_verdict_uninit(data);
        default:
index 3ad9126..4ef1fae 100644 (file)
@@ -1073,7 +1073,13 @@ static struct pernet_operations nfnl_log_net_ops = {
 
 static int __init nfnetlink_log_init(void)
 {
-       int status = -ENOMEM;
+       int status;
+
+       status = register_pernet_subsys(&nfnl_log_net_ops);
+       if (status < 0) {
+               pr_err("failed to register pernet ops\n");
+               goto out;
+       }
 
        netlink_register_notifier(&nfulnl_rtnl_notifier);
        status = nfnetlink_subsys_register(&nfulnl_subsys);
@@ -1088,28 +1094,23 @@ static int __init nfnetlink_log_init(void)
                goto cleanup_subsys;
        }
 
-       status = register_pernet_subsys(&nfnl_log_net_ops);
-       if (status < 0) {
-               pr_err("failed to register pernet ops\n");
-               goto cleanup_logger;
-       }
        return status;
 
-cleanup_logger:
-       nf_log_unregister(&nfulnl_logger);
 cleanup_subsys:
        nfnetlink_subsys_unregister(&nfulnl_subsys);
 cleanup_netlink_notifier:
        netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+       unregister_pernet_subsys(&nfnl_log_net_ops);
+out:
        return status;
 }
 
 static void __exit nfnetlink_log_fini(void)
 {
-       unregister_pernet_subsys(&nfnl_log_net_ops);
        nf_log_unregister(&nfulnl_logger);
        nfnetlink_subsys_unregister(&nfulnl_subsys);
        netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+       unregister_pernet_subsys(&nfnl_log_net_ops);
 }
 
 MODULE_DESCRIPTION("netfilter userspace logging");
index 0b98c74..11c7682 100644 (file)
@@ -1317,7 +1317,13 @@ static struct pernet_operations nfnl_queue_net_ops = {
 
 static int __init nfnetlink_queue_init(void)
 {
-       int status = -ENOMEM;
+       int status;
+
+       status = register_pernet_subsys(&nfnl_queue_net_ops);
+       if (status < 0) {
+               pr_err("nf_queue: failed to register pernet ops\n");
+               goto out;
+       }
 
        netlink_register_notifier(&nfqnl_rtnl_notifier);
        status = nfnetlink_subsys_register(&nfqnl_subsys);
@@ -1326,19 +1332,13 @@ static int __init nfnetlink_queue_init(void)
                goto cleanup_netlink_notifier;
        }
 
-       status = register_pernet_subsys(&nfnl_queue_net_ops);
-       if (status < 0) {
-               pr_err("nf_queue: failed to register pernet ops\n");
-               goto cleanup_subsys;
-       }
        register_netdevice_notifier(&nfqnl_dev_notifier);
        nf_register_queue_handler(&nfqh);
        return status;
 
-cleanup_subsys:
-       nfnetlink_subsys_unregister(&nfqnl_subsys);
 cleanup_netlink_notifier:
        netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+out:
        return status;
 }
 
@@ -1346,9 +1346,9 @@ static void __exit nfnetlink_queue_fini(void)
 {
        nf_unregister_queue_handler();
        unregister_netdevice_notifier(&nfqnl_dev_notifier);
-       unregister_pernet_subsys(&nfnl_queue_net_ops);
        nfnetlink_subsys_unregister(&nfqnl_subsys);
        netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+       unregister_pernet_subsys(&nfnl_queue_net_ops);
 
        rcu_barrier(); /* Wait for completion of call_rcu()'s */
 }
index ec4adbd..bf6e766 100644 (file)
@@ -89,7 +89,7 @@ static inline int netlink_is_kernel(struct sock *sk)
        return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
 }
 
-struct netlink_table *nl_table;
+struct netlink_table *nl_table __read_mostly;
 EXPORT_SYMBOL_GPL(nl_table);
 
 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
@@ -1081,6 +1081,7 @@ static int netlink_insert(struct sock *sk, u32 portid)
        if (err) {
                if (err == -EEXIST)
                        err = -EADDRINUSE;
+               nlk_sk(sk)->portid = 0;
                sock_put(sk);
        }
 
@@ -3139,7 +3140,6 @@ static const struct rhashtable_params netlink_rhashtable_params = {
        .key_len = netlink_compare_arg_len,
        .obj_hashfn = netlink_hash,
        .obj_cmpfn = netlink_compare,
-       .max_size = 65536,
        .automatic_shrinking = true,
 };
 
index 4776282..33e6d6e 100644 (file)
@@ -125,6 +125,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
        if (err)
                goto error_master_upper_dev_unlink;
 
+       dev_disable_lro(netdev_vport->dev);
        dev_set_promiscuity(netdev_vport->dev, 1);
        netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
        rtnl_unlock();
index 5102c3c..b5989c6 100644 (file)
@@ -2311,11 +2311,14 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
                tlen = dev->needed_tailroom;
                skb = sock_alloc_send_skb(&po->sk,
                                hlen + tlen + sizeof(struct sockaddr_ll),
-                               0, &err);
+                               !need_wait, &err);
 
-               if (unlikely(skb == NULL))
+               if (unlikely(skb == NULL)) {
+                       /* we assume the socket was initially writeable ... */
+                       if (likely(len_sum > 0))
+                               err = len_sum;
                        goto out_status;
-
+               }
                tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
                                          addr, hlen);
                if (tp_len > dev->mtu + dev->hard_header_len) {
index 14f0413..da6da57 100644 (file)
@@ -126,7 +126,10 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
        struct rds_transport *loop_trans;
        unsigned long flags;
        int ret;
+       struct rds_transport *otrans = trans;
 
+       if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
+               goto new_conn;
        rcu_read_lock();
        conn = rds_conn_lookup(head, laddr, faddr, trans);
        if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
@@ -142,6 +145,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
        if (conn)
                goto out;
 
+new_conn:
        conn = kmem_cache_zalloc(rds_conn_slab, gfp);
        if (!conn) {
                conn = ERR_PTR(-ENOMEM);
@@ -230,13 +234,22 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
                /* Creating normal conn */
                struct rds_connection *found;
 
-               found = rds_conn_lookup(head, laddr, faddr, trans);
+               if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
+                       found = NULL;
+               else
+                       found = rds_conn_lookup(head, laddr, faddr, trans);
                if (found) {
                        trans->conn_free(conn->c_transport_data);
                        kmem_cache_free(rds_conn_slab, conn);
                        conn = found;
                } else {
-                       hlist_add_head_rcu(&conn->c_hash_node, head);
+                       if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) ||
+                           (otrans->t_type != RDS_TRANS_TCP)) {
+                               /* Only the active side should be added to
+                                * reconnect list for TCP.
+                                */
+                               hlist_add_head_rcu(&conn->c_hash_node, head);
+                       }
                        rds_cong_add_conn(conn);
                        rds_conn_count++;
                }
index 31b74f5..8a09ee7 100644 (file)
@@ -183,8 +183,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 
        /* If the peer gave us the last packet it saw, process this as if
         * we had received a regular ACK. */
-       if (dp && dp->dp_ack_seq)
-               rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
+       if (dp) {
+               /* dp structure start is not guaranteed to be 8 bytes aligned.
+                * Since dp_ack_seq is 64-bit extended load operations can be
+                * used so go through get_unaligned to avoid unaligned errors.
+                */
+               __be64 dp_ack_seq = get_unaligned(&dp->dp_ack_seq);
+
+               if (dp_ack_seq)
+                       rds_send_drop_acked(conn, be64_to_cpu(dp_ack_seq),
+                                           NULL);
+       }
 
        rds_connect_complete(conn);
 }
index f9f564a..973109c 100644 (file)
@@ -62,6 +62,7 @@ void rds_tcp_state_change(struct sock *sk)
                case TCP_ESTABLISHED:
                        rds_connect_complete(conn);
                        break;
+               case TCP_CLOSE_WAIT:
                case TCP_CLOSE:
                        rds_conn_drop(conn);
                default:
index 23ab4dc..0da49e3 100644 (file)
@@ -45,12 +45,45 @@ static void rds_tcp_accept_worker(struct work_struct *work);
 static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker);
 static struct socket *rds_tcp_listen_sock;
 
+static int rds_tcp_keepalive(struct socket *sock)
+{
+       /* values below based on xs_udp_default_timeout */
+       int keepidle = 5; /* send a probe 'keepidle' secs after last data */
+       int keepcnt = 5; /* number of unack'ed probes before declaring dead */
+       int keepalive = 1;
+       int ret = 0;
+
+       ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+                               (char *)&keepalive, sizeof(keepalive));
+       if (ret < 0)
+               goto bail;
+
+       ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
+                               (char *)&keepcnt, sizeof(keepcnt));
+       if (ret < 0)
+               goto bail;
+
+       ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE,
+                               (char *)&keepidle, sizeof(keepidle));
+       if (ret < 0)
+               goto bail;
+
+       /* KEEPINTVL is the interval between successive probes. We follow
+        * the model in xs_tcp_finish_connecting() and re-use keepidle.
+        */
+       ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL,
+                               (char *)&keepidle, sizeof(keepidle));
+bail:
+       return ret;
+}
+
 static int rds_tcp_accept_one(struct socket *sock)
 {
        struct socket *new_sock = NULL;
        struct rds_connection *conn;
        int ret;
        struct inet_sock *inet;
+       struct rds_tcp_connection *rs_tcp;
 
        ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
                               sock->sk->sk_protocol, &new_sock);
@@ -63,6 +96,10 @@ static int rds_tcp_accept_one(struct socket *sock)
        if (ret < 0)
                goto out;
 
+       ret = rds_tcp_keepalive(new_sock);
+       if (ret < 0)
+               goto out;
+
        rds_tcp_tune(new_sock);
 
        inet = inet_sk(new_sock->sk);
@@ -77,6 +114,15 @@ static int rds_tcp_accept_one(struct socket *sock)
                ret = PTR_ERR(conn);
                goto out;
        }
+       /* An incoming SYN request came in, and TCP just accepted it.
+        * We always create a new conn for listen side of TCP, and do not
+        * add it to the c_hash_list.
+        *
+        * If the client reboots, this conn will need to be cleaned up.
+        * rds_tcp_state_change() will do that cleanup
+        */
+       rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
+       WARN_ON(!rs_tcp || rs_tcp->t_sock);
 
        /*
         * see the comment above rds_queue_delayed_reconnect()
index 8b0470e..a75864d 100644 (file)
@@ -81,6 +81,11 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
        struct tcf_proto_ops *t;
        int rc = -ENOENT;
 
+       /* Wait for outstanding call_rcu()s, if any, from a
+        * tcf_proto_ops's destroy() handler.
+        */
+       rcu_barrier();
+
        write_lock(&cls_mod_lock);
        list_for_each_entry(t, &tcf_proto_base, head) {
                if (t == ops) {
@@ -308,12 +313,11 @@ replay:
                case RTM_DELTFILTER:
                        err = tp->ops->delete(tp, fh);
                        if (err == 0) {
-                               tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
-                               if (tcf_destroy(tp, false)) {
-                                       struct tcf_proto *next = rtnl_dereference(tp->next);
+                               struct tcf_proto *next = rtnl_dereference(tp->next);
 
+                               tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+                               if (tcf_destroy(tp, false))
                                        RCU_INIT_POINTER(*back, next);
-                               }
                        }
                        goto errout;
                case RTM_GETTFILTER:
index ad9eed7..1e1c89e 100644 (file)
@@ -815,10 +815,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                if (dev->flags & IFF_UP)
                        dev_deactivate(dev);
 
-               if (new && new->ops->attach) {
-                       new->ops->attach(new);
-                       num_q = 0;
-               }
+               if (new && new->ops->attach)
+                       goto skip;
 
                for (i = 0; i < num_q; i++) {
                        struct netdev_queue *dev_queue = dev_ingress_queue(dev);
@@ -834,12 +832,16 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                                qdisc_destroy(old);
                }
 
+skip:
                if (!ingress) {
                        notify_and_destroy(net, skb, n, classid,
                                           dev->qdisc, new);
                        if (new && !new->ops->attach)
                                atomic_inc(&new->refcnt);
                        dev->qdisc = new ? : &noop_qdisc;
+
+                       if (new && new->ops->attach)
+                               new->ops->attach(new);
                } else {
                        notify_and_destroy(net, skb, n, classid, old, new);
                }
index de28f8e..7a0bdb1 100644 (file)
@@ -164,7 +164,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt)
 
        sch->limit = DEFAULT_CODEL_LIMIT;
 
-       codel_params_init(&q->params);
+       codel_params_init(&q->params, sch);
        codel_vars_init(&q->vars);
        codel_stats_init(&q->stats);
 
index 1e52dec..c244c45 100644 (file)
@@ -391,7 +391,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
        q->perturbation = prandom_u32();
        INIT_LIST_HEAD(&q->new_flows);
        INIT_LIST_HEAD(&q->old_flows);
-       codel_params_init(&q->cparams);
+       codel_params_init(&q->cparams, sch);
        codel_stats_init(&q->cstats);
        q->cparams.ecn = true;
 
index a4ca451..634529e 100644 (file)
@@ -229,7 +229,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
                break;
        }
 
-       if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
+       if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) {
                q->backlog += qdisc_pkt_len(skb);
                return qdisc_enqueue_tail(skb, sch);
        }
@@ -553,7 +553,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 
                opt.limit       = q->limit;
                opt.DP          = q->DP;
-               opt.backlog     = q->backlog;
+               opt.backlog     = gred_backlog(table, q, sch);
                opt.prio        = q->prio;
                opt.qth_min     = q->parms.qth_min >> q->parms.Wlog;
                opt.qth_max     = q->parms.qth_max >> q->parms.Wlog;
index fb7976a..4f15b7d 100644 (file)
@@ -381,13 +381,14 @@ nomem:
 }
 
 
-/* Public interface to creat the association shared key.
+/* Public interface to create the association shared key.
  * See code above for the algorithm.
  */
 int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
 {
        struct sctp_auth_bytes  *secret;
        struct sctp_shared_key *ep_key;
+       struct sctp_chunk *chunk;
 
        /* If we don't support AUTH, or peer is not capable
         * we don't need to do anything.
@@ -410,6 +411,14 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
        sctp_auth_key_put(asoc->asoc_shared_key);
        asoc->asoc_shared_key = secret;
 
+       /* Update send queue in case any chunk already in there now
+        * needs authenticating
+        */
+       list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) {
+               if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc))
+                       chunk->auth = 1;
+       }
+
        return 0;
 }
 
index 1ec19f6..eeeba5a 100644 (file)
@@ -793,20 +793,26 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 {
        u32 value_follows;
        int err;
+       struct page *scratch;
+
+       scratch = alloc_page(GFP_KERNEL);
+       if (!scratch)
+               return -ENOMEM;
+       xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE);
 
        /* res->status */
        err = gssx_dec_status(xdr, &res->status);
        if (err)
-               return err;
+               goto out_free;
 
        /* res->context_handle */
        err = gssx_dec_bool(xdr, &value_follows);
        if (err)
-               return err;
+               goto out_free;
        if (value_follows) {
                err = gssx_dec_ctx(xdr, res->context_handle);
                if (err)
-                       return err;
+                       goto out_free;
        } else {
                res->context_handle = NULL;
        }
@@ -814,11 +820,11 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
        /* res->output_token */
        err = gssx_dec_bool(xdr, &value_follows);
        if (err)
-               return err;
+               goto out_free;
        if (value_follows) {
                err = gssx_dec_buffer(xdr, res->output_token);
                if (err)
-                       return err;
+                       goto out_free;
        } else {
                res->output_token = NULL;
        }
@@ -826,14 +832,17 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
        /* res->delegated_cred_handle */
        err = gssx_dec_bool(xdr, &value_follows);
        if (err)
-               return err;
+               goto out_free;
        if (value_follows) {
                /* we do not support upcall servers sending this data. */
-               return -EINVAL;
+               err = -EINVAL;
+               goto out_free;
        }
 
        /* res->options */
        err = gssx_dec_option_array(xdr, &res->options);
 
+out_free:
+       __free_page(scratch);
        return err;
 }
index 46568b8..055453d 100644 (file)
@@ -338,7 +338,7 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
                                              fi, tos, type, nlflags,
                                              tb_id);
                if (!err)
-                       fi->fib_flags |= RTNH_F_EXTERNAL;
+                       fi->fib_flags |= RTNH_F_OFFLOAD;
        }
 
        return err;
@@ -364,7 +364,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
        const struct swdev_ops *ops;
        int err = 0;
 
-       if (!(fi->fib_flags & RTNH_F_EXTERNAL))
+       if (!(fi->fib_flags & RTNH_F_OFFLOAD))
                return 0;
 
        dev = netdev_switch_get_dev_by_nhs(fi);
@@ -376,7 +376,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
                err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len,
                                              fi, tos, type, tb_id);
                if (!err)
-                       fi->fib_flags &= ~RTNH_F_EXTERNAL;
+                       fi->fib_flags &= ~RTNH_F_OFFLOAD;
        }
 
        return err;
index 9074b5c..f485600 100644 (file)
@@ -2142,11 +2142,17 @@ static void tipc_sk_timeout(unsigned long data)
        peer_node = tsk_peer_node(tsk);
 
        if (tsk->probing_state == TIPC_CONN_PROBING) {
-               /* Previous probe not answered -> self abort */
-               skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
-                                     TIPC_CONN_MSG, SHORT_H_SIZE, 0,
-                                     own_node, peer_node, tsk->portid,
-                                     peer_port, TIPC_ERR_NO_PORT);
+               if (!sock_owned_by_user(sk)) {
+                       sk->sk_socket->state = SS_DISCONNECTING;
+                       tsk->connected = 0;
+                       tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
+                                             tsk_peer_port(tsk));
+                       sk->sk_state_change(sk);
+               } else {
+                       /* Try again later */
+                       sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
+               }
+
        } else {
                skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
                                      INT_H_SIZE, 0, peer_node, own_node,
index 5266ea7..0643059 100644 (file)
@@ -1880,6 +1880,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
                unix_state_unlock(sk);
                timeo = freezable_schedule_timeout(timeo);
                unix_state_lock(sk);
+
+               if (sock_flag(sk, SOCK_DEAD))
+                       break;
+
                clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        }
 
@@ -1939,6 +1943,10 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
                struct sk_buff *skb, *last;
 
                unix_state_lock(sk);
+               if (sock_flag(sk, SOCK_DEAD)) {
+                       err = -ECONNRESET;
+                       goto unlock;
+               }
                last = skb = skb_peek(&sk->sk_receive_queue);
 again:
                if (skb == NULL) {
index fff1bef..fd68283 100644 (file)
@@ -1333,6 +1333,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
        memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN);
        wdev_unlock(wdev);
 
+       memset(&sinfo, 0, sizeof(sinfo));
+
        if (rdev_get_station(rdev, dev, bssid, &sinfo))
                return NULL;
 
index 526c4fe..b58286e 100644 (file)
@@ -13,6 +13,8 @@
 #include <net/dst.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
+#include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
 
 static struct kmem_cache *secpath_cachep __read_mostly;
 
@@ -186,6 +188,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
        struct xfrm_state *x = NULL;
        xfrm_address_t *daddr;
        struct xfrm_mode *inner_mode;
+       u32 mark = skb->mark;
        unsigned int family;
        int decaps = 0;
        int async = 0;
@@ -203,6 +206,18 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
                                   XFRM_SPI_SKB_CB(skb)->daddroff);
        family = XFRM_SPI_SKB_CB(skb)->family;
 
+       /* if tunnel is present override skb->mark value with tunnel i_key */
+       if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) {
+               switch (family) {
+               case AF_INET:
+                       mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
+                       break;
+               case AF_INET6:
+                       mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
+                       break;
+               }
+       }
+
        /* Allocate new secpath or COW existing one. */
        if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
                struct sec_path *sp;
@@ -229,7 +244,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
                        goto drop;
                }
 
-               x = xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family);
+               x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
                if (x == NULL) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
                        xfrm_audit_state_notfound(skb, family, spi, seq);
index dab57da..4fd725a 100644 (file)
@@ -99,6 +99,7 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
 
        if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
                XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq;
+               XFRM_SKB_CB(skb)->seq.output.hi = 0;
                if (unlikely(x->replay.oseq == 0)) {
                        x->replay.oseq--;
                        xfrm_audit_state_replay_overflow(x, skb);
@@ -177,6 +178,7 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
 
        if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
                XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq;
+               XFRM_SKB_CB(skb)->seq.output.hi = 0;
                if (unlikely(replay_esn->oseq == 0)) {
                        replay_esn->oseq--;
                        xfrm_audit_state_replay_overflow(x, skb);
index f5e39e3..96688cd 100644 (file)
@@ -927,8 +927,8 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
                        x->id.spi != spi)
                        continue;
 
-               spin_unlock_bh(&net->xfrm.xfrm_state_lock);
                xfrm_state_hold(x);
+               spin_unlock_bh(&net->xfrm.xfrm_state_lock);
                return x;
        }
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
index 89b1df4..c5ec977 100755 (executable)
@@ -3169,12 +3169,12 @@ sub process {
                }
 
 # check for global initialisers.
-               if ($line =~ /^\+(\s*$Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/) {
+               if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*(?:0|NULL|false)\s*;/) {
                        if (ERROR("GLOBAL_INITIALISERS",
                                  "do not initialise globals to 0 or NULL\n" .
                                      $herecurr) &&
                            $fix) {
-                               $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
+                               $fixed[$fixlinenr] =~ s/(^.$Type\s*$Ident(?:\s+$Modifier)*)\s*=\s*(0|NULL|false)\s*;/$1;/;
                        }
                }
 # check for static initialisers.
index a1504c4..25db8cf 100644 (file)
@@ -73,18 +73,11 @@ class LxLsmod(gdb.Command):
                 "        " if utils.get_long_type().sizeof == 8 else ""))
 
         for module in module_list():
-            ref = 0
-            module_refptr = module['refptr']
-            for cpu in cpus.cpu_list("cpu_possible_mask"):
-                refptr = cpus.per_cpu(module_refptr, cpu)
-                ref += refptr['incs']
-                ref -= refptr['decs']
-
             gdb.write("{address} {name:<19} {size:>8}  {ref}".format(
                 address=str(module['module_core']).split()[0],
                 name=module['name'].string(),
                 size=str(module['core_size']),
-                ref=str(ref)))
+                ref=str(module['refcnt']['counter'])))
 
             source_list = module['source_list']
             t = self._module_use_type.get_type().pointer()
index 0d03fcc..7d3f38f 100644 (file)
@@ -209,8 +209,8 @@ static int cap_inode_readlink(struct dentry *dentry)
        return 0;
 }
 
-static int cap_inode_follow_link(struct dentry *dentry,
-                                struct nameidata *nameidata)
+static int cap_inode_follow_link(struct dentry *dentry, struct inode *inode,
+                                bool rcu)
 {
        return 0;
 }
index 8e9b1f4..04c8fec 100644 (file)
@@ -581,11 +581,12 @@ int security_inode_readlink(struct dentry *dentry)
        return security_ops->inode_readlink(dentry);
 }
 
-int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
+int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
+                              bool rcu)
 {
-       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
+       if (unlikely(IS_PRIVATE(inode)))
                return 0;
-       return security_ops->inode_follow_link(dentry, nd);
+       return security_ops->inode_follow_link(dentry, inode, rcu);
 }
 
 int security_inode_permission(struct inode *inode, int mask)
index 3c17dda..0b122b1 100644 (file)
@@ -761,7 +761,23 @@ int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
 
        rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
 
-       rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata);
+       rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, 0);
+       if (rc2)
+               return rc2;
+       return rc;
+}
+
+int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass,
+                      u32 requested, struct common_audit_data *auditdata,
+                      int flags)
+{
+       struct av_decision avd;
+       int rc, rc2;
+
+       rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
+
+       rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc,
+                       auditdata, flags);
        if (rc2)
                return rc2;
        return rc;
index 7dade28..ffa5a64 100644 (file)
@@ -1564,7 +1564,7 @@ static int cred_has_capability(const struct cred *cred,
 
        rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd);
        if (audit == SECURITY_CAP_AUDIT) {
-               int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad);
+               int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad, 0);
                if (rc2)
                        return rc2;
        }
@@ -2861,11 +2861,23 @@ static int selinux_inode_readlink(struct dentry *dentry)
        return dentry_has_perm(cred, dentry, FILE__READ);
 }
 
-static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *nameidata)
+static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode,
+                                    bool rcu)
 {
        const struct cred *cred = current_cred();
+       struct common_audit_data ad;
+       struct inode_security_struct *isec;
+       u32 sid;
 
-       return dentry_has_perm(cred, dentry, FILE__READ);
+       validate_creds(cred);
+
+       ad.type = LSM_AUDIT_DATA_DENTRY;
+       ad.u.dentry = dentry;
+       sid = cred_sid(cred);
+       isec = inode->i_security;
+
+       return avc_has_perm_flags(sid, isec->sid, isec->sclass, FILE__READ, &ad,
+                                 rcu ? MAY_NOT_BLOCK : 0);
 }
 
 static noinline int audit_inode_permission(struct inode *inode,
index ddf8eec..5973c32 100644 (file)
@@ -130,7 +130,8 @@ static inline int avc_audit(u32 ssid, u32 tsid,
                            u16 tclass, u32 requested,
                            struct av_decision *avd,
                            int result,
-                           struct common_audit_data *a)
+                           struct common_audit_data *a,
+                           int flags)
 {
        u32 audited, denied;
        audited = avc_audit_required(requested, avd, result, 0, &denied);
@@ -138,7 +139,7 @@ static inline int avc_audit(u32 ssid, u32 tsid,
                return 0;
        return slow_avc_audit(ssid, tsid, tclass,
                              requested, audited, denied, result,
-                             a, 0);
+                             a, flags);
 }
 
 #define AVC_STRICT 1 /* Ignore permissive mode. */
@@ -150,6 +151,10 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
 int avc_has_perm(u32 ssid, u32 tsid,
                 u16 tclass, u32 requested,
                 struct common_audit_data *auditdata);
+int avc_has_perm_flags(u32 ssid, u32 tsid,
+                      u16 tclass, u32 requested,
+                      struct common_audit_data *auditdata,
+                      int flags);
 
 u32 avc_policy_seqno(void);
 
index cf4cedf..6dad042 100644 (file)
@@ -916,7 +916,6 @@ static struct ac97c_platform_data *atmel_ac97c_probe_dt(struct device *dev)
 {
        struct ac97c_platform_data *pdata;
        struct device_node *node = dev->of_node;
-       const struct of_device_id *match;
 
        if (!node) {
                dev_err(dev, "Device does not have associated DT data\n");
index ac6b33f..7d45645 100644 (file)
@@ -339,7 +339,7 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream,
                if (delta > new_hw_ptr) {
                        /* check for double acknowledged interrupts */
                        hdelta = curr_jiffies - runtime->hw_ptr_jiffies;
-                       if (hdelta > runtime->hw_ptr_buffer_jiffies/2) {
+                       if (hdelta > runtime->hw_ptr_buffer_jiffies/2 + 1) {
                                hw_base += runtime->buffer_size;
                                if (hw_base >= runtime->boundary) {
                                        hw_base = 0;
index 7371e0c..1eabcdf 100644 (file)
@@ -246,6 +246,9 @@ static int hda_reg_read(void *context, unsigned int reg, unsigned int *val)
                return hda_reg_read_stereo_amp(codec, reg, val);
        if (verb == AC_VERB_GET_PROC_COEF)
                return hda_reg_read_coef(codec, reg, val);
+       if ((verb & 0x700) == AC_VERB_SET_AMP_GAIN_MUTE)
+               reg &= ~AC_AMP_FAKE_MUTE;
+
        err = snd_hdac_exec_verb(codec, reg, 0, val);
        if (err < 0)
                return err;
@@ -265,6 +268,9 @@ static int hda_reg_write(void *context, unsigned int reg, unsigned int val)
        unsigned int verb;
        int i, bytes, err;
 
+       if (codec->caps_overwriting)
+               return 0;
+
        reg &= ~0x00080000U; /* drop GET bit */
        reg |= (codec->addr << 28);
        verb = get_verb(reg);
@@ -280,6 +286,8 @@ static int hda_reg_write(void *context, unsigned int reg, unsigned int val)
 
        switch (verb & 0xf00) {
        case AC_VERB_SET_AMP_GAIN_MUTE:
+               if ((reg & AC_AMP_FAKE_MUTE) && (val & AC_AMP_MUTE))
+                       val = 0;
                verb = AC_VERB_SET_AMP_GAIN_MUTE;
                if (reg & AC_AMP_GET_LEFT)
                        verb |= AC_AMP_SET_LEFT >> 8;
index d2f615a..2153d31 100644 (file)
@@ -12,12 +12,14 @@ if SND_MIPS
 config SND_SGI_O2
        tristate "SGI O2 Audio"
        depends on SGI_IP32
+       select SND_PCM
         help
                 Sound support for the SGI O2 Workstation. 
 
 config SND_SGI_HAL2
         tristate "SGI HAL2 Audio"
         depends on SGI_HAS_HAL2
+       select SND_PCM
         help
                 Sound support for the SGI Indy and Indigo2 Workstation.
 
index b49feff..5645481 100644 (file)
@@ -436,7 +436,7 @@ static unsigned int get_num_devices(struct hda_codec *codec, hda_nid_t nid)
            get_wcaps_type(wcaps) != AC_WID_PIN)
                return 0;
 
-       parm = snd_hda_param_read(codec, nid, AC_PAR_DEVLIST_LEN);
+       parm = snd_hdac_read_parm_uncached(&codec->core, nid, AC_PAR_DEVLIST_LEN);
        if (parm == -1 && codec->bus->rirb_error)
                parm = 0;
        return parm & AC_DEV_LIST_LEN_MASK;
@@ -1376,6 +1376,31 @@ int snd_hda_override_amp_caps(struct hda_codec *codec, hda_nid_t nid, int dir,
 EXPORT_SYMBOL_GPL(snd_hda_override_amp_caps);
 
 /**
+ * snd_hda_codec_amp_update - update the AMP mono value
+ * @codec: HD-audio codec
+ * @nid: NID to read the AMP value
+ * @ch: channel to update (0 or 1)
+ * @dir: #HDA_INPUT or #HDA_OUTPUT
+ * @idx: the index value (only for input direction)
+ * @mask: bit mask to set
+ * @val: the bits value to set
+ *
+ * Update the AMP values for the given channel, direction and index.
+ */
+int snd_hda_codec_amp_update(struct hda_codec *codec, hda_nid_t nid,
+                            int ch, int dir, int idx, int mask, int val)
+{
+       unsigned int cmd = snd_hdac_regmap_encode_amp(nid, ch, dir, idx);
+
+       /* enable fake mute if no h/w mute but min=mute */
+       if ((query_amp_caps(codec, nid, dir) &
+            (AC_AMPCAP_MUTE | AC_AMPCAP_MIN_MUTE)) == AC_AMPCAP_MIN_MUTE)
+               cmd |= AC_AMP_FAKE_MUTE;
+       return snd_hdac_regmap_update_raw(&codec->core, cmd, mask, val);
+}
+EXPORT_SYMBOL_GPL(snd_hda_codec_amp_update);
+
+/**
  * snd_hda_codec_amp_stereo - update the AMP stereo values
  * @codec: HD-audio codec
  * @nid: NID to read the AMP value
index 788f969..ac0db16 100644 (file)
@@ -844,8 +844,16 @@ static hda_nid_t path_power_update(struct hda_codec *codec,
                        snd_hda_codec_write(codec, nid, 0,
                                            AC_VERB_SET_POWER_STATE, state);
                        changed = nid;
+                       /* all known codecs seem to be capable to handl
+                        * widgets state even in D3, so far.
+                        * if any new codecs need to restore the widget
+                        * states after D0 transition, call the function
+                        * below.
+                        */
+#if 0 /* disabled */
                        if (state == AC_PWRST_D0)
                                snd_hdac_regmap_sync_node(&codec->core, nid);
+#endif
                }
        }
        return changed;
@@ -4918,9 +4926,12 @@ int snd_hda_gen_parse_auto_config(struct hda_codec *codec,
  dig_only:
        parse_digital(codec);
 
-       if (spec->power_down_unused || codec->power_save_node)
+       if (spec->power_down_unused || codec->power_save_node) {
                if (!codec->power_filter)
                        codec->power_filter = snd_hda_gen_path_power_filter;
+               if (!codec->patch_ops.stream_pm)
+                       codec->patch_ops.stream_pm = snd_hda_gen_stream_pm;
+       }
 
        if (!spec->no_analog && spec->beep_nid) {
                err = snd_hda_attach_beep_device(codec, spec->beep_nid);
index 34040d2..b6db25b 100644 (file)
@@ -340,6 +340,11 @@ enum {
 #define use_vga_switcheroo(chip)       0
 #endif
 
+#define CONTROLLER_IN_GPU(pci) (((pci)->device == 0x0a0c) || \
+                                       ((pci)->device == 0x0c0c) || \
+                                       ((pci)->device == 0x0d0c) || \
+                                       ((pci)->device == 0x160c))
+
 static char *driver_short_names[] = {
        [AZX_DRIVER_ICH] = "HDA Intel",
        [AZX_DRIVER_PCH] = "HDA Intel PCH",
@@ -1854,8 +1859,17 @@ static int azx_probe_continue(struct azx *chip)
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
 #ifdef CONFIG_SND_HDA_I915
                err = hda_i915_init(hda);
-               if (err < 0)
-                       goto out_free;
+               if (err < 0) {
+                       /* if the controller is bound only with HDMI/DP
+                        * (for HSW and BDW), we need to abort the probe;
+                        * for other chips, still continue probing as other
+                        * codecs can be on the same link.
+                        */
+                       if (CONTROLLER_IN_GPU(pci))
+                               goto out_free;
+                       else
+                               goto skip_i915;
+               }
                err = hda_display_power(hda, true);
                if (err < 0) {
                        dev_err(chip->card->dev,
@@ -1865,6 +1879,9 @@ static int azx_probe_continue(struct azx *chip)
 #endif
        }
 
+#ifdef CONFIG_SND_HDA_I915
+ skip_i915:
+#endif
        err = azx_first_init(chip);
        if (err < 0)
                goto out_free;
@@ -2089,6 +2106,8 @@ static const struct pci_device_id azx_ids[] = {
          .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
        { PCI_DEVICE(0x1002, 0xaab0),
          .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
+       { PCI_DEVICE(0x1002, 0xaac8),
+         .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
        /* VIA VT8251/VT8237A */
        { PCI_DEVICE(0x1106, 0x3288),
          .driver_data = AZX_DRIVER_VIA | AZX_DCAPS_POSFIX_VIA },
index 3b567f4..bed66c3 100644 (file)
@@ -129,8 +129,8 @@ int snd_hda_mixer_amp_switch_put_beep(struct snd_kcontrol *kcontrol,
 /* lowlevel accessor with caching; use carefully */
 #define snd_hda_codec_amp_read(codec, nid, ch, dir, idx) \
        snd_hdac_regmap_get_amp(&(codec)->core, nid, ch, dir, idx)
-#define snd_hda_codec_amp_update(codec, nid, ch, dir, idx, mask, val) \
-       snd_hdac_regmap_update_amp(&(codec)->core, nid, ch, dir, idx, mask, val)
+int snd_hda_codec_amp_update(struct hda_codec *codec, hda_nid_t nid,
+                            int ch, int dir, int idx, int mask, int val);
 int snd_hda_codec_amp_stereo(struct hda_codec *codec, hda_nid_t nid,
                             int dir, int idx, int mask, int val);
 int snd_hda_codec_amp_init(struct hda_codec *codec, hda_nid_t nid, int ch,
index f8f0dfb..78b719b 100644 (file)
@@ -968,6 +968,14 @@ static const struct hda_codec_preset snd_hda_preset_conexant[] = {
          .patch = patch_conexant_auto },
        { .id = 0x14f150b9, .name = "CX20665",
          .patch = patch_conexant_auto },
+       { .id = 0x14f150f1, .name = "CX20721",
+         .patch = patch_conexant_auto },
+       { .id = 0x14f150f2, .name = "CX20722",
+         .patch = patch_conexant_auto },
+       { .id = 0x14f150f3, .name = "CX20723",
+         .patch = patch_conexant_auto },
+       { .id = 0x14f150f4, .name = "CX20724",
+         .patch = patch_conexant_auto },
        { .id = 0x14f1510f, .name = "CX20751/2",
          .patch = patch_conexant_auto },
        { .id = 0x14f15110, .name = "CX20751/2",
@@ -1002,6 +1010,10 @@ MODULE_ALIAS("snd-hda-codec-id:14f150ab");
 MODULE_ALIAS("snd-hda-codec-id:14f150ac");
 MODULE_ALIAS("snd-hda-codec-id:14f150b8");
 MODULE_ALIAS("snd-hda-codec-id:14f150b9");
+MODULE_ALIAS("snd-hda-codec-id:14f150f1");
+MODULE_ALIAS("snd-hda-codec-id:14f150f2");
+MODULE_ALIAS("snd-hda-codec-id:14f150f3");
+MODULE_ALIAS("snd-hda-codec-id:14f150f4");
 MODULE_ALIAS("snd-hda-codec-id:14f1510f");
 MODULE_ALIAS("snd-hda-codec-id:14f15110");
 MODULE_ALIAS("snd-hda-codec-id:14f15111");
index e2afd53..6d01045 100644 (file)
@@ -883,6 +883,8 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = {
        { 0x10ec0668, 0x1028, 0, "ALC3661" },
        { 0x10ec0275, 0x1028, 0, "ALC3260" },
        { 0x10ec0899, 0x1028, 0, "ALC3861" },
+       { 0x10ec0298, 0x1028, 0, "ALC3266" },
+       { 0x10ec0256, 0x1028, 0, "ALC3246" },
        { 0x10ec0670, 0x1025, 0, "ALC669X" },
        { 0x10ec0676, 0x1025, 0, "ALC679X" },
        { 0x10ec0282, 0x1043, 0, "ALC3229" },
@@ -2166,6 +2168,7 @@ static const struct hda_fixup alc882_fixups[] = {
 static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1025, 0x006c, "Acer Aspire 9810", ALC883_FIXUP_ACER_EAPD),
        SND_PCI_QUIRK(0x1025, 0x0090, "Acer Aspire", ALC883_FIXUP_ACER_EAPD),
+       SND_PCI_QUIRK(0x1025, 0x0107, "Acer Aspire", ALC883_FIXUP_ACER_EAPD),
        SND_PCI_QUIRK(0x1025, 0x010a, "Acer Ferrari 5000", ALC883_FIXUP_ACER_EAPD),
        SND_PCI_QUIRK(0x1025, 0x0110, "Acer Aspire", ALC883_FIXUP_ACER_EAPD),
        SND_PCI_QUIRK(0x1025, 0x0112, "Acer Aspire 9303", ALC883_FIXUP_ACER_EAPD),
@@ -3673,6 +3676,10 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
                alc_process_coef_fw(codec, coef0293);
                snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
                break;
+       case 0x10ec0662:
+               snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
+               snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
+               break;
        case 0x10ec0668:
                alc_write_coef_idx(codec, 0x11, 0x0001);
                snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
@@ -3738,7 +3745,6 @@ static void alc_headset_mode_default(struct hda_codec *codec)
        case 0x10ec0288:
                alc_process_coef_fw(codec, coef0288);
                break;
-               break;
        case 0x10ec0292:
                alc_process_coef_fw(codec, coef0292);
                break;
@@ -4012,7 +4018,7 @@ static void alc_update_headset_mode(struct hda_codec *codec)
        if (new_headset_mode != ALC_HEADSET_MODE_MIC) {
                snd_hda_set_pin_ctl_cache(codec, hp_pin,
                                          AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN);
-               if (spec->headphone_mic_pin)
+               if (spec->headphone_mic_pin && spec->headphone_mic_pin != hp_pin)
                        snd_hda_set_pin_ctl_cache(codec, spec->headphone_mic_pin,
                                                  PIN_VREFHIZ);
        }
@@ -4215,6 +4221,23 @@ static void alc_fixup_dell_xps13(struct hda_codec *codec,
        }
 }
 
+static void alc_fixup_headset_mode_alc662(struct hda_codec *codec,
+                               const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+
+       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+               spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
+               spec->gen.hp_mic = 1; /* Mic-in is same pin as headphone */
+
+               /* Disable boost for mic-in permanently. (This code is only called
+                  from quirks that guarantee that the headphone is at NID 0x1b.) */
+               snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000);
+               snd_hda_override_wcaps(codec, 0x1b, get_wcaps(codec, 0x1b) & ~AC_WCAP_IN_AMP);
+       } else
+               alc_fixup_headset_mode(codec, fix, action);
+}
+
 static void alc_fixup_headset_mode_alc668(struct hda_codec *codec,
                                const struct hda_fixup *fix, int action)
 {
@@ -4492,6 +4515,8 @@ enum {
        ALC288_FIXUP_DELL_HEADSET_MODE,
        ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
        ALC288_FIXUP_DELL_XPS_13_GPIO6,
+       ALC292_FIXUP_DELL_E7X,
+       ALC292_FIXUP_DISABLE_AAMIX,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5014,6 +5039,16 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC288_FIXUP_DELL1_MIC_NO_PRESENCE
        },
+       [ALC292_FIXUP_DISABLE_AAMIX] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_disable_aamix,
+       },
+       [ALC292_FIXUP_DELL_E7X] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_dell_xps13,
+               .chained = true,
+               .chain_id = ALC292_FIXUP_DISABLE_AAMIX
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5026,6 +5061,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
        SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
        SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
+       SND_PCI_QUIRK(0x1028, 0x05ca, "Dell Latitude E7240", ALC292_FIXUP_DELL_E7X),
+       SND_PCI_QUIRK(0x1028, 0x05cb, "Dell Latitude E7440", ALC292_FIXUP_DELL_E7X),
        SND_PCI_QUIRK(0x1028, 0x05da, "Dell Vostro 5460", ALC290_FIXUP_SUBWOOFER),
        SND_PCI_QUIRK(0x1028, 0x05f4, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x05f5, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
@@ -5035,6 +5072,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK),
        SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC292_FIXUP_DELL_E7X),
        SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
@@ -5119,6 +5157,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX),
        SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK),
        SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
+       SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
        SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
        SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC),
@@ -5148,6 +5187,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
        SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
@@ -5345,6 +5385,20 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x17, 0x40000000},
                {0x1d, 0x40700001},
                {0x21, 0x02211050}),
+       SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell Inspiron 5548", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC255_STANDARD_PINS,
+               {0x12, 0x90a60180},
+               {0x14, 0x90170130},
+               {0x17, 0x40000000},
+               {0x1d, 0x40700001},
+               {0x21, 0x02211040}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC255_STANDARD_PINS,
+               {0x12, 0x90a60160},
+               {0x14, 0x90170120},
+               {0x17, 0x40000000},
+               {0x1d, 0x40700001},
+               {0x21, 0x02211030}),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                ALC256_STANDARD_PINS,
                {0x13, 0x40000000}),
@@ -6079,7 +6133,9 @@ enum {
        ALC662_FIXUP_NO_JACK_DETECT,
        ALC662_FIXUP_ZOTAC_Z68,
        ALC662_FIXUP_INV_DMIC,
+       ALC662_FIXUP_DELL_MIC_NO_PRESENCE,
        ALC668_FIXUP_DELL_MIC_NO_PRESENCE,
+       ALC662_FIXUP_HEADSET_MODE,
        ALC668_FIXUP_HEADSET_MODE,
        ALC662_FIXUP_BASS_MODE4_CHMAP,
        ALC662_FIXUP_BASS_16,
@@ -6272,6 +6328,20 @@ static const struct hda_fixup alc662_fixups[] = {
                .chained = true,
                .chain_id = ALC668_FIXUP_DELL_MIC_NO_PRESENCE
        },
+       [ALC662_FIXUP_DELL_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x03a1113c }, /* use as headset mic, without its own jack detect */
+                       /* headphone mic by setting pin control of 0x1b (headphone out) to in + vref_50 */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC662_FIXUP_HEADSET_MODE
+       },
+       [ALC662_FIXUP_HEADSET_MODE] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_headset_mode_alc662,
+       },
        [ALC668_FIXUP_DELL_MIC_NO_PRESENCE] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -6423,6 +6493,18 @@ static const struct hda_model_fixup alc662_fixup_models[] = {
 };
 
 static const struct snd_hda_pin_quirk alc662_pin_fixup_tbl[] = {
+       SND_HDA_PIN_QUIRK(0x10ec0662, 0x1028, "Dell", ALC662_FIXUP_DELL_MIC_NO_PRESENCE,
+               {0x12, 0x4004c000},
+               {0x14, 0x01014010},
+               {0x15, 0x411111f0},
+               {0x16, 0x411111f0},
+               {0x18, 0x01a19020},
+               {0x19, 0x411111f0},
+               {0x1a, 0x0181302f},
+               {0x1b, 0x0221401f},
+               {0x1c, 0x411111f0},
+               {0x1d, 0x4054c601},
+               {0x1e, 0x411111f0}),
        SND_HDA_PIN_QUIRK(0x10ec0668, 0x1028, "Dell", ALC668_FIXUP_AUTO_MUTE,
                {0x12, 0x99a30130},
                {0x14, 0x90170110},
index 43c99ce..6c66d7e 100644 (file)
@@ -100,6 +100,7 @@ enum {
        STAC_HP_ENVY_BASS,
        STAC_HP_BNB13_EQ,
        STAC_HP_ENVY_TS_BASS,
+       STAC_HP_ENVY_TS_DAC_BIND,
        STAC_92HD83XXX_GPIO10_EAPD,
        STAC_92HD83XXX_MODELS
 };
@@ -2171,6 +2172,22 @@ static void stac92hd83xxx_fixup_gpio10_eapd(struct hda_codec *codec,
        spec->eapd_switch = 0;
 }
 
+static void hp_envy_ts_fixup_dac_bind(struct hda_codec *codec,
+                                           const struct hda_fixup *fix,
+                                           int action)
+{
+       struct sigmatel_spec *spec = codec->spec;
+       static hda_nid_t preferred_pairs[] = {
+               0xd, 0x13,
+               0
+       };
+
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       spec->gen.preferred_dacs = preferred_pairs;
+}
+
 static const struct hda_verb hp_bnb13_eq_verbs[] = {
        /* 44.1KHz base */
        { 0x22, 0x7A6, 0x3E },
@@ -2686,6 +2703,12 @@ static const struct hda_fixup stac92hd83xxx_fixups[] = {
                        {}
                },
        },
+       [STAC_HP_ENVY_TS_DAC_BIND] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = hp_envy_ts_fixup_dac_bind,
+               .chained = true,
+               .chain_id = STAC_HP_ENVY_TS_BASS,
+       },
        [STAC_92HD83XXX_GPIO10_EAPD] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = stac92hd83xxx_fixup_gpio10_eapd,
@@ -2764,6 +2787,8 @@ static const struct snd_pci_quirk stac92hd83xxx_fixup_tbl[] = {
                          "HP bNB13", STAC_HP_BNB13_EQ),
        SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x190e,
                          "HP ENVY TS", STAC_HP_ENVY_TS_BASS),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1967,
+                         "HP ENVY TS", STAC_HP_ENVY_TS_DAC_BIND),
        SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1940,
                          "HP bNB13", STAC_HP_BNB13_EQ),
        SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1941,
@@ -4403,7 +4428,6 @@ static const struct hda_codec_ops stac_patch_ops = {
 #ifdef CONFIG_PM
        .suspend = stac_suspend,
 #endif
-       .stream_pm = snd_hda_gen_stream_pm,
        .reboot_notify = stac_shutup,
 };
 
@@ -4697,7 +4721,8 @@ static int patch_stac92hd71bxx(struct hda_codec *codec)
                return err;
 
        spec = codec->spec;
-       codec->power_save_node = 1;
+       /* disabled power_save_node since it causes noises on a Dell machine */
+       /* codec->power_save_node = 1; */
        spec->linear_tone_beep = 0;
        spec->gen.own_eapd_ctl = 1;
        spec->gen.power_down_unused = 1;
index 31a95cc..bab6c04 100644 (file)
@@ -449,6 +449,15 @@ static int via_suspend(struct hda_codec *codec)
 
        return 0;
 }
+
+static int via_resume(struct hda_codec *codec)
+{
+       /* some delay here to make jack detection working (bko#98921) */
+       msleep(10);
+       codec->patch_ops.init(codec);
+       regcache_sync(codec->core.regmap);
+       return 0;
+}
 #endif
 
 #ifdef CONFIG_PM
@@ -475,6 +484,7 @@ static const struct hda_codec_ops via_patch_ops = {
        .stream_pm = snd_hda_gen_stream_pm,
 #ifdef CONFIG_PM
        .suspend = via_suspend,
+       .resume = via_resume,
        .check_power_status = via_check_power_status,
 #endif
 };
index d51703e..0a4ad5f 100644 (file)
@@ -72,7 +72,6 @@ static void hda_fixup_thinkpad_acpi(struct hda_codec *codec,
                if (led_set_func(TPACPI_LED_MUTE, false) >= 0) {
                        old_vmaster_hook = spec->vmaster_mute.hook;
                        spec->vmaster_mute.hook = update_tpacpi_mute_led;
-                       spec->vmaster_mute_enum = 1;
                        removefunc = false;
                }
                if (led_set_func(TPACPI_LED_MICMUTE, false) >= 0) {
index 2ffb9a0..3d44fc5 100644 (file)
@@ -623,14 +623,14 @@ static int mc13783_probe(struct snd_soc_codec *codec)
                                AUDIO_SSI_SEL, 0);
        else
                mc13xxx_reg_rmw(priv->mc13xxx, MC13783_AUDIO_CODEC,
-                               0, AUDIO_SSI_SEL);
+                               AUDIO_SSI_SEL, AUDIO_SSI_SEL);
 
        if (priv->dac_ssi_port == MC13783_SSI1_PORT)
                mc13xxx_reg_rmw(priv->mc13xxx, MC13783_AUDIO_DAC,
                                AUDIO_SSI_SEL, 0);
        else
                mc13xxx_reg_rmw(priv->mc13xxx, MC13783_AUDIO_DAC,
-                               0, AUDIO_SSI_SEL);
+                               AUDIO_SSI_SEL, AUDIO_SSI_SEL);
 
        return 0;
 }
index dc7778b..c3c33bd 100644 (file)
@@ -437,7 +437,7 @@ static int uda1380_set_dai_fmt_both(struct snd_soc_dai *codec_dai,
        if ((fmt & SND_SOC_DAIFMT_MASTER_MASK) != SND_SOC_DAIFMT_CBS_CFS)
                return -EINVAL;
 
-       uda1380_write(codec, UDA1380_IFACE, iface);
+       uda1380_write_reg_cache(codec, UDA1380_IFACE, iface);
 
        return 0;
 }
index 3035d98..e97a761 100644 (file)
@@ -395,7 +395,7 @@ static const struct snd_soc_dapm_route audio_paths[] = {
        { "Right Input Mixer", "Boost Switch", "Right Boost Mixer", },
        { "Right Input Mixer", NULL, "RINPUT1", },  /* Really Boost Switch */
        { "Right Input Mixer", NULL, "RINPUT2" },
-       { "Right Input Mixer", NULL, "LINPUT3" },
+       { "Right Input Mixer", NULL, "RINPUT3" },
 
        { "Left ADC", NULL, "Left Input Mixer" },
        { "Right ADC", NULL, "Right Input Mixer" },
index 4fbc768..a1c04da 100644 (file)
@@ -2754,7 +2754,7 @@ static struct {
 };
 
 static int fs_ratios[] = {
-       64, 128, 192, 256, 348, 512, 768, 1024, 1408, 1536
+       64, 128, 192, 256, 384, 512, 768, 1024, 1408, 1536
 };
 
 static int bclk_divs[] = {
index bb4b78e..23c91fa 100644 (file)
@@ -1247,7 +1247,7 @@ static int davinci_mcasp_suspend(struct snd_soc_dai *dai)
        u32 reg;
        int i;
 
-       context->pm_state = pm_runtime_enabled(mcasp->dev);
+       context->pm_state = pm_runtime_active(mcasp->dev);
        if (!context->pm_state)
                pm_runtime_get_sync(mcasp->dev);
 
index defe0f0..158204d 100644 (file)
@@ -3100,11 +3100,16 @@ snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm,
        }
 
        prefix = soc_dapm_prefix(dapm);
-       if (prefix)
+       if (prefix) {
                w->name = kasprintf(GFP_KERNEL, "%s %s", prefix, widget->name);
-       else
+               if (widget->sname)
+                       w->sname = kasprintf(GFP_KERNEL, "%s %s", prefix,
+                                            widget->sname);
+       } else {
                w->name = kasprintf(GFP_KERNEL, "%s", widget->name);
-
+               if (widget->sname)
+                       w->sname = kasprintf(GFP_KERNEL, "%s", widget->sname);
+       }
        if (w->name == NULL) {
                kfree(w);
                return NULL;
index 3e2ef61..8b7e391 100644 (file)
@@ -918,6 +918,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval,
        case USB_ID(0x046d, 0x081d): /* HD Webcam c510 */
        case USB_ID(0x046d, 0x0825): /* HD Webcam c270 */
        case USB_ID(0x046d, 0x0826): /* HD Webcam c525 */
+       case USB_ID(0x046d, 0x08ca): /* Logitech Quickcam Fusion */
        case USB_ID(0x046d, 0x0991):
        /* Most audio usb devices lie about volume resolution.
         * Most Logitech webcams have res = 384.
@@ -1582,12 +1583,6 @@ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid,
                              unitid);
                return -EINVAL;
        }
-       /* no bmControls field (e.g. Maya44) -> ignore */
-       if (desc->bLength <= 10 + input_pins) {
-               usb_audio_dbg(state->chip, "MU %d has no bmControls field\n",
-                             unitid);
-               return 0;
-       }
 
        num_ins = 0;
        ich = 0;
@@ -1595,6 +1590,9 @@ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid,
                err = parse_audio_unit(state, desc->baSourceID[pin]);
                if (err < 0)
                        continue;
+               /* no bmControls field (e.g. Maya44) -> ignore */
+               if (desc->bLength <= 10 + input_pins)
+                       continue;
                err = check_input_term(state, desc->baSourceID[pin], &iterm);
                if (err < 0)
                        return err;
index b703cb3..e5000da 100644 (file)
@@ -437,6 +437,11 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = {
                .map = ebox44_map,
        },
        {
+               /* MAYA44 USB+ */
+               .id = USB_ID(0x2573, 0x0008),
+               .map = maya44_map,
+       },
+       {
                /* KEF X300A */
                .id = USB_ID(0x27ac, 0x1000),
                .map = scms_usb3318_map,
index 7c5a701..754e689 100644 (file)
@@ -1117,7 +1117,10 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
        switch (chip->usb_id) {
        case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
        case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+       case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
+       case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
        case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
+       case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
                return true;
        }
        return false;
@@ -1264,8 +1267,9 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
                if (fp->altsetting == 2)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
-       /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
-       case USB_ID(0x20b1, 0x2009):
+
+       case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
+       case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */
                if (fp->altsetting == 3)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
index 9a617ad..b351027 100644 (file)
@@ -1,3 +1,8 @@
+# Some of the tools (perf) use same make variables
+# as in kernel build.
+export srctree=
+export objtree=
+
 include scripts/Makefile.include
 
 help:
@@ -47,11 +52,16 @@ cgroup firewire hv guest usb virtio vm net: FORCE
 liblockdep: FORCE
        $(call descend,lib/lockdep)
 
-libapikfs: FORCE
+libapi: FORCE
        $(call descend,lib/api)
 
-perf: libapikfs FORCE
-       $(call descend,$@)
+# The perf build does not follow the descend function setup,
+# invoking it via it's own make rule.
+PERF_O   = $(if $(O),$(O)/tools/perf,)
+
+perf: FORCE
+       $(Q)mkdir -p $(PERF_O) .
+       $(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir=
 
 selftests: FORCE
        $(call descend,testing/$@)
@@ -97,10 +107,10 @@ cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clea
 liblockdep_clean:
        $(call descend,lib/lockdep,clean)
 
-libapikfs_clean:
+libapi_clean:
        $(call descend,lib/api,clean)
 
-perf_clean: libapikfs_clean
+perf_clean:
        $(call descend,$(@:_clean=),clean)
 
 selftests_clean:
diff --git a/tools/arch/alpha/include/asm/barrier.h b/tools/arch/alpha/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..95df19c
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+#define __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+
+#define mb()   __asm__ __volatile__("mb": : :"memory")
+#define rmb()  __asm__ __volatile__("mb": : :"memory")
+#define wmb()  __asm__ __volatile__("wmb": : :"memory")
+
+#endif         /* __TOOLS_LINUX_ASM_ALPHA_BARRIER_H */
diff --git a/tools/arch/arm/include/asm/barrier.h b/tools/arch/arm/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..005c618
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _TOOLS_LINUX_ASM_ARM_BARRIER_H
+#define _TOOLS_LINUX_ASM_ARM_BARRIER_H
+
+/*
+ * Use the __kuser_memory_barrier helper in the CPU helper page. See
+ * arch/arm/kernel/entry-armv.S in the kernel source for details.
+ */
+#define mb()           ((void(*)(void))0xffff0fa0)()
+#define wmb()          ((void(*)(void))0xffff0fa0)()
+#define rmb()          ((void(*)(void))0xffff0fa0)()
+
+#endif /* _TOOLS_LINUX_ASM_ARM_BARRIER_H */
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..a0483c8
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+#define _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+
+/*
+ * From tools/perf/perf-sys.h, last modified in:
+ * f428ebd184c82a7914b2aa7e9f868918aaf7ea78 perf tools: Fix AAAAARGH64 memory barriers
+ *
+ * XXX: arch/arm64/include/asm/barrier.h in the kernel sources use dsb, is this
+ * a case like for arm32 where we do things differently in userspace?
+ */
+
+#define mb()           asm volatile("dmb ish" ::: "memory")
+#define wmb()          asm volatile("dmb ishst" ::: "memory")
+#define rmb()          asm volatile("dmb ishld" ::: "memory")
+
+#endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..e4422b4
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * Memory barrier definitions.  This is based on information published
+ * in the Processor Abstraction Layer and the System Abstraction Layer
+ * manual.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+#ifndef _TOOLS_LINUX_ASM_IA64_BARRIER_H
+#define _TOOLS_LINUX_ASM_IA64_BARRIER_H
+
+#include <linux/compiler.h>
+
+/*
+ * Macros to force memory ordering.  In these descriptions, "previous"
+ * and "subsequent" refer to program order; "visible" means that all
+ * architecturally visible effects of a memory access have occurred
+ * (at a minimum, this means the memory has been read or written).
+ *
+ *   wmb():    Guarantees that all preceding stores to memory-
+ *             like regions are visible before any subsequent
+ *             stores and that all following stores will be
+ *             visible only after all previous stores.
+ *   rmb():    Like wmb(), but for reads.
+ *   mb():     wmb()/rmb() combo, i.e., all previous memory
+ *             accesses are visible before all subsequent
+ *             accesses and vice versa.  This is also known as
+ *             a "fence."
+ *
+ * Note: "mb()" and its variants cannot be used as a fence to order
+ * accesses to memory mapped I/O registers.  For that, mf.a needs to
+ * be used.  However, we don't want to always use mf.a because (a)
+ * it's (presumably) much slower than mf and (b) mf.a is supported for
+ * sequential memory pages only.
+ */
+
+/* XXX From arch/ia64/include/uapi/asm/gcc_intrin.h */
+#define ia64_mf()       asm volatile ("mf" ::: "memory")
+
+#define mb()           ia64_mf()
+#define rmb()          mb()
+#define wmb()          mb()
+
+#endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
diff --git a/tools/arch/mips/include/asm/barrier.h b/tools/arch/mips/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..80f96f7
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+#define _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+/*
+ * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced
+ * in c1e028ef40b8d6943b767028ba17d4f2ba020edb, more work needed to make it
+ * more closely follow the Linux kernel arch/mips/include/asm/barrier.h file.
+ * Probably when we continue work on tools/ Kconfig support to have all the
+ * CONFIG_ needed for properly doing that.
+ */
+#define mb()           asm volatile(                                   \
+                               ".set   mips2\n\t"                      \
+                               "sync\n\t"                              \
+                               ".set   mips0"                          \
+                               : /* no output */                       \
+                               : /* no input */                        \
+                               : "memory")
+#define wmb()  mb()
+#define rmb()  mb()
+
+#endif /* _TOOLS_LINUX_ASM_MIPS_BARRIER_H */
diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..b23aee8
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+#define _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+
+/*
+ * Memory barrier.
+ * The sync instruction guarantees that all memory accesses initiated
+ * by this processor have been performed (with respect to all other
+ * mechanisms that access memory).  The eieio instruction is a barrier
+ * providing an ordering (separately) for (a) cacheable stores and (b)
+ * loads and stores to non-cacheable memory (e.g. I/O devices).
+ *
+ * mb() prevents loads and stores being reordered across this point.
+ * rmb() prevents loads being reordered across this point.
+ * wmb() prevents stores being reordered across this point.
+ *
+ * *mb() variants without smp_ prefix must order all types of memory
+ * operations with one another. sync is the only instruction sufficient
+ * to do this.
+ */
+#define mb()   __asm__ __volatile__ ("sync" : : : "memory")
+#define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
+
+#endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..f851412
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __TOOLS_LINUX_ASM_BARRIER_H
+#define __TOOLS_LINUX_ASM_BARRIER_H
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* Fast-BCR without checkpoint synchronization */
+#define __ASM_BARRIER "bcr 14,0\n"
+#else
+#define __ASM_BARRIER "bcr 15,0\n"
+#endif
+
+#define mb() do {  asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
+#define rmb()                          mb()
+#define wmb()                          mb()
+
+#endif /* __TOOLS_LIB_ASM_BARRIER_H */
diff --git a/tools/arch/sh/include/asm/barrier.h b/tools/arch/sh/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..c18fd75
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999, 2000  Niibe Yutaka  &  Kaz Kojima
+ * Copyright (C) 2002 Paul Mundt
+ */
+#ifndef __TOOLS_LINUX_ASM_SH_BARRIER_H
+#define __TOOLS_LINUX_ASM_SH_BARRIER_H
+
+/*
+ * A brief note on ctrl_barrier(), the control register write barrier.
+ *
+ * Legacy SH cores typically require a sequence of 8 nops after
+ * modification of a control register in order for the changes to take
+ * effect. On newer cores (like the sh4a and sh5) this is accomplished
+ * with icbi.
+ *
+ * Also note that on sh4a in the icbi case we can forego a synco for the
+ * write barrier, as it's not necessary for control registers.
+ *
+ * Historically we have only done this type of barrier for the MMUCR, but
+ * it's also necessary for the CCR, so we make it generic here instead.
+ */
+#if defined(__SH4A__) || defined(__SH5__)
+#define mb()           __asm__ __volatile__ ("synco": : :"memory")
+#define rmb()          mb()
+#define wmb()          mb()
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif /* __TOOLS_LINUX_ASM_SH_BARRIER_H */
diff --git a/tools/arch/sparc/include/asm/barrier.h b/tools/arch/sparc/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..8c017b3
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#define ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#if defined(__sparc__) && defined(__arch64__)
+#include "barrier_64.h"
+#else
+#include "barrier_32.h"
+#endif
+#endif
diff --git a/tools/arch/sparc/include/asm/barrier_32.h b/tools/arch/sparc/include/asm/barrier_32.h
new file mode 100644 (file)
index 0000000..c5eadd0
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_PERF_SPARC_BARRIER_H
+#define __TOOLS_PERF_SPARC_BARRIER_H
+
+#include <asm-generic/barrier.h>
+
+#endif /* !(__TOOLS_PERF_SPARC_BARRIER_H) */
diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h
new file mode 100644 (file)
index 0000000..9a7d732
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef __TOOLS_LINUX_SPARC64_BARRIER_H
+#define __TOOLS_LINUX_SPARC64_BARRIER_H
+
+/* Copied from the kernel sources to tools/:
+ *
+ * These are here in an effort to more fully work around Spitfire Errata
+ * #51.  Essentially, if a memory barrier occurs soon after a mispredicted
+ * branch, the chip can stop executing instructions until a trap occurs.
+ * Therefore, if interrupts are disabled, the chip can hang forever.
+ *
+ * It used to be believed that the memory barrier had to be right in the
+ * delay slot, but a case has been traced recently wherein the memory barrier
+ * was one instruction after the branch delay slot and the chip still hung.
+ * The offending sequence was the following in sym_wakeup_done() of the
+ * sym53c8xx_2 driver:
+ *
+ *     call    sym_ccb_from_dsa, 0
+ *      movge  %icc, 0, %l0
+ *     brz,pn  %o0, .LL1303
+ *      mov    %o0, %l2
+ *     membar  #LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur.  Therefore, we put
+ * the memory barrier explicitly into a "branch always, predicted taken"
+ * delay slot to avoid the problem case.
+ */
+#define membar_safe(type) \
+do {   __asm__ __volatile__("ba,pt     %%xcc, 1f\n\t" \
+                            " membar   " type "\n" \
+                            "1:\n" \
+                            : : : "memory"); \
+} while (0)
+
+/* The kernel always executes in TSO memory model these days,
+ * and furthermore most sparc64 chips implement more stringent
+ * memory ordering than required by the specifications.
+ */
+#define mb()   membar_safe("#StoreLoad")
+#define rmb()  __asm__ __volatile__("":::"memory")
+#define wmb()  __asm__ __volatile__("":::"memory")
+
+#endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
diff --git a/tools/arch/tile/include/asm/barrier.h b/tools/arch/tile/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..7d3692c
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef _TOOLS_LINUX_ASM_TILE_BARRIER_H
+#define _TOOLS_LINUX_ASM_TILE_BARRIER_H
+/*
+ * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced
+ * in 620830b6954913647b7c7f68920cf48eddf6ad92, more work needed to make it
+ * more closely follow the Linux kernel arch/tile/include/asm/barrier.h file.
+ * Probably when we continue work on tools/ Kconfig support to have all the
+ * CONFIG_ needed for properly doing that.
+ */
+
+#define mb()           asm volatile ("mf" ::: "memory")
+#define wmb()          mb()
+#define rmb()          mb()
+
+#endif /* _TOOLS_LINUX_ASM_TILE_BARRIER_H */
diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h
new file mode 100644 (file)
index 0000000..059e33e
--- /dev/null
@@ -0,0 +1,65 @@
+#ifndef _TOOLS_LINUX_ASM_X86_ATOMIC_H
+#define _TOOLS_LINUX_ASM_X86_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "rmwcc.h"
+
+#define LOCK_PREFIX "\n\tlock; "
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ */
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+       return ACCESS_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+       v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+       asm volatile(LOCK_PREFIX "incl %0"
+                    : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+       GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
+}
+
+#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..f366d8e
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef _TOOLS_LINUX_ASM_X86_BARRIER_H
+#define _TOOLS_LINUX_ASM_X86_BARRIER_H
+
+/*
+ * Copied from the Linux kernel sources, and also moving code
+ * out from tools/perf/perf-sys.h so as to make it be located
+ * in a place similar as in the kernel sources.
+ *
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#if defined(__i386__)
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb()   asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define rmb()  asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb()  asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__x86_64__)
+#define mb()   asm volatile("mfence":::"memory")
+#define rmb()  asm volatile("lfence":::"memory")
+#define wmb()  asm volatile("sfence" ::: "memory")
+#endif
+
+#endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h
new file mode 100644 (file)
index 0000000..a6669bc
--- /dev/null
@@ -0,0 +1,41 @@
+#ifndef _TOOLS_LINUX_ASM_X86_RMWcc
+#define _TOOLS_LINUX_ASM_X86_RMWcc
+
+#ifdef CC_HAVE_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...)                              \
+do {                                                                   \
+       asm_volatile_goto (fullop "; j" cc " %l[cc_label]"              \
+                       : : "m" (var), ## __VA_ARGS__                   \
+                       : "memory" : cc_label);                         \
+       return 0;                                                       \
+cc_label:                                                              \
+       return 1;                                                       \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc)                             \
+       __GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)                 \
+       __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
+
+#else /* !CC_HAVE_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...)                              \
+do {                                                                   \
+       char c;                                                         \
+       asm volatile (fullop "; set" cc " %1"                           \
+                       : "+m" (var), "=qm" (c)                         \
+                       : __VA_ARGS__ : "memory");                      \
+       return c != 0;                                                  \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc)                             \
+       __GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)                 \
+       __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
+
+#endif /* CC_HAVE_ASM_GOTO */
+
+#endif /* _TOOLS_LINUX_ASM_X86_RMWcc */
diff --git a/tools/arch/xtensa/include/asm/barrier.h b/tools/arch/xtensa/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..583800b
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001 - 2012 Tensilica Inc.
+ */
+
+#ifndef _TOOLS_LINUX_XTENSA_SYSTEM_H
+#define _TOOLS_LINUX_XTENSA_SYSTEM_H
+
+#define mb()  ({ __asm__ __volatile__("memw" : : : "memory"); })
+#define rmb() barrier()
+#define wmb() mb()
+
+#endif /* _TOOLS_LINUX_XTENSA_SYSTEM_H */
index 10df572..a51244a 100644 (file)
@@ -37,7 +37,7 @@ subdir-obj-y :=
 
 # Build definitions
 build-file := $(dir)/Build
-include $(build-file)
+-include $(build-file)
 
 quiet_cmd_flex  = FLEX     $@
 quiet_cmd_bison = BISON    $@
@@ -94,12 +94,12 @@ obj-y        := $(patsubst %/, %/$(obj)-in.o, $(obj-y))
 subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y))
 
 # '$(OUTPUT)/dir' prefix to all objects
-prefix       := $(subst ./,,$(OUTPUT)$(dir)/)
-obj-y        := $(addprefix $(prefix),$(obj-y))
-subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y))
+objprefix    := $(subst ./,,$(OUTPUT)$(dir)/)
+obj-y        := $(addprefix $(objprefix),$(obj-y))
+subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y))
 
 # Final '$(obj)-in.o' object
-in-target := $(prefix)$(obj)-in.o
+in-target := $(objprefix)$(obj)-in.o
 
 PHONY += $(subdir-y)
 
index 3a0b0ca..2975632 100644 (file)
@@ -27,7 +27,7 @@ endef
 #   the rule that uses them - an example for that is the 'bionic'
 #   feature check. ]
 #
-FEATURE_TESTS                        \
+FEATURE_TESTS ?=                       \
        backtrace                       \
        dwarf                           \
        fortify-source                  \
@@ -53,7 +53,7 @@ FEATURE_TESTS =                       \
        zlib                            \
        lzma
 
-FEATURE_DISPLAY                      \
+FEATURE_DISPLAY ?=                     \
        dwarf                           \
        glibc                           \
        gtk2                            \
index 0e6c3e6..70d8762 100644 (file)
@@ -2,6 +2,7 @@ ex-y += ex.o
 ex-y += a.o
 ex-y += b.o
 ex-y += empty/
+ex-y += empty2/
 
 libex-y += c.o
 libex-y += d.o
diff --git a/tools/build/tests/ex/empty2/README b/tools/build/tests/ex/empty2/README
new file mode 100644 (file)
index 0000000..2107cc5
--- /dev/null
@@ -0,0 +1,2 @@
+This directory is left intentionally without Build file
+to test proper nesting into Build-less directories.
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h
new file mode 100644 (file)
index 0000000..2ba78c9
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef __TOOLS_ASM_GENERIC_ATOMIC_H
+#define __TOOLS_ASM_GENERIC_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ *
+ * Excerpts obtained from the Linux kernel sources.
+ */
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+       return ACCESS_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+        v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+       __sync_add_and_fetch(&v->counter, 1);
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+       return __sync_sub_and_fetch(&v->counter, 1) == 0;
+}
+
+#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
diff --git a/tools/include/asm-generic/barrier.h b/tools/include/asm-generic/barrier.h
new file mode 100644 (file)
index 0000000..47b9339
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copied from the kernel sources to tools/perf/:
+ *
+ * Generic barrier definitions, originally based on MN10300 definitions.
+ *
+ * It should be possible to use these on really simple architectures,
+ * but it serves more as a starting point for new ports.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+#define __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+
+/*
+ * Force strict CPU ordering. And yes, this is required on UP too when we're
+ * talking to devices.
+ *
+ * Fall back to compiler barriers if nothing better is provided.
+ */
+
+#ifndef mb
+#define mb()   barrier()
+#endif
+
+#ifndef rmb
+#define rmb()  mb()
+#endif
+
+#ifndef wmb
+#define wmb()  mb()
+#endif
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __TOOLS_LINUX_ASM_GENERIC_BARRIER_H */
diff --git a/tools/include/asm/atomic.h b/tools/include/asm/atomic.h
new file mode 100644 (file)
index 0000000..70794f5
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef __TOOLS_LINUX_ASM_ATOMIC_H
+#define __TOOLS_LINUX_ASM_ATOMIC_H
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/atomic.h"
+#else
+#include <asm-generic/atomic-gcc.h>
+#endif
+
+#endif /* __TOOLS_LINUX_ASM_ATOMIC_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..ac66ac5
--- /dev/null
@@ -0,0 +1,27 @@
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/barrier.h"
+#elif defined(__arm__)
+#include "../../arch/arm/include/asm/barrier.h"
+#elif defined(__aarch64__)
+#include "../../arch/arm64/include/asm/barrier.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/asm/barrier.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/asm/barrier.h"
+#elif defined(__sh__)
+#include "../../arch/sh/include/asm/barrier.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/asm/barrier.h"
+#elif defined(__tile__)
+#include "../../arch/tile/include/asm/barrier.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/asm/barrier.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/asm/barrier.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/asm/barrier.h"
+#elif defined(__xtensa__)
+#include "../../arch/xtensa/include/asm/barrier.h"
+#else
+#include <asm-generic/barrier.h>
+#endif
diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
new file mode 100644 (file)
index 0000000..4e3d3d1
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_LINUX_ATOMIC_H
+#define __TOOLS_LINUX_ATOMIC_H
+
+#include <asm/atomic.h>
+
+#endif /* __TOOLS_LINUX_ATOMIC_H */
index 88461f0..f0e7267 100644 (file)
@@ -1,6 +1,10 @@
 #ifndef _TOOLS_LINUX_COMPILER_H_
 #define _TOOLS_LINUX_COMPILER_H_
 
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
 #ifndef __always_inline
 # define __always_inline       inline __attribute__((always_inline))
 #endif
similarity index 97%
rename from tools/perf/util/include/linux/kernel.h
rename to tools/include/linux/kernel.h
index 09e8e7a..76df535 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef PERF_LINUX_KERNEL_H_
-#define PERF_LINUX_KERNEL_H_
+#ifndef __TOOLS_LINUX_KERNEL_H
+#define __TOOLS_LINUX_KERNEL_H
 
 #include <stdarg.h>
 #include <stdio.h>
similarity index 90%
rename from tools/perf/util/include/linux/list.h
rename to tools/include/linux/list.h
index 76ddbc7..76b014c 100644 (file)
@@ -1,10 +1,10 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 
-#include "../../../../include/linux/list.h"
+#include "../../../include/linux/list.h"
 
-#ifndef PERF_LIST_H
-#define PERF_LIST_H
+#ifndef TOOLS_LIST_H
+#define TOOLS_LIST_H
 /**
  * list_del_range - deletes range of entries from list.
  * @begin: first element in the range to delete from the list.
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
new file mode 100644 (file)
index 0000000..0c27bdf
--- /dev/null
@@ -0,0 +1 @@
+#include "../../../include/linux/poison.h"
index b5cf25e..8ebf627 100644 (file)
@@ -60,6 +60,14 @@ typedef __u32 __bitwise __be32;
 typedef __u64 __bitwise __le64;
 typedef __u64 __bitwise __be64;
 
+typedef struct {
+       int counter;
+} atomic_t;
+
+#ifndef __aligned_u64
+# define __aligned_u64 __u64 __attribute__((aligned(8)))
+#endif
+
 struct list_head {
        struct list_head *next, *prev;
 };
index d8fe29f..8bd9606 100644 (file)
@@ -16,7 +16,7 @@ MAKEFLAGS += --no-print-directory
 LIBFILE = $(OUTPUT)libapi.a
 
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
-CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 
 RM = rm -f
index 0c356fb..18ffccf 100644 (file)
@@ -14,9 +14,10 @@ define allow-override
     $(eval $(1) = $(2)))
 endef
 
-# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
 
 INSTALL = install
 
index a11e3c3..cd2cc59 100644 (file)
@@ -28,6 +28,9 @@
 #define __init
 #define noinline
 #define list_add_tail_rcu list_add_tail
+#define list_for_each_entry_rcu list_for_each_entry
+#define barrier() 
+#define synchronize_sched()
 
 #ifndef CALLER_ADDR0
 #define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
index 35f56be..3c60335 100644 (file)
@@ -1 +1,2 @@
 TRACEEVENT-CFLAGS
+libtraceevent-dynamic-list
index d410da3..6daaff6 100644 (file)
@@ -23,6 +23,7 @@ endef
 # Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
 
 EXT = -std=gnu99
 INSTALL = install
@@ -34,9 +35,15 @@ INSTALL = install
 DESTDIR ?=
 DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
 
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
 prefix ?= /usr/local
-bindir_relative = bin
-bindir = $(prefix)/$(bindir_relative)
+libdir = $(prefix)/$(libdir_relative)
 man_dir = $(prefix)/share/man
 man_dir_SQ = '$(subst ','\'',$(man_dir))'
 
@@ -58,7 +65,7 @@ ifeq ($(prefix),$(HOME))
 override plugin_dir = $(HOME)/.traceevent/plugins
 set_plugin_dir := 0
 else
-override plugin_dir = $(prefix)/lib/traceevent/plugins
+override plugin_dir = $(libdir)/traceevent/plugins
 endif
 endif
 
@@ -85,11 +92,11 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
 #$(info Determined 'srctree' to be $(srctree))
 endif
 
-export prefix bindir src obj
+export prefix libdir src obj
 
 # Shell quotes
-bindir_SQ = $(subst ','\'',$(bindir))
-bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
 plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
 
 LIB_FILE = libtraceevent.a libtraceevent.so
@@ -151,8 +158,9 @@ PLUGINS_IN := $(PLUGINS:.so=-in.o)
 
 TE_IN    := $(OUTPUT)libtraceevent-in.o
 LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
 
-CMD_TARGETS = $(LIB_FILE) $(PLUGINS)
+CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE)
 
 TARGETS = $(CMD_TARGETS)
 
@@ -169,6 +177,9 @@ $(OUTPUT)libtraceevent.so: $(TE_IN)
 $(OUTPUT)libtraceevent.a: $(TE_IN)
        $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
+$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
+       $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
+
 plugins: $(PLUGINS)
 
 __plugin_obj = $(notdir $@)
@@ -238,9 +249,16 @@ define do_install_plugins
        done
 endef
 
+define do_generate_dynamic_list_file
+       (echo '{';                                                      \
+       $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;       \
+       echo '};';                                                      \
+       ) > $2
+endef
+
 install_lib: all_cmd install_plugins
        $(call QUIET_INSTALL, $(LIB_FILE)) \
-               $(call do_install,$(LIB_FILE),$(bindir_SQ))
+               $(call do_install,$(LIB_FILE),$(libdir_SQ))
 
 install_plugins: $(PLUGINS)
        $(call QUIET_INSTALL, trace_plugins) \
index e0917c0..cc25f05 100644 (file)
@@ -1387,7 +1387,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
                        do_warning_event(event, "%s: no type found", __func__);
                        goto fail;
                }
-               field->name = last_token;
+               field->name = field->alias = last_token;
 
                if (test_type(type, EVENT_OP))
                        goto fail;
@@ -1469,7 +1469,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
                                size_dynamic = type_size(field->name);
                                free_token(field->name);
                                strcat(field->type, brackets);
-                               field->name = token;
+                               field->name = field->alias = token;
                                type = read_token(&token);
                        } else {
                                char *new_type;
@@ -3865,7 +3865,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
                        } else if (el_size == 4) {
                                trace_seq_printf(s, "%u", *(uint32_t *)num);
                        } else if (el_size == 8) {
-                               trace_seq_printf(s, "%lu", *(uint64_t *)num);
+                               trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
                        } else {
                                trace_seq_printf(s, "BAD SIZE:%d 0x%x",
                                                 el_size, *(uint8_t *)num);
@@ -6444,6 +6444,8 @@ void pevent_ref(struct pevent *pevent)
 void pevent_free_format_field(struct format_field *field)
 {
        free(field->type);
+       if (field->alias != field->name)
+               free(field->alias);
        free(field->name);
        free(field);
 }
index 86a5839..063b197 100644 (file)
@@ -191,6 +191,7 @@ struct format_field {
        struct event_format     *event;
        char                    *type;
        char                    *name;
+       char                    *alias;
        int                     offset;
        int                     size;
        unsigned int            arraylen;
index 4592d84..ec57d0c 100644 (file)
@@ -4,6 +4,19 @@
 #include <endian.h>
 #include "event-parse.h"
 
+/*
+ * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5,
+ * Fedora6.
+ */
+#ifndef le16toh
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define le16toh(x) (x)
+# else
+#  define le16toh(x) __bswap_16 (x)
+# endif
+#endif
+
+
 static unsigned long long
 process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
 {
index c5baf9c..618c2bc 100644 (file)
@@ -123,6 +123,8 @@ static int get_last_jit_image(char *haystack, size_t hlen,
        assert(ret == 0);
 
        ptr = haystack;
+       memset(pmatch, 0, sizeof(pmatch));
+
        while (1) {
                ret = regexec(&regex, ptr, 1, pmatch, 0);
                if (ret == 0) {
index 812f904..09db62b 100644 (file)
@@ -28,3 +28,4 @@ config.mak.autogen
 *-flex.*
 *.pyc
 *.pyo
+.config-detected
diff --git a/tools/perf/Documentation/callchain-overhead-calculation.txt b/tools/perf/Documentation/callchain-overhead-calculation.txt
new file mode 100644 (file)
index 0000000..1a75792
--- /dev/null
@@ -0,0 +1,108 @@
+Overhead calculation
+--------------------
+The overhead can be shown in two columns as 'Children' and 'Self' when
+perf collects callchains.  The 'self' overhead is simply calculated by
+adding all period values of the entry - usually a function (symbol).
+This is the value that perf shows traditionally and sum of all the
+'self' overhead values should be 100%.
+
+The 'children' overhead is calculated by adding all period values of
+the child functions so that it can show the total overhead of the
+higher level functions even if they don't directly execute much.
+'Children' here means functions that are called from another (parent)
+function.
+
+It might be confusing that the sum of all the 'children' overhead
+values exceeds 100% since each of them is already an accumulation of
+'self' overhead of its child functions.  But with this enabled, users
+can find which function has the most overhead even if samples are
+spread over the children.
+
+Consider the following example; there are three functions like below.
+
+-----------------------
+void foo(void) {
+    /* do something */
+}
+
+void bar(void) {
+    /* do something */
+    foo();
+}
+
+int main(void) {
+    bar()
+    return 0;
+}
+-----------------------
+
+In this case 'foo' is a child of 'bar', and 'bar' is an immediate
+child of 'main' so 'foo' also is a child of 'main'.  In other words,
+'main' is a parent of 'foo' and 'bar', and 'bar' is a parent of 'foo'.
+
+Suppose all samples are recorded in 'foo' and 'bar' only.  When it's
+recorded with callchains the output will show something like below
+in the usual (self-overhead-only) output of perf report:
+
+----------------------------------
+Overhead  Symbol
+........  .....................
+  60.00%  foo
+          |
+          --- foo
+              bar
+              main
+              __libc_start_main
+
+  40.00%  bar
+          |
+          --- bar
+              main
+              __libc_start_main
+----------------------------------
+
+When the --children option is enabled, the 'self' overhead values of
+child functions (i.e. 'foo' and 'bar') are added to the parents to
+calculate the 'children' overhead.  In this case the report could be
+displayed as:
+
+-------------------------------------------
+Children      Self  Symbol
+........  ........  ....................
+ 100.00%     0.00%  __libc_start_main
+          |
+          --- __libc_start_main
+
+ 100.00%     0.00%  main
+          |
+          --- main
+              __libc_start_main
+
+ 100.00%    40.00%  bar
+          |
+          --- bar
+              main
+              __libc_start_main
+
+  60.00%    60.00%  foo
+          |
+          --- foo
+              bar
+              main
+              __libc_start_main
+-------------------------------------------
+
+In the above output, the 'self' overhead of 'foo' (60%) was add to the
+'children' overhead of 'bar', 'main' and '\_\_libc_start_main'.
+Likewise, the 'self' overhead of 'bar' (40%) was added to the
+'children' overhead of 'main' and '\_\_libc_start_main'.
+
+So '\_\_libc_start_main' and 'main' are shown first since they have
+same (100%) 'children' overhead (even though they have zero 'self'
+overhead) and they are the parents of 'foo' and 'bar'.
+
+Since v3.16 the 'children' overhead is shown by default and the output
+is sorted by its values. The 'children' overhead is disabled by
+specifying --no-children option on the command line or by adding
+'report.children = false' or 'top.children = false' in the perf config
+file.
index f6480cb..bf3d064 100644 (file)
@@ -210,6 +210,9 @@ Suite for evaluating hash tables.
 *wake*::
 Suite for evaluating wake calls.
 
+*wake-parallel*::
+Suite for evaluating parallel wake calls.
+
 *requeue*::
 Suite for evaluating requeue calls.
 
index dc7442c..b876ae3 100644 (file)
@@ -44,6 +44,33 @@ OPTIONS
 --kallsyms=<file>::
        kallsyms pathname
 
+--itrace::
+       Decode Instruction Tracing data, replacing it with synthesized events.
+       Options are:
+
+               i       synthesize instructions events
+               b       synthesize branches events
+               c       synthesize branches events (calls only)
+               r       synthesize branches events (returns only)
+               x       synthesize transactions events
+               e       synthesize error events
+               d       create a debug log
+               g       synthesize a call chain (use with i or x)
+
+       The default is all events i.e. the same as --itrace=ibxe
+
+       In addition, the period (default 100000) for instructions events
+       can be specified in units of:
+
+               i       instructions
+               t       ticks
+               ms      milliseconds
+               us      microseconds
+               ns      nanoseconds (default)
+
+       Also the call chain size (default 16, max. 1024) for instructions or
+       transactions events can be specified.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
index 23219c6..ff0f433 100644 (file)
@@ -37,7 +37,11 @@ OPTIONS
 
 -s <key[,key2...]>::
 --sort=<key[,key2...]>::
-       Sort the output (default: frag,hit,bytes)
+       Sort the output (default: 'frag,hit,bytes' for slab and 'bytes,hit'
+       for page).  Available sort keys are 'ptr, callsite, bytes, hit,
+       pingpong, frag' for slab and 'page, callsite, bytes, hit, order,
+       migtype, gfp' for page.  This option should be preceded by one of the
+       mode selection options - i.e. --slab, --page, --alloc and/or --caller.
 
 -l <num>::
 --line=<num>::
@@ -52,6 +56,11 @@ OPTIONS
 --page::
        Analyze page allocator events
 
+--live::
+       Show live page stat.  The perf kmem shows total allocation stat by
+       default, but this option shows live (currently allocated) pages
+       instead.  (This option works with --page option only)
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
index 6252e77..6a5bb2b 100644 (file)
@@ -151,6 +151,12 @@ STAT LIVE OPTIONS
        Show events other than HLT (x86 only) or Wait state (s390 only)
        that take longer than duration usecs.
 
+--proc-map-timeout::
+       When processing pre-existing threads /proc/XXX/mmap, it may take
+       a long time, because the file may be huge. A time out is needed
+       in such cases.
+       This option sets the time out limit. The default value is 500 ms.
+
 SEE ALSO
 --------
 linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
index 239609c..3a8a9ba 100644 (file)
@@ -14,11 +14,13 @@ or
 or
 'perf probe' [options] --del='[GROUP:]EVENT' [...]
 or
-'perf probe' --list
+'perf probe' --list[=[GROUP:]EVENT]
 or
 'perf probe' [options] --line='LINE'
 or
 'perf probe' [options] --vars='PROBEPOINT'
+or
+'perf probe' [options] --funcs
 
 DESCRIPTION
 -----------
@@ -64,8 +66,8 @@ OPTIONS
        classes(e.g. [a-z], [!A-Z]).
 
 -l::
---list::
-       List up current probe events.
+--list[=[GROUP:]EVENT]::
+       List up current probe events. This can also accept filtering patterns of event names.
 
 -L::
 --line=::
@@ -81,10 +83,15 @@ OPTIONS
        (Only for --vars) Show external defined variables in addition to local
        variables.
 
+--no-inlines::
+       (Only for --add) Search only for non-inlined functions. The functions
+       which do not have instances are ignored.
+
 -F::
---funcs::
+--funcs[=FILTER]::
        Show available functions in given module or kernel. With -x/--exec,
        can also list functions in a user space executable / shared library.
+       This also can accept a FILTER rule argument.
 
 --filter=FILTER::
        (Only for --vars and --funcs) Set filter. FILTER is a combination of glob
@@ -148,7 +155,7 @@ Each probe argument follows below syntax.
  [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
 
 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
-'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point.
+'$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
 
 On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
index 4847a79..9b9d9d0 100644 (file)
@@ -108,6 +108,8 @@ OPTIONS
        Number of mmap data pages (must be a power of two) or size
        specification with appended unit character - B/K/M/G. The
        size is rounded up to have nearest pages power of two value.
+       Also, by adding a comma, the number of mmap pages for AUX
+       area tracing can be specified.
 
 --group::
        Put all events in a single event group.  This precedes the --event
@@ -145,16 +147,21 @@ OPTIONS
 
 -s::
 --stat::
-       Per thread counts.
+       Record per-thread event counts.  Use it with 'perf report -T' to see
+       the values.
 
 -d::
 --data::
-       Sample addresses.
+       Record the sample addresses.
 
 -T::
 --timestamp::
-       Sample timestamps. Use it with 'perf report -D' to see the timestamps,
-       for instance.
+       Record the sample timestamps. Use it with 'perf report -D' to see the
+       timestamps, for instance.
+
+-P::
+--period::
+       Record the sample period.
 
 -n::
 --no-samples::
@@ -257,6 +264,18 @@ records. See clock_gettime(). In particular CLOCK_MONOTONIC and
 CLOCK_MONOTONIC_RAW are supported, some events might also allow
 CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
 
+-S::
+--snapshot::
+Select AUX area tracing Snapshot Mode. This option is valid only with an
+AUX area tracing event. Optionally the number of bytes to capture per
+snapshot can be specified. In Snapshot Mode, trace data is captured only when
+signal SIGUSR2 is received.
+
+--proc-map-timeout::
+When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
+because the file may be huge. A time out is needed in such cases.
+This option sets the time out limit. The default value is 500 ms.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
index 4879cf6..c33b69f 100644 (file)
@@ -34,7 +34,8 @@ OPTIONS
 
 -T::
 --threads::
-       Show per-thread event counters
+       Show per-thread event counters.  The input data file should be recorded
+       with -s option.
 -c::
 --comms=::
        Only consider symbols in these comms. CSV that understands
@@ -193,6 +194,7 @@ OPTIONS
        Accumulate callchain of children to parent entry so that then can
        show up in the output.  The output will have a new "Children" column
        and will be sorted on the data.  It requires callchains are recorded.
+       See the `overhead calculation' section for more details.
 
 --max-stack::
        Set the stack depth limit when parsing the callchain, anything
@@ -323,6 +325,37 @@ OPTIONS
 --header-only::
        Show only perf.data header (forces --stdio).
 
+--itrace::
+       Options for decoding instruction tracing data. The options are:
+
+               i       synthesize instructions events
+               b       synthesize branches events
+               c       synthesize branches events (calls only)
+               r       synthesize branches events (returns only)
+               x       synthesize transactions events
+               e       synthesize error events
+               d       create a debug log
+               g       synthesize a call chain (use with i or x)
+
+       The default is all events i.e. the same as --itrace=ibxe
+
+       In addition, the period (default 100000) for instructions events
+       can be specified in units of:
+
+               i       instructions
+               t       ticks
+               ms      milliseconds
+               us      microseconds
+               ns      nanoseconds (default)
+
+       Also the call chain size (default 16, max. 1024) for instructions or
+       transactions events can be specified.
+
+       To disable decoding entirely, use --no-itrace.
+
+
+include::callchain-overhead-calculation.txt[]
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-annotate[1]
index 7944575..c82df57 100644 (file)
@@ -115,7 +115,8 @@ OPTIONS
 -f::
 --fields::
         Comma separated list of fields to print. Options are:
-        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period.
+        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
+       srcline, period, flags.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -f sw:comm,tid,time,ip,sym  and -f trace:time,cpu,trace
@@ -165,6 +166,12 @@ OPTIONS
 
        At this point usage is displayed, and perf-script exits.
 
+       The flags field is synthesized and may have a value when Instruction
+       Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+       call, return, conditional, system, asynchronous, interrupt,
+       transaction abort, trace begin, trace end, and in transaction,
+       respectively.
+
        Finally, a user may not set fields to none for all event types.
        i.e., -f "" is not allowed.
 
@@ -221,6 +228,34 @@ OPTIONS
 --header-only
        Show only perf.data header.
 
+--itrace::
+       Options for decoding instruction tracing data. The options are:
+
+               i       synthesize instructions events
+               b       synthesize branches events
+               c       synthesize branches events (calls only)
+               r       synthesize branches events (returns only)
+               x       synthesize transactions events
+               e       synthesize error events
+               d       create a debug log
+               g       synthesize a call chain (use with i or x)
+
+       The default is all events i.e. the same as --itrace=ibxe
+
+       In addition, the period (default 100000) for instructions events
+       can be specified in units of:
+
+               i       instructions
+               t       ticks
+               ms      milliseconds
+               us      microseconds
+               ns      nanoseconds (default)
+
+       Also the call chain size (default 16, max. 1024) for instructions or
+       transactions events can be specified.
+
+       To disable decoding entirely, use --no-itrace.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
index 3265b10..776aec4 100644 (file)
@@ -168,7 +168,7 @@ Default is to monitor all CPUS.
        Accumulate callchain of children to parent entry so that then can
        show up in the output.  The output will have a new "Children" column
        and will be sorted on the data.  It requires -g/--call-graph option
-       enabled.
+       enabled.  See the `overhead calculation' section for more details.
 
 --max-stack::
        Set the stack depth limit when parsing the callchain, anything
@@ -201,6 +201,12 @@ Default is to monitor all CPUS.
        Force each column width to the provided list, for large terminal
        readability.  0 means no limit (default behavior).
 
+--proc-map-timeout::
+       When processing pre-existing threads /proc/XXX/mmap, it may take
+       a long time, because the file may be huge. A time out is needed
+       in such cases.
+       This option sets the time out limit. The default value is 500 ms.
+
 
 INTERACTIVE PROMPTING KEYS
 --------------------------
@@ -234,6 +240,7 @@ INTERACTIVE PROMPTING KEYS
 
 Pressing any unmapped key displays a menu, and prompts for input.
 
+include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
 --------
index ba03fd5..7ea0786 100644 (file)
@@ -35,7 +35,7 @@ OPTIONS
 
 -e::
 --expr::
-       List of events to show, currently only syscall names.
+       List of syscalls to show, currently only syscall names.
        Prefixing with ! shows all syscalls but the ones specified.  You may
        need to escape it.
 
@@ -121,6 +121,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --event::
        Trace other events, see 'perf list' for a complete list.
 
+--proc-map-timeout::
+       When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
+       because the file may be huge. A time out is needed in such cases.
+       This option sets the time out limit. The default value is 500 ms.
+
 PAGEFAULTS
 ----------
 
index 11ccbb2..fe50a1b 100644 (file)
@@ -1,12 +1,30 @@
 tools/perf
+tools/arch/alpha/include/asm/barrier.h
+tools/arch/arm/include/asm/barrier.h
+tools/arch/ia64/include/asm/barrier.h
+tools/arch/mips/include/asm/barrier.h
+tools/arch/powerpc/include/asm/barrier.h
+tools/arch/s390/include/asm/barrier.h
+tools/arch/sh/include/asm/barrier.h
+tools/arch/sparc/include/asm/barrier.h
+tools/arch/sparc/include/asm/barrier_32.h
+tools/arch/sparc/include/asm/barrier_64.h
+tools/arch/tile/include/asm/barrier.h
+tools/arch/x86/include/asm/barrier.h
+tools/arch/xtensa/include/asm/barrier.h
 tools/scripts
 tools/build
+tools/arch/x86/include/asm/atomic.h
+tools/arch/x86/include/asm/rmwcc.h
 tools/lib/traceevent
 tools/lib/api
 tools/lib/symbol/kallsyms.c
 tools/lib/symbol/kallsyms.h
 tools/lib/util/find_next_bit.c
+tools/include/asm/atomic.h
+tools/include/asm/barrier.h
 tools/include/asm/bug.h
+tools/include/asm-generic/barrier.h
 tools/include/asm-generic/bitops/arch_hweight.h
 tools/include/asm-generic/bitops/atomic.h
 tools/include/asm-generic/bitops/const_hweight.h
@@ -17,35 +35,35 @@ tools/include/asm-generic/bitops/fls64.h
 tools/include/asm-generic/bitops/fls.h
 tools/include/asm-generic/bitops/hweight.h
 tools/include/asm-generic/bitops.h
+tools/include/linux/atomic.h
 tools/include/linux/bitops.h
 tools/include/linux/compiler.h
 tools/include/linux/export.h
 tools/include/linux/hash.h
+tools/include/linux/kernel.h
+tools/include/linux/list.h
 tools/include/linux/log2.h
+tools/include/linux/poison.h
 tools/include/linux/types.h
 include/asm-generic/bitops/arch_hweight.h
 include/asm-generic/bitops/const_hweight.h
 include/asm-generic/bitops/fls64.h
 include/asm-generic/bitops/__fls.h
 include/asm-generic/bitops/fls.h
-include/linux/const.h
 include/linux/perf_event.h
 include/linux/rbtree.h
 include/linux/list.h
 include/linux/hash.h
 include/linux/stringify.h
-lib/find_next_bit.c
 lib/hweight.c
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
-arch/*/include/asm/perf_regs.h
 arch/*/include/uapi/asm/unistd*.h
 arch/*/include/uapi/asm/perf_regs.h
 arch/*/lib/memcpy*.S
 arch/*/lib/memset*.S
 include/linux/poison.h
-include/linux/magic.h
 include/linux/hw_breakpoint.h
 include/linux/rbtree_augmented.h
 include/uapi/linux/perf_event.h
index c699dc3..d31a7bb 100644 (file)
@@ -24,7 +24,7 @@ unexport MAKEFLAGS
 # (To override it, run 'make JOBS=1' and similar.)
 #
 ifeq ($(JOBS),)
-  JOBS := $(shell egrep -c '^processor|^CPU' /proc/cpuinfo 2>/dev/null)
+  JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
   ifeq ($(JOBS),0)
     JOBS := 1
   endif
index c43a205..1af0cfe 100644 (file)
@@ -73,6 +73,8 @@ include config/utilities.mak
 # for CTF data format.
 #
 # Define NO_LZMA if you do not want to support compressed (xz) kernel modules
+#
+# Define NO_AUXTRACE if you do not want AUX area tracing support
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(shell pwd)))
@@ -171,6 +173,9 @@ endif
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
 export LIBTRACEEVENT
 
+LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
+LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
+
 LIBAPI = $(LIB_PATH)libapi.a
 export LIBAPI
 
@@ -185,8 +190,9 @@ python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT
 PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
 PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
 
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
-       $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
+       $(QUIET_GEN)CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
+         $(PYTHON_WORD) util/setup.py \
          --quiet build_ext; \
        mkdir -p $(OUTPUT)python && \
        cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
@@ -276,8 +282,9 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
        $(Q)$(MAKE) $(build)=perf
 
-$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN)
-       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
+               $(PERF_IN) $(LIBS) -o $@
 
 $(GTK_IN): FORCE
        $(Q)$(MAKE) $(build)=gtk
@@ -371,7 +378,13 @@ $(LIB_FILE): $(LIBPERF_IN)
 LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
 
 $(LIBTRACEEVENT): FORCE
-       $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins
+       $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
+
+libtraceevent_plugins: FORCE
+       $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
+
+$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
+       $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
 
 $(LIBTRACEEVENT)-clean:
        $(call QUIET_CLEAN, libtraceevent)
@@ -462,7 +475,7 @@ check: $(OUTPUT)common-cmds.h
 
 install-gtk:
 
-install-bin: all install-gtk
+install-tools: all install-gtk
        $(call QUIET_INSTALL, binaries) \
                $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
                $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
@@ -500,12 +513,16 @@ endif
        $(call QUIET_INSTALL, perf_completion-script) \
                $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
                $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+
+install-tests: all install-gtk
        $(call QUIET_INSTALL, tests) \
                $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
                $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
                $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
                $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
 
+install-bin: install-tools install-tests
+
 install: install-bin try-install-man install-traceevent-plugins
 
 install-python_ext:
@@ -549,4 +566,5 @@ FORCE:
 .PHONY: all install clean config-clean strip install-gtk
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
 .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep
+.PHONY: libtraceevent_plugins
 
index 54afe4a..41bf61d 100644 (file)
@@ -1 +1,2 @@
 libperf-y += util/
+libperf-$(CONFIG_DWARF_UNWIND) += tests/
index 1d3f39c..4e5af27 100644 (file)
@@ -5,8 +5,11 @@
 #include <linux/types.h>
 #include <asm/perf_regs.h>
 
+void perf_regs_load(u64 *regs);
+
 #define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
 #define PERF_REGS_MAX  PERF_REG_ARM64_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
 
 #define PERF_REG_IP    PERF_REG_ARM64_PC
 #define PERF_REG_SP    PERF_REG_ARM64_SP
diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build
new file mode 100644 (file)
index 0000000..b30eff9
--- /dev/null
@@ -0,0 +1,2 @@
+libperf-y += regs_load.o
+libperf-y += dwarf-unwind.o
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
new file mode 100644 (file)
index 0000000..cf04a4c
--- /dev/null
@@ -0,0 +1,61 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+               struct thread *thread, u64 *regs)
+{
+       struct stack_dump *stack = &sample->user_stack;
+       struct map *map;
+       unsigned long sp;
+       u64 stack_size, *buf;
+
+       buf = malloc(STACK_SIZE);
+       if (!buf) {
+               pr_debug("failed to allocate sample uregs data\n");
+               return -1;
+       }
+
+       sp = (unsigned long) regs[PERF_REG_ARM64_SP];
+
+       map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+       if (!map) {
+               pr_debug("failed to get stack map\n");
+               free(buf);
+               return -1;
+       }
+
+       stack_size = map->end - sp;
+       stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+       memcpy(buf, (void *) sp, stack_size);
+       stack->data = (char *) buf;
+       stack->size = stack_size;
+       return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+               struct thread *thread)
+{
+       struct regs_dump *regs = &sample->user_regs;
+       u64 *buf;
+
+       buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+       if (!buf) {
+               pr_debug("failed to allocate sample uregs data\n");
+               return -1;
+       }
+
+       perf_regs_load(buf);
+       regs->abi  = PERF_SAMPLE_REGS_ABI;
+       regs->regs = buf;
+       regs->mask = PERF_REGS_MASK;
+
+       return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
new file mode 100644 (file)
index 0000000..025b46e
--- /dev/null
@@ -0,0 +1,46 @@
+#include <linux/linkage.h>
+
+.text
+.type perf_regs_load,%function
+#define STR_REG(r)     str x##r, [x0, 8 * r]
+#define LDR_REG(r)     ldr x##r, [x0, 8 * r]
+#define SP     (8 * 31)
+#define PC     (8 * 32)
+ENTRY(perf_regs_load)
+       STR_REG(0)
+       STR_REG(1)
+       STR_REG(2)
+       STR_REG(3)
+       STR_REG(4)
+       STR_REG(5)
+       STR_REG(6)
+       STR_REG(7)
+       STR_REG(8)
+       STR_REG(9)
+       STR_REG(10)
+       STR_REG(11)
+       STR_REG(12)
+       STR_REG(13)
+       STR_REG(14)
+       STR_REG(15)
+       STR_REG(16)
+       STR_REG(17)
+       STR_REG(18)
+       STR_REG(19)
+       STR_REG(20)
+       STR_REG(21)
+       STR_REG(22)
+       STR_REG(23)
+       STR_REG(24)
+       STR_REG(25)
+       STR_REG(26)
+       STR_REG(27)
+       STR_REG(28)
+       STR_REG(29)
+       STR_REG(30)
+       mov x1, sp
+       str x1, [x0, #SP]
+       str x30, [x0, #PC]
+       LDR_REG(1)
+       ret
+ENDPROC(perf_regs_load)
index 49776f1..b7bb42c 100644 (file)
@@ -61,7 +61,7 @@ const char *const mips_triplets[] = {
 static bool lookup_path(char *name)
 {
        bool found = false;
-       char *path, *tmp;
+       char *path, *tmp = NULL;
        char buf[PATH_MAX];
        char *env = getenv("PATH");
 
index 0af6e9b..7b8b0d1 100644 (file)
@@ -1,4 +1,5 @@
 libperf-y += header.o
+libperf-y += sym-handling.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
new file mode 100644 (file)
index 0000000..bbc1a50
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#include "debug.h"
+#include "symbol.h"
+#include "map.h"
+#include "probe-event.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+       return ehdr.e_type == ET_EXEC ||
+              ehdr.e_type == ET_REL ||
+              ehdr.e_type == ET_DYN;
+}
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+void arch__elf_sym_adjust(GElf_Sym *sym)
+{
+       sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
+}
+#endif
+#endif
+
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+int arch__choose_best_symbol(struct symbol *syma,
+                            struct symbol *symb __maybe_unused)
+{
+       char *sym = syma->name;
+
+       /* Skip over any initial dot */
+       if (*sym == '.')
+               sym++;
+
+       /* Avoid "SyS" kernel syscall aliases */
+       if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3))
+               return SYMBOL_B;
+       if (strlen(sym) >= 10 && !strncmp(sym, "compat_SyS", 10))
+               return SYMBOL_B;
+
+       return SYMBOL_A;
+}
+
+/* Allow matching against dot variants */
+int arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+       /* Skip over initial dot */
+       if (*namea == '.')
+               namea++;
+       if (*nameb == '.')
+               nameb++;
+
+       return strcmp(namea, nameb);
+}
+#endif
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+bool arch__prefers_symtab(void)
+{
+       return true;
+}
+
+#define PPC64LE_LEP_OFFSET     8
+
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+                            struct probe_trace_event *tev, struct map *map)
+{
+       /*
+        * ppc64 ABIv2 local entry point is currently always 2 instructions
+        * (8 bytes) after the global entry point.
+        */
+       if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+               tev->point.address += PPC64LE_LEP_OFFSET;
+               tev->point.offset += PPC64LE_LEP_OFFSET;
+       }
+}
+#endif
index 5ce9802..c3ab760 100644 (file)
@@ -3,6 +3,7 @@ perf-y += sched-pipe.o
 perf-y += mem-memcpy.o
 perf-y += futex-hash.o
 perf-y += futex-wake.o
+perf-y += futex-wake-parallel.o
 perf-y += futex-requeue.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
index 3c4dd44..70b2f71 100644 (file)
@@ -33,6 +33,8 @@ extern int bench_mem_memcpy(int argc, const char **argv,
 extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
 extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
 extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
+extern int bench_futex_wake_parallel(int argc, const char **argv,
+                                    const char *prefix);
 extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
index bedff6b..ad0d9b5 100644 (file)
@@ -132,6 +132,9 @@ int bench_futex_requeue(int argc, const char **argv,
        if (!fshared)
                futex_flag = FUTEX_PRIVATE_FLAG;
 
+       if (nrequeue > nthreads)
+               nrequeue = nthreads;
+
        printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
               "%d at a time.\n\n",  getpid(), nthreads,
               fshared ? "shared":"private", &futex1, &futex2, nrequeue);
@@ -161,20 +164,18 @@ int bench_futex_requeue(int argc, const char **argv,
 
                /* Ok, all threads are patiently blocked, start requeueing */
                gettimeofday(&start, NULL);
-               for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) {
+               while (nrequeued < nthreads) {
                        /*
                         * Do not wakeup any tasks blocked on futex1, allowing
                         * us to really measure futex_wait functionality.
                         */
-                       futex_cmp_requeue(&futex1, 0, &futex2, 0,
-                                         nrequeue, futex_flag);
+                       nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
+                                                      nrequeue, futex_flag);
                }
+
                gettimeofday(&end, NULL);
                timersub(&end, &start, &runtime);
 
-               if (nrequeued > nthreads)
-                       nrequeued = nthreads;
-
                update_stats(&requeued_stats, nrequeued);
                update_stats(&requeuetime_stats, runtime.tv_usec);
 
@@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv,
                }
 
                /* everybody should be blocked on futex2, wake'em up */
-               nrequeued = futex_wake(&futex2, nthreads, futex_flag);
+               nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
                if (nthreads != nrequeued)
                        warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
 
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
new file mode 100644 (file)
index 0000000..6d8c9fa
--- /dev/null
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ *
+ * Block a bunch of threads and let parallel waker threads wakeup an
+ * equal amount of them. The program output reflects the avg latency
+ * for each individual thread to service its share of work. Ultimately
+ * it can be used to measure futex_wake() changes.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+struct thread_data {
+       pthread_t worker;
+       unsigned int nwoken;
+       struct timeval runtime;
+};
+
+static unsigned int nwakes = 1;
+
+/* all threads will block on the same futex -- hash bucket chaos ;) */
+static u_int32_t futex = 0;
+
+static pthread_t *blocked_worker;
+static bool done = false, silent = false, fshared = false;
+static unsigned int nblocked_threads = 0, nwaking_threads = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int ncpus, threads_starting;
+static int futex_flag = 0;
+
+static const struct option options[] = {
+       OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
+       OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
+       OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
+       OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+       OPT_END()
+};
+
+static const char * const bench_futex_wake_parallel_usage[] = {
+       "perf bench futex wake-parallel <options>",
+       NULL
+};
+
+static void *waking_workerfn(void *arg)
+{
+       struct thread_data *waker = (struct thread_data *) arg;
+       struct timeval start, end;
+
+       gettimeofday(&start, NULL);
+
+       waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
+       if (waker->nwoken != nwakes)
+               warnx("couldn't wakeup all tasks (%d/%d)",
+                     waker->nwoken, nwakes);
+
+       gettimeofday(&end, NULL);
+       timersub(&end, &start, &waker->runtime);
+
+       pthread_exit(NULL);
+       return NULL;
+}
+
+static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+{
+       unsigned int i;
+
+       pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+
+       /* create and block all threads */
+       for (i = 0; i < nwaking_threads; i++) {
+               /*
+                * Thread creation order will impact per-thread latency
+                * as it will affect the order to acquire the hb spinlock.
+                * For now let the scheduler decide.
+                */
+               if (pthread_create(&td[i].worker, &thread_attr,
+                                  waking_workerfn, (void *)&td[i]))
+                       err(EXIT_FAILURE, "pthread_create");
+       }
+
+       for (i = 0; i < nwaking_threads; i++)
+               if (pthread_join(td[i].worker, NULL))
+                       err(EXIT_FAILURE, "pthread_join");
+}
+
+static void *blocked_workerfn(void *arg __maybe_unused)
+{
+       pthread_mutex_lock(&thread_lock);
+       threads_starting--;
+       if (!threads_starting)
+               pthread_cond_signal(&thread_parent);
+       pthread_cond_wait(&thread_worker, &thread_lock);
+       pthread_mutex_unlock(&thread_lock);
+
+       while (1) { /* handle spurious wakeups */
+               if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
+                       break;
+       }
+
+       pthread_exit(NULL);
+       return NULL;
+}
+
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+{
+       cpu_set_t cpu;
+       unsigned int i;
+
+       threads_starting = nblocked_threads;
+
+       /* create and block all threads */
+       for (i = 0; i < nblocked_threads; i++) {
+               CPU_ZERO(&cpu);
+               CPU_SET(i % ncpus, &cpu);
+
+               if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+                       err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+               if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
+                       err(EXIT_FAILURE, "pthread_create");
+       }
+}
+
+static void print_run(struct thread_data *waking_worker, unsigned int run_num)
+{
+       unsigned int i, wakeup_avg;
+       double waketime_avg, waketime_stddev;
+       struct stats __waketime_stats, __wakeup_stats;
+
+       init_stats(&__wakeup_stats);
+       init_stats(&__waketime_stats);
+
+       for (i = 0; i < nwaking_threads; i++) {
+               update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
+               update_stats(&__wakeup_stats, waking_worker[i].nwoken);
+       }
+
+       waketime_avg = avg_stats(&__waketime_stats);
+       waketime_stddev = stddev_stats(&__waketime_stats);
+       wakeup_avg = avg_stats(&__wakeup_stats);
+
+       printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
+              "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
+              nblocked_threads, waketime_avg/1e3,
+              rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void print_summary(void)
+{
+       unsigned int wakeup_avg;
+       double waketime_avg, waketime_stddev;
+
+       waketime_avg = avg_stats(&waketime_stats);
+       waketime_stddev = stddev_stats(&waketime_stats);
+       wakeup_avg = avg_stats(&wakeup_stats);
+
+       printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
+              wakeup_avg,
+              nblocked_threads,
+              waketime_avg/1e3,
+              rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+
+static void do_run_stats(struct thread_data *waking_worker)
+{
+       unsigned int i;
+
+       for (i = 0; i < nwaking_threads; i++) {
+               update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
+               update_stats(&wakeup_stats, waking_worker[i].nwoken);
+       }
+
+}
+
+static void toggle_done(int sig __maybe_unused,
+                       siginfo_t *info __maybe_unused,
+                       void *uc __maybe_unused)
+{
+       done = true;
+}
+
+int bench_futex_wake_parallel(int argc, const char **argv,
+                             const char *prefix __maybe_unused)
+{
+       int ret = 0;
+       unsigned int i, j;
+       struct sigaction act;
+       pthread_attr_t thread_attr;
+       struct thread_data *waking_worker;
+
+       argc = parse_options(argc, argv, options,
+                            bench_futex_wake_parallel_usage, 0);
+       if (argc) {
+               usage_with_options(bench_futex_wake_parallel_usage, options);
+               exit(EXIT_FAILURE);
+       }
+
+       sigfillset(&act.sa_mask);
+       act.sa_sigaction = toggle_done;
+       sigaction(SIGINT, &act, NULL);
+
+       ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+       if (!nblocked_threads)
+               nblocked_threads = ncpus;
+
+       /* some sanity checks */
+       if (nwaking_threads > nblocked_threads || !nwaking_threads)
+               nwaking_threads = nblocked_threads;
+
+       if (nblocked_threads % nwaking_threads)
+               errx(EXIT_FAILURE, "Must be perfectly divisible");
+       /*
+        * Each thread will wakeup nwakes tasks in
+        * a single futex_wait call.
+        */
+       nwakes = nblocked_threads/nwaking_threads;
+
+       blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
+       if (!blocked_worker)
+               err(EXIT_FAILURE, "calloc");
+
+       if (!fshared)
+               futex_flag = FUTEX_PRIVATE_FLAG;
+
+       printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
+              "futex %p), %d threads waking up %d at a time.\n\n",
+              getpid(), nblocked_threads, fshared ? "shared":"private",
+              &futex, nwaking_threads, nwakes);
+
+       init_stats(&wakeup_stats);
+       init_stats(&waketime_stats);
+
+       pthread_attr_init(&thread_attr);
+       pthread_mutex_init(&thread_lock, NULL);
+       pthread_cond_init(&thread_parent, NULL);
+       pthread_cond_init(&thread_worker, NULL);
+
+       for (j = 0; j < bench_repeat && !done; j++) {
+               waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
+               if (!waking_worker)
+                       err(EXIT_FAILURE, "calloc");
+
+               /* create, launch & block all threads */
+               block_threads(blocked_worker, thread_attr);
+
+               /* make sure all threads are already blocked */
+               pthread_mutex_lock(&thread_lock);
+               while (threads_starting)
+                       pthread_cond_wait(&thread_parent, &thread_lock);
+               pthread_cond_broadcast(&thread_worker);
+               pthread_mutex_unlock(&thread_lock);
+
+               usleep(100000);
+
+               /* Ok, all threads are patiently blocked, start waking folks up */
+               wakeup_threads(waking_worker, thread_attr);
+
+               for (i = 0; i < nblocked_threads; i++) {
+                       ret = pthread_join(blocked_worker[i], NULL);
+                       if (ret)
+                               err(EXIT_FAILURE, "pthread_join");
+               }
+
+               do_run_stats(waking_worker);
+               if (!silent)
+                       print_run(waking_worker, j);
+
+               free(waking_worker);
+       }
+
+       /* cleanup & report results */
+       pthread_cond_destroy(&thread_parent);
+       pthread_cond_destroy(&thread_worker);
+       pthread_mutex_destroy(&thread_lock);
+       pthread_attr_destroy(&thread_attr);
+
+       print_summary();
+
+       free(blocked_worker);
+       return ret;
+}
index 929f762..e5e41d3 100644 (file)
@@ -60,7 +60,12 @@ static void *workerfn(void *arg __maybe_unused)
        pthread_cond_wait(&thread_worker, &thread_lock);
        pthread_mutex_unlock(&thread_lock);
 
-       futex_wait(&futex1, 0, NULL, futex_flag);
+       while (1) {
+               if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
+                       break;
+       }
+
+       pthread_exit(NULL);
        return NULL;
 }
 
index ebfa163..870b7e6 100644 (file)
@@ -8,6 +8,7 @@
 #include "../builtin.h"
 #include "../util/util.h"
 #include "../util/parse-options.h"
+#include "../util/cloexec.h"
 
 #include "bench.h"
 
@@ -23,6 +24,7 @@
 #include <pthread.h>
 #include <sys/mman.h>
 #include <sys/time.h>
+#include <sys/resource.h>
 #include <sys/wait.h>
 #include <sys/prctl.h>
 #include <sys/types.h>
@@ -51,6 +53,9 @@ struct thread_data {
        unsigned int            loops_done;
        u64                     val;
        u64                     runtime_ns;
+       u64                     system_time_ns;
+       u64                     user_time_ns;
+       double                  speed_gbs;
        pthread_mutex_t         *process_lock;
 };
 
@@ -180,7 +185,7 @@ static const struct option options[] = {
        OPT_INTEGER('H', "thp"          , &p0.thp,              "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
        OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
        OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
-       OPT_BOOLEAN('q', "quiet"        , &p0.show_quiet,       "bzero the initial allocations"),
+       OPT_BOOLEAN('q', "quiet"        , &p0.show_quiet,       "quiet mode"),
        OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
 
        /* Special option string parsing callbacks: */
@@ -828,6 +833,9 @@ static int count_process_nodes(int process_nr)
                td = g->threads + task_nr;
 
                node = numa_node_of_cpu(td->curr_cpu);
+               if (node < 0) /* curr_cpu was likely still -1 */
+                       return 0;
+
                node_present[node] = 1;
        }
 
@@ -882,6 +890,11 @@ static void calc_convergence_compression(int *strong)
        for (p = 0; p < g->p.nr_proc; p++) {
                unsigned int nodes = count_process_nodes(p);
 
+               if (!nodes) {
+                       *strong = 0;
+                       return;
+               }
+
                nodes_min = min(nodes, nodes_min);
                nodes_max = max(nodes, nodes_max);
        }
@@ -1034,6 +1047,7 @@ static void *worker_thread(void *__tdata)
        u64 bytes_done;
        long work_done;
        u32 l;
+       struct rusage rusage;
 
        bind_to_cpumask(td->bind_cpumask);
        bind_to_memnode(td->bind_node);
@@ -1186,6 +1200,13 @@ static void *worker_thread(void *__tdata)
        timersub(&stop, &start0, &diff);
        td->runtime_ns = diff.tv_sec * 1000000000ULL;
        td->runtime_ns += diff.tv_usec * 1000ULL;
+       td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9;
+
+       getrusage(RUSAGE_THREAD, &rusage);
+       td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL;
+       td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL;
+       td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL;
+       td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL;
 
        free_data(thread_data, g->p.bytes_thread);
 
@@ -1395,7 +1416,7 @@ static void print_res(const char *name, double val,
        if (!name)
                name = "main,";
 
-       if (g->p.show_quiet)
+       if (!g->p.show_quiet)
                printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
        else
                printf(" %14.3f %s\n", val, txt_long);
@@ -1412,7 +1433,7 @@ static int __bench_numa(const char *name)
        double runtime_sec_min;
        int wait_stat;
        double bytes;
-       int i, t;
+       int i, t, p;
 
        if (init())
                return -1;
@@ -1548,6 +1569,24 @@ static int __bench_numa(const char *name)
        print_res(name, bytes / runtime_sec_max / 1e9,
                "GB/sec,", "total-speed",       "GB/sec total speed");
 
+       if (g->p.show_details >= 2) {
+               char tname[32];
+               struct thread_data *td;
+               for (p = 0; p < g->p.nr_proc; p++) {
+                       for (t = 0; t < g->p.nr_threads; t++) {
+                               memset(tname, 0, 32);
+                               td = g->threads + p*g->p.nr_threads + t;
+                               snprintf(tname, 32, "process%d:thread%d", p, t);
+                               print_res(tname, td->speed_gbs,
+                                       "GB/sec",       "thread-speed", "GB/sec/thread speed");
+                               print_res(tname, td->system_time_ns / 1e9,
+                                       "secs", "thread-system-time", "system CPU time/thread");
+                               print_res(tname, td->user_time_ns / 1e9,
+                                       "secs", "thread-user-time", "user CPU time/thread");
+                       }
+               }
+       }
+
        free(pids);
 
        deinit();
index 71bf745..2c1bec3 100644 (file)
@@ -59,6 +59,10 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
            (al->sym == NULL ||
             strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
                /* We're only interested in a symbol named sym_hist_filter */
+               /*
+                * FIXME: why isn't this done in the symbol_filter when loading
+                * the DSO?
+                */
                if (al->sym != NULL) {
                        rb_erase(&al->sym->rb_node,
                                 &al->map->dso->symbols[al->map->type]);
@@ -84,6 +88,7 @@ static int process_sample_event(struct perf_tool *tool,
 {
        struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
        struct addr_location al;
+       int ret = 0;
 
        if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
                pr_warning("problem processing %d event, skipping it.\n",
@@ -92,15 +97,16 @@ static int process_sample_event(struct perf_tool *tool,
        }
 
        if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
-               return 0;
+               goto out_put;
 
        if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
                pr_warning("problem incrementing symbol count, "
                           "skipping event\n");
-               return -1;
+               ret = -1;
        }
-
-       return 0;
+out_put:
+       addr_location__put(&al);
+       return ret;
 }
 
 static int hist_entry__tty_annotate(struct hist_entry *he,
@@ -283,7 +289,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
                },
        };
        struct perf_data_file file = {
-               .path  = input_name,
                .mode  = PERF_DATA_MODE_READ,
        };
        const struct option options[] = {
@@ -324,6 +329,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
                   "objdump binary to use for disassembly and annotations"),
        OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
                    "Show event group information together"),
+       OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+                   "Show a column with the sum of periods"),
        OPT_END()
        };
        int ret = hists__init();
@@ -340,6 +347,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
        else if (annotate.use_gtk)
                use_browser = 2;
 
+       file.path  = input_name;
+
        setup_browser(true);
 
        annotate.session = perf_session__new(&file, false, &annotate.tool);
index b9a56fa..b5314e4 100644 (file)
@@ -58,6 +58,7 @@ static struct bench mem_benchmarks[] = {
 static struct bench futex_benchmarks[] = {
        { "hash",       "Benchmark for futex hash table",               bench_futex_hash        },
        { "wake",       "Benchmark for futex wake calls",               bench_futex_wake        },
+       { "wake-parallel", "Benchmark for parallel futex wake calls",   bench_futex_wake_parallel },
        { "requeue",    "Benchmark for futex requeue calls",            bench_futex_requeue     },
        { "all",        "Test all futex benchmarks",                    NULL                    },
        { NULL,         NULL,                                           NULL                    }
index feb420f..9fe93c8 100644 (file)
@@ -69,6 +69,15 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
        session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops);
        if (session == NULL)
                return -1;
+
+       /*
+        * We take all buildids when the file contains AUX area tracing data
+        * because we do not decode the trace because it would take too long.
+        */
+       if (!perf_data_file__is_pipe(&file) &&
+           perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+               with_hits = false;
+
        /*
         * in pipe-mode, the only way to get the buildids is to parse
         * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
index df6307b..daaa7dc 100644 (file)
@@ -328,6 +328,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 {
        struct addr_location al;
        struct hists *hists = evsel__hists(evsel);
+       int ret = -1;
 
        if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
                pr_warning("problem processing %d event, skipping it.\n",
@@ -338,7 +339,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
        if (hists__add_entry(hists, &al, sample->period,
                             sample->weight, sample->transaction)) {
                pr_warning("problem incrementing symbol period, skipping event\n");
-               return -1;
+               goto out_put;
        }
 
        /*
@@ -350,8 +351,10 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
        hists->stats.total_period += sample->period;
        if (!al.filtered)
                hists->stats.total_non_filtered_period += sample->period;
-
-       return 0;
+       ret = 0;
+out_put:
+       addr_location__put(&al);
+       return ret;
 }
 
 static struct perf_tool tool = {
index 40a33d7..52ec66b 100644 (file)
@@ -16,6 +16,7 @@
 #include "util/debug.h"
 #include "util/build-id.h"
 #include "util/data.h"
+#include "util/auxtrace.h"
 
 #include "util/parse-options.h"
 
@@ -26,10 +27,12 @@ struct perf_inject {
        struct perf_session     *session;
        bool                    build_ids;
        bool                    sched_stat;
+       bool                    have_auxtrace;
        const char              *input_name;
        struct perf_data_file   output;
        u64                     bytes_written;
        struct list_head        samples;
+       struct itrace_synth_opts itrace_synth_opts;
 };
 
 struct event_entry {
@@ -38,14 +41,11 @@ struct event_entry {
        union perf_event event[0];
 };
 
-static int perf_event__repipe_synth(struct perf_tool *tool,
-                                   union perf_event *event)
+static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
 {
-       struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
        ssize_t size;
 
-       size = perf_data_file__write(&inject->output, event,
-                                    event->header.size);
+       size = perf_data_file__write(&inject->output, buf, sz);
        if (size < 0)
                return -errno;
 
@@ -53,6 +53,15 @@ static int perf_event__repipe_synth(struct perf_tool *tool,
        return 0;
 }
 
+static int perf_event__repipe_synth(struct perf_tool *tool,
+                                   union perf_event *event)
+{
+       struct perf_inject *inject = container_of(tool, struct perf_inject,
+                                                 tool);
+
+       return output_bytes(inject, event, event->header.size);
+}
+
 static int perf_event__repipe_oe_synth(struct perf_tool *tool,
                                       union perf_event *event,
                                       struct ordered_events *oe __maybe_unused)
@@ -86,6 +95,79 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
        return perf_event__repipe_synth(tool, event);
 }
 
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
+{
+       char buf[4096];
+       ssize_t ssz;
+       int ret;
+
+       while (size > 0) {
+               ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
+               if (ssz < 0)
+                       return -errno;
+               ret = output_bytes(inject, buf, ssz);
+               if (ret)
+                       return ret;
+               size -= ssz;
+       }
+
+       return 0;
+}
+
+static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
+                                      union perf_event *event,
+                                      struct perf_session *session
+                                      __maybe_unused)
+{
+       struct perf_inject *inject = container_of(tool, struct perf_inject,
+                                                 tool);
+       int ret;
+
+       inject->have_auxtrace = true;
+
+       if (!inject->output.is_pipe) {
+               off_t offset;
+
+               offset = lseek(inject->output.fd, 0, SEEK_CUR);
+               if (offset == -1)
+                       return -errno;
+               ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
+                                                    event, offset);
+               if (ret < 0)
+                       return ret;
+       }
+
+       if (perf_data_file__is_pipe(session->file) || !session->one_mmap) {
+               ret = output_bytes(inject, event, event->header.size);
+               if (ret < 0)
+                       return ret;
+               ret = copy_bytes(inject, perf_data_file__fd(session->file),
+                                event->auxtrace.size);
+       } else {
+               ret = output_bytes(inject, event,
+                                  event->header.size + event->auxtrace.size);
+       }
+       if (ret < 0)
+               return ret;
+
+       return event->auxtrace.size;
+}
+
+#else
+
+static s64
+perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused,
+                           union perf_event *event __maybe_unused,
+                           struct perf_session *session __maybe_unused)
+{
+       pr_err("AUX area tracing not supported\n");
+       return -EINVAL;
+}
+
+#endif
+
 static int perf_event__repipe(struct perf_tool *tool,
                              union perf_event *event,
                              struct perf_sample *sample __maybe_unused,
@@ -155,6 +237,32 @@ static int perf_event__repipe_fork(struct perf_tool *tool,
        return err;
 }
 
+static int perf_event__repipe_comm(struct perf_tool *tool,
+                                  union perf_event *event,
+                                  struct perf_sample *sample,
+                                  struct machine *machine)
+{
+       int err;
+
+       err = perf_event__process_comm(tool, event, sample, machine);
+       perf_event__repipe(tool, event, sample, machine);
+
+       return err;
+}
+
+static int perf_event__repipe_exit(struct perf_tool *tool,
+                                  union perf_event *event,
+                                  struct perf_sample *sample,
+                                  struct machine *machine)
+{
+       int err;
+
+       err = perf_event__process_exit(tool, event, sample, machine);
+       perf_event__repipe(tool, event, sample, machine);
+
+       return err;
+}
+
 static int perf_event__repipe_tracing_data(struct perf_tool *tool,
                                           union perf_event *event,
                                           struct perf_session *session)
@@ -167,6 +275,18 @@ static int perf_event__repipe_tracing_data(struct perf_tool *tool,
        return err;
 }
 
+static int perf_event__repipe_id_index(struct perf_tool *tool,
+                                      union perf_event *event,
+                                      struct perf_session *session)
+{
+       int err;
+
+       perf_event__repipe_synth(tool, event);
+       err = perf_event__process_id_index(tool, event, session);
+
+       return err;
+}
+
 static int dso__read_build_id(struct dso *dso)
 {
        if (dso->has_build_id)
@@ -245,6 +365,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
                }
        }
 
+       thread__put(thread);
 repipe:
        perf_event__repipe(tool, event, sample, machine);
        return 0;
@@ -351,16 +472,20 @@ static int __cmd_inject(struct perf_inject *inject)
        struct perf_session *session = inject->session;
        struct perf_data_file *file_out = &inject->output;
        int fd = perf_data_file__fd(file_out);
+       u64 output_data_offset;
 
        signal(SIGINT, sig_handler);
 
-       if (inject->build_ids || inject->sched_stat) {
+       if (inject->build_ids || inject->sched_stat ||
+           inject->itrace_synth_opts.set) {
                inject->tool.mmap         = perf_event__repipe_mmap;
                inject->tool.mmap2        = perf_event__repipe_mmap2;
                inject->tool.fork         = perf_event__repipe_fork;
                inject->tool.tracing_data = perf_event__repipe_tracing_data;
        }
 
+       output_data_offset = session->header.data_offset;
+
        if (inject->build_ids) {
                inject->tool.sample = perf_event__inject_buildid;
        } else if (inject->sched_stat) {
@@ -379,17 +504,43 @@ static int __cmd_inject(struct perf_inject *inject)
                        else if (!strncmp(name, "sched:sched_stat_", 17))
                                evsel->handler = perf_inject__sched_stat;
                }
+       } else if (inject->itrace_synth_opts.set) {
+               session->itrace_synth_opts = &inject->itrace_synth_opts;
+               inject->itrace_synth_opts.inject = true;
+               inject->tool.comm           = perf_event__repipe_comm;
+               inject->tool.exit           = perf_event__repipe_exit;
+               inject->tool.id_index       = perf_event__repipe_id_index;
+               inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
+               inject->tool.auxtrace       = perf_event__process_auxtrace;
+               inject->tool.ordered_events = true;
+               inject->tool.ordering_requires_timestamps = true;
+               /* Allow space in the header for new attributes */
+               output_data_offset = 4096;
        }
 
+       if (!inject->itrace_synth_opts.set)
+               auxtrace_index__free(&session->auxtrace_index);
+
        if (!file_out->is_pipe)
-               lseek(fd, session->header.data_offset, SEEK_SET);
+               lseek(fd, output_data_offset, SEEK_SET);
 
        ret = perf_session__process_events(session);
 
        if (!file_out->is_pipe) {
-               if (inject->build_ids)
+               if (inject->build_ids) {
                        perf_header__set_feat(&session->header,
                                              HEADER_BUILD_ID);
+                       if (inject->have_auxtrace)
+                               dsos__hit_all(session);
+               }
+               /*
+                * The AUX areas have been removed and replaced with
+                * synthesized hardware events, so clear the feature flag.
+                */
+               if (inject->itrace_synth_opts.set)
+                       perf_header__clear_feat(&session->header,
+                                               HEADER_AUXTRACE);
+               session->header.data_offset = output_data_offset;
                session->header.data_size = inject->bytes_written;
                perf_session__write_header(session, session->evlist, fd, true);
        }
@@ -408,11 +559,16 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
                        .fork           = perf_event__repipe,
                        .exit           = perf_event__repipe,
                        .lost           = perf_event__repipe,
+                       .aux            = perf_event__repipe,
+                       .itrace_start   = perf_event__repipe,
                        .read           = perf_event__repipe_sample,
                        .throttle       = perf_event__repipe,
                        .unthrottle     = perf_event__repipe,
                        .attr           = perf_event__repipe_attr,
                        .tracing_data   = perf_event__repipe_op2_synth,
+                       .auxtrace_info  = perf_event__repipe_op2_synth,
+                       .auxtrace       = perf_event__repipe_auxtrace,
+                       .auxtrace_error = perf_event__repipe_op2_synth,
                        .finished_round = perf_event__repipe_oe_synth,
                        .build_id       = perf_event__repipe_op2_synth,
                        .id_index       = perf_event__repipe_op2_synth,
@@ -444,6 +600,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
                OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
                           "kallsyms pathname"),
                OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
+               OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
+                                   NULL, "opts", "Instruction Tracing options",
+                                   itrace_parse_synth_opts),
                OPT_END()
        };
        const char * const inject_usage[] = {
index 63ea013..950f296 100644 (file)
@@ -10,6 +10,7 @@
 #include "util/header.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "util/callchain.h"
 
 #include "util/parse-options.h"
 #include "util/trace-event.h"
 #include <linux/rbtree.h>
 #include <linux/string.h>
 #include <locale.h>
+#include <regex.h>
 
 static int     kmem_slab;
 static int     kmem_page;
 
 static long    kmem_page_size;
+static enum {
+       KMEM_SLAB,
+       KMEM_PAGE,
+} kmem_default = KMEM_SLAB;  /* for backward compatibility */
 
 struct alloc_stat;
-typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
+typedef int (*sort_fn_t)(void *, void *);
 
 static int                     alloc_flag;
 static int                     caller_flag;
@@ -179,8 +185,8 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
        return ret;
 }
 
-static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
-static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
+static int ptr_cmp(void *, void *);
+static int slab_callsite_cmp(void *, void *);
 
 static struct alloc_stat *search_alloc_stat(unsigned long ptr,
                                            unsigned long call_site,
@@ -221,7 +227,8 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel,
                s_alloc->pingpong++;
 
                s_caller = search_alloc_stat(0, s_alloc->call_site,
-                                            &root_caller_stat, callsite_cmp);
+                                            &root_caller_stat,
+                                            slab_callsite_cmp);
                if (!s_caller)
                        return -1;
                s_caller->pingpong++;
@@ -241,6 +248,8 @@ static unsigned long nr_page_fails;
 static unsigned long nr_page_nomatch;
 
 static bool use_pfn;
+static bool live_page;
+static struct perf_session *kmem_session;
 
 #define MAX_MIGRATE_TYPES  6
 #define MAX_PAGE_ORDER     11
@@ -250,6 +259,7 @@ static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
 struct page_stat {
        struct rb_node  node;
        u64             page;
+       u64             callsite;
        int             order;
        unsigned        gfp_flags;
        unsigned        migrate_type;
@@ -259,13 +269,158 @@ struct page_stat {
        int             nr_free;
 };
 
-static struct rb_root page_tree;
+static struct rb_root page_live_tree;
 static struct rb_root page_alloc_tree;
 static struct rb_root page_alloc_sorted;
+static struct rb_root page_caller_tree;
+static struct rb_root page_caller_sorted;
 
-static struct page_stat *search_page(unsigned long page, bool create)
+struct alloc_func {
+       u64 start;
+       u64 end;
+       char *name;
+};
+
+static int nr_alloc_funcs;
+static struct alloc_func *alloc_func_list;
+
+static int funcmp(const void *a, const void *b)
+{
+       const struct alloc_func *fa = a;
+       const struct alloc_func *fb = b;
+
+       if (fa->start > fb->start)
+               return 1;
+       else
+               return -1;
+}
+
+static int callcmp(const void *a, const void *b)
+{
+       const struct alloc_func *fa = a;
+       const struct alloc_func *fb = b;
+
+       if (fb->start <= fa->start && fa->end < fb->end)
+               return 0;
+
+       if (fa->start > fb->start)
+               return 1;
+       else
+               return -1;
+}
+
+static int build_alloc_func_list(void)
+{
+       int ret;
+       struct map *kernel_map;
+       struct symbol *sym;
+       struct rb_node *node;
+       struct alloc_func *func;
+       struct machine *machine = &kmem_session->machines.host;
+       regex_t alloc_func_regex;
+       const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
+
+       ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
+       if (ret) {
+               char err[BUFSIZ];
+
+               regerror(ret, &alloc_func_regex, err, sizeof(err));
+               pr_err("Invalid regex: %s\n%s", pattern, err);
+               return -EINVAL;
+       }
+
+       kernel_map = machine->vmlinux_maps[MAP__FUNCTION];
+       if (map__load(kernel_map, NULL) < 0) {
+               pr_err("cannot load kernel map\n");
+               return -ENOENT;
+       }
+
+       map__for_each_symbol(kernel_map, sym, node) {
+               if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
+                       continue;
+
+               func = realloc(alloc_func_list,
+                              (nr_alloc_funcs + 1) * sizeof(*func));
+               if (func == NULL)
+                       return -ENOMEM;
+
+               pr_debug("alloc func: %s\n", sym->name);
+               func[nr_alloc_funcs].start = sym->start;
+               func[nr_alloc_funcs].end   = sym->end;
+               func[nr_alloc_funcs].name  = sym->name;
+
+               alloc_func_list = func;
+               nr_alloc_funcs++;
+       }
+
+       qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
+
+       regfree(&alloc_func_regex);
+       return 0;
+}
+
+/*
+ * Find first non-memory allocation function from callchain.
+ * The allocation functions are in the 'alloc_func_list'.
+ */
+static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
+{
+       struct addr_location al;
+       struct machine *machine = &kmem_session->machines.host;
+       struct callchain_cursor_node *node;
+
+       if (alloc_func_list == NULL) {
+               if (build_alloc_func_list() < 0)
+                       goto out;
+       }
+
+       al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+       sample__resolve_callchain(sample, NULL, evsel, &al, 16);
+
+       callchain_cursor_commit(&callchain_cursor);
+       while (true) {
+               struct alloc_func key, *caller;
+               u64 addr;
+
+               node = callchain_cursor_current(&callchain_cursor);
+               if (node == NULL)
+                       break;
+
+               key.start = key.end = node->ip;
+               caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
+                                sizeof(key), callcmp);
+               if (!caller) {
+                       /* found */
+                       if (node->map)
+                               addr = map__unmap_ip(node->map, node->ip);
+                       else
+                               addr = node->ip;
+
+                       return addr;
+               } else
+                       pr_debug3("skipping alloc function: %s\n", caller->name);
+
+               callchain_cursor_advance(&callchain_cursor);
+       }
+
+out:
+       pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
+       return sample->ip;
+}
+
+struct sort_dimension {
+       const char              name[20];
+       sort_fn_t               cmp;
+       struct list_head        list;
+};
+
+static LIST_HEAD(page_alloc_sort_input);
+static LIST_HEAD(page_caller_sort_input);
+
+static struct page_stat *
+__page_stat__findnew_page(struct page_stat *pstat, bool create)
 {
-       struct rb_node **node = &page_tree.rb_node;
+       struct rb_node **node = &page_live_tree.rb_node;
        struct rb_node *parent = NULL;
        struct page_stat *data;
 
@@ -275,7 +430,7 @@ static struct page_stat *search_page(unsigned long page, bool create)
                parent = *node;
                data = rb_entry(*node, struct page_stat, node);
 
-               cmp = data->page - page;
+               cmp = data->page - pstat->page;
                if (cmp < 0)
                        node = &parent->rb_left;
                else if (cmp > 0)
@@ -289,49 +444,48 @@ static struct page_stat *search_page(unsigned long page, bool create)
 
        data = zalloc(sizeof(*data));
        if (data != NULL) {
-               data->page = page;
+               data->page = pstat->page;
+               data->order = pstat->order;
+               data->gfp_flags = pstat->gfp_flags;
+               data->migrate_type = pstat->migrate_type;
 
                rb_link_node(&data->node, parent, node);
-               rb_insert_color(&data->node, &page_tree);
+               rb_insert_color(&data->node, &page_live_tree);
        }
 
        return data;
 }
 
-static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
+static struct page_stat *page_stat__find_page(struct page_stat *pstat)
 {
-       if (a->page > b->page)
-               return -1;
-       if (a->page < b->page)
-               return 1;
-       if (a->order > b->order)
-               return -1;
-       if (a->order < b->order)
-               return 1;
-       if (a->migrate_type > b->migrate_type)
-               return -1;
-       if (a->migrate_type < b->migrate_type)
-               return 1;
-       if (a->gfp_flags > b->gfp_flags)
-               return -1;
-       if (a->gfp_flags < b->gfp_flags)
-               return 1;
-       return 0;
+       return __page_stat__findnew_page(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
+{
+       return __page_stat__findnew_page(pstat, true);
 }
 
-static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create)
+static struct page_stat *
+__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
 {
        struct rb_node **node = &page_alloc_tree.rb_node;
        struct rb_node *parent = NULL;
        struct page_stat *data;
+       struct sort_dimension *sort;
 
        while (*node) {
-               s64 cmp;
+               int cmp = 0;
 
                parent = *node;
                data = rb_entry(*node, struct page_stat, node);
 
-               cmp = page_stat_cmp(data, stat);
+               list_for_each_entry(sort, &page_alloc_sort_input, list) {
+                       cmp = sort->cmp(pstat, data);
+                       if (cmp)
+                               break;
+               }
+
                if (cmp < 0)
                        node = &parent->rb_left;
                else if (cmp > 0)
@@ -345,10 +499,10 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
 
        data = zalloc(sizeof(*data));
        if (data != NULL) {
-               data->page = stat->page;
-               data->order = stat->order;
-               data->gfp_flags = stat->gfp_flags;
-               data->migrate_type = stat->migrate_type;
+               data->page = pstat->page;
+               data->order = pstat->order;
+               data->gfp_flags = pstat->gfp_flags;
+               data->migrate_type = pstat->migrate_type;
 
                rb_link_node(&data->node, parent, node);
                rb_insert_color(&data->node, &page_alloc_tree);
@@ -357,6 +511,71 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
        return data;
 }
 
+static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
+{
+       return __page_stat__findnew_alloc(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
+{
+       return __page_stat__findnew_alloc(pstat, true);
+}
+
+static struct page_stat *
+__page_stat__findnew_caller(struct page_stat *pstat, bool create)
+{
+       struct rb_node **node = &page_caller_tree.rb_node;
+       struct rb_node *parent = NULL;
+       struct page_stat *data;
+       struct sort_dimension *sort;
+
+       while (*node) {
+               int cmp = 0;
+
+               parent = *node;
+               data = rb_entry(*node, struct page_stat, node);
+
+               list_for_each_entry(sort, &page_caller_sort_input, list) {
+                       cmp = sort->cmp(pstat, data);
+                       if (cmp)
+                               break;
+               }
+
+               if (cmp < 0)
+                       node = &parent->rb_left;
+               else if (cmp > 0)
+                       node = &parent->rb_right;
+               else
+                       return data;
+       }
+
+       if (!create)
+               return NULL;
+
+       data = zalloc(sizeof(*data));
+       if (data != NULL) {
+               data->callsite = pstat->callsite;
+               data->order = pstat->order;
+               data->gfp_flags = pstat->gfp_flags;
+               data->migrate_type = pstat->migrate_type;
+
+               rb_link_node(&data->node, parent, node);
+               rb_insert_color(&data->node, &page_caller_tree);
+       }
+
+       return data;
+}
+
+static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
+{
+       return __page_stat__findnew_caller(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
+{
+       return __page_stat__findnew_caller(pstat, true);
+}
+
 static bool valid_page(u64 pfn_or_page)
 {
        if (use_pfn && pfn_or_page == -1UL)
@@ -366,6 +585,176 @@ static bool valid_page(u64 pfn_or_page)
        return true;
 }
 
+struct gfp_flag {
+       unsigned int flags;
+       char *compact_str;
+       char *human_readable;
+};
+
+static struct gfp_flag *gfps;
+static int nr_gfps;
+
+static int gfpcmp(const void *a, const void *b)
+{
+       const struct gfp_flag *fa = a;
+       const struct gfp_flag *fb = b;
+
+       return fa->flags - fb->flags;
+}
+
+/* see include/trace/events/gfpflags.h */
+static const struct {
+       const char *original;
+       const char *compact;
+} gfp_compact_table[] = {
+       { "GFP_TRANSHUGE",              "THP" },
+       { "GFP_HIGHUSER_MOVABLE",       "HUM" },
+       { "GFP_HIGHUSER",               "HU" },
+       { "GFP_USER",                   "U" },
+       { "GFP_TEMPORARY",              "TMP" },
+       { "GFP_KERNEL",                 "K" },
+       { "GFP_NOFS",                   "NF" },
+       { "GFP_ATOMIC",                 "A" },
+       { "GFP_NOIO",                   "NI" },
+       { "GFP_HIGH",                   "H" },
+       { "GFP_WAIT",                   "W" },
+       { "GFP_IO",                     "I" },
+       { "GFP_COLD",                   "CO" },
+       { "GFP_NOWARN",                 "NWR" },
+       { "GFP_REPEAT",                 "R" },
+       { "GFP_NOFAIL",                 "NF" },
+       { "GFP_NORETRY",                "NR" },
+       { "GFP_COMP",                   "C" },
+       { "GFP_ZERO",                   "Z" },
+       { "GFP_NOMEMALLOC",             "NMA" },
+       { "GFP_MEMALLOC",               "MA" },
+       { "GFP_HARDWALL",               "HW" },
+       { "GFP_THISNODE",               "TN" },
+       { "GFP_RECLAIMABLE",            "RC" },
+       { "GFP_MOVABLE",                "M" },
+       { "GFP_NOTRACK",                "NT" },
+       { "GFP_NO_KSWAPD",              "NK" },
+       { "GFP_OTHER_NODE",             "ON" },
+       { "GFP_NOWAIT",                 "NW" },
+};
+
+static size_t max_gfp_len;
+
+static char *compact_gfp_flags(char *gfp_flags)
+{
+       char *orig_flags = strdup(gfp_flags);
+       char *new_flags = NULL;
+       char *str, *pos = NULL;
+       size_t len = 0;
+
+       if (orig_flags == NULL)
+               return NULL;
+
+       str = strtok_r(orig_flags, "|", &pos);
+       while (str) {
+               size_t i;
+               char *new;
+               const char *cpt;
+
+               for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
+                       if (strcmp(gfp_compact_table[i].original, str))
+                               continue;
+
+                       cpt = gfp_compact_table[i].compact;
+                       new = realloc(new_flags, len + strlen(cpt) + 2);
+                       if (new == NULL) {
+                               free(new_flags);
+                               return NULL;
+                       }
+
+                       new_flags = new;
+
+                       if (!len) {
+                               strcpy(new_flags, cpt);
+                       } else {
+                               strcat(new_flags, "|");
+                               strcat(new_flags, cpt);
+                               len++;
+                       }
+
+                       len += strlen(cpt);
+               }
+
+               str = strtok_r(NULL, "|", &pos);
+       }
+
+       if (max_gfp_len < len)
+               max_gfp_len = len;
+
+       free(orig_flags);
+       return new_flags;
+}
+
+static char *compact_gfp_string(unsigned long gfp_flags)
+{
+       struct gfp_flag key = {
+               .flags = gfp_flags,
+       };
+       struct gfp_flag *gfp;
+
+       gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+       if (gfp)
+               return gfp->compact_str;
+
+       return NULL;
+}
+
+static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
+                          unsigned int gfp_flags)
+{
+       struct pevent_record record = {
+               .cpu = sample->cpu,
+               .data = sample->raw_data,
+               .size = sample->raw_size,
+       };
+       struct trace_seq seq;
+       char *str, *pos = NULL;
+
+       if (nr_gfps) {
+               struct gfp_flag key = {
+                       .flags = gfp_flags,
+               };
+
+               if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
+                       return 0;
+       }
+
+       trace_seq_init(&seq);
+       pevent_event_info(&seq, evsel->tp_format, &record);
+
+       str = strtok_r(seq.buffer, " ", &pos);
+       while (str) {
+               if (!strncmp(str, "gfp_flags=", 10)) {
+                       struct gfp_flag *new;
+
+                       new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
+                       if (new == NULL)
+                               return -ENOMEM;
+
+                       gfps = new;
+                       new += nr_gfps++;
+
+                       new->flags = gfp_flags;
+                       new->human_readable = strdup(str + 10);
+                       new->compact_str = compact_gfp_flags(str + 10);
+                       if (!new->human_readable || !new->compact_str)
+                               return -ENOMEM;
+
+                       qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+               }
+
+               str = strtok_r(NULL, " ", &pos);
+       }
+
+       trace_seq_destroy(&seq);
+       return 0;
+}
+
 static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
                                                struct perf_sample *sample)
 {
@@ -375,7 +764,8 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
        unsigned int migrate_type = perf_evsel__intval(evsel, sample,
                                                       "migratetype");
        u64 bytes = kmem_page_size << order;
-       struct page_stat *stat;
+       u64 callsite;
+       struct page_stat *pstat;
        struct page_stat this = {
                .order = order,
                .gfp_flags = gfp_flags,
@@ -397,25 +787,41 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
                return 0;
        }
 
+       if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
+               return -1;
+
+       callsite = find_callsite(evsel, sample);
+
        /*
         * This is to find the current page (with correct gfp flags and
         * migrate type) at free event.
         */
-       stat = search_page(page, true);
-       if (stat == NULL)
+       this.page = page;
+       pstat = page_stat__findnew_page(&this);
+       if (pstat == NULL)
                return -ENOMEM;
 
-       stat->order = order;
-       stat->gfp_flags = gfp_flags;
-       stat->migrate_type = migrate_type;
+       pstat->nr_alloc++;
+       pstat->alloc_bytes += bytes;
+       pstat->callsite = callsite;
 
-       this.page = page;
-       stat = search_page_alloc_stat(&this, true);
-       if (stat == NULL)
+       if (!live_page) {
+               pstat = page_stat__findnew_alloc(&this);
+               if (pstat == NULL)
+                       return -ENOMEM;
+
+               pstat->nr_alloc++;
+               pstat->alloc_bytes += bytes;
+               pstat->callsite = callsite;
+       }
+
+       this.callsite = callsite;
+       pstat = page_stat__findnew_caller(&this);
+       if (pstat == NULL)
                return -ENOMEM;
 
-       stat->nr_alloc++;
-       stat->alloc_bytes += bytes;
+       pstat->nr_alloc++;
+       pstat->alloc_bytes += bytes;
 
        order_stats[order][migrate_type]++;
 
@@ -428,7 +834,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
        u64 page;
        unsigned int order = perf_evsel__intval(evsel, sample, "order");
        u64 bytes = kmem_page_size << order;
-       struct page_stat *stat;
+       struct page_stat *pstat;
        struct page_stat this = {
                .order = order,
        };
@@ -441,8 +847,9 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
        nr_page_frees++;
        total_page_free_bytes += bytes;
 
-       stat = search_page(page, false);
-       if (stat == NULL) {
+       this.page = page;
+       pstat = page_stat__find_page(&this);
+       if (pstat == NULL) {
                pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
                          page, order);
 
@@ -452,19 +859,40 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
                return 0;
        }
 
-       this.page = page;
-       this.gfp_flags = stat->gfp_flags;
-       this.migrate_type = stat->migrate_type;
+       this.gfp_flags = pstat->gfp_flags;
+       this.migrate_type = pstat->migrate_type;
+       this.callsite = pstat->callsite;
+
+       rb_erase(&pstat->node, &page_live_tree);
+       free(pstat);
 
-       rb_erase(&stat->node, &page_tree);
-       free(stat);
+       if (live_page) {
+               order_stats[this.order][this.migrate_type]--;
+       } else {
+               pstat = page_stat__find_alloc(&this);
+               if (pstat == NULL)
+                       return -ENOMEM;
+
+               pstat->nr_free++;
+               pstat->free_bytes += bytes;
+       }
 
-       stat = search_page_alloc_stat(&this, false);
-       if (stat == NULL)
+       pstat = page_stat__find_caller(&this);
+       if (pstat == NULL)
                return -ENOENT;
 
-       stat->nr_free++;
-       stat->free_bytes += bytes;
+       pstat->nr_free++;
+       pstat->free_bytes += bytes;
+
+       if (live_page) {
+               pstat->nr_alloc--;
+               pstat->alloc_bytes -= bytes;
+
+               if (pstat->nr_alloc == 0) {
+                       rb_erase(&pstat->node, &page_caller_tree);
+                       free(pstat);
+               }
+       }
 
        return 0;
 }
@@ -478,6 +906,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
                                struct perf_evsel *evsel,
                                struct machine *machine)
 {
+       int err = 0;
        struct thread *thread = machine__findnew_thread(machine, sample->pid,
                                                        sample->tid);
 
@@ -491,10 +920,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 
        if (evsel->handler != NULL) {
                tracepoint_handler f = evsel->handler;
-               return f(evsel, sample);
+               err = f(evsel, sample);
        }
 
-       return 0;
+       thread__put(thread);
+
+       return err;
 }
 
 static struct perf_tool perf_kmem = {
@@ -576,41 +1007,111 @@ static const char * const migrate_type_str[] = {
        "UNKNOWN",
 };
 
-static void __print_page_result(struct rb_root *root,
-                               struct perf_session *session __maybe_unused,
-                               int n_lines)
+static void __print_page_alloc_result(struct perf_session *session, int n_lines)
 {
-       struct rb_node *next = rb_first(root);
+       struct rb_node *next = rb_first(&page_alloc_sorted);
+       struct machine *machine = &session->machines.host;
        const char *format;
+       int gfp_len = max(strlen("GFP flags"), max_gfp_len);
 
-       printf("\n%.80s\n", graph_dotted_line);
-       printf(" %-16s | Total alloc (KB) | Hits      | Order | Mig.type | GFP flags\n",
-              use_pfn ? "PFN" : "Page");
-       printf("%.80s\n", graph_dotted_line);
+       printf("\n%.105s\n", graph_dotted_line);
+       printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
+              use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
+              gfp_len, "GFP flags");
+       printf("%.105s\n", graph_dotted_line);
 
        if (use_pfn)
-               format = " %16llu | %'16llu | %'9d | %5d | %8s |  %08lx\n";
+               format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
        else
-               format = " %016llx | %'16llu | %'9d | %5d | %8s |  %08lx\n";
+               format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
 
        while (next && n_lines--) {
                struct page_stat *data;
+               struct symbol *sym;
+               struct map *map;
+               char buf[32];
+               char *caller = buf;
 
                data = rb_entry(next, struct page_stat, node);
+               sym = machine__find_kernel_function(machine, data->callsite,
+                                                   &map, NULL);
+               if (sym && sym->name)
+                       caller = sym->name;
+               else
+                       scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
 
                printf(format, (unsigned long long)data->page,
                       (unsigned long long)data->alloc_bytes / 1024,
                       data->nr_alloc, data->order,
                       migrate_type_str[data->migrate_type],
-                      (unsigned long)data->gfp_flags);
+                      gfp_len, compact_gfp_string(data->gfp_flags), caller);
 
                next = rb_next(next);
        }
 
-       if (n_lines == -1)
-               printf(" ...              | ...              | ...       | ...   | ...      | ...     \n");
+       if (n_lines == -1) {
+               printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
+                      gfp_len, "...");
+       }
 
-       printf("%.80s\n", graph_dotted_line);
+       printf("%.105s\n", graph_dotted_line);
+}
+
+static void __print_page_caller_result(struct perf_session *session, int n_lines)
+{
+       struct rb_node *next = rb_first(&page_caller_sorted);
+       struct machine *machine = &session->machines.host;
+       int gfp_len = max(strlen("GFP flags"), max_gfp_len);
+
+       printf("\n%.105s\n", graph_dotted_line);
+       printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
+              live_page ? "Live" : "Total", gfp_len, "GFP flags");
+       printf("%.105s\n", graph_dotted_line);
+
+       while (next && n_lines--) {
+               struct page_stat *data;
+               struct symbol *sym;
+               struct map *map;
+               char buf[32];
+               char *caller = buf;
+
+               data = rb_entry(next, struct page_stat, node);
+               sym = machine__find_kernel_function(machine, data->callsite,
+                                                   &map, NULL);
+               if (sym && sym->name)
+                       caller = sym->name;
+               else
+                       scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
+
+               printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
+                      (unsigned long long)data->alloc_bytes / 1024,
+                      data->nr_alloc, data->order,
+                      migrate_type_str[data->migrate_type],
+                      gfp_len, compact_gfp_string(data->gfp_flags), caller);
+
+               next = rb_next(next);
+       }
+
+       if (n_lines == -1) {
+               printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
+                      gfp_len, "...");
+       }
+
+       printf("%.105s\n", graph_dotted_line);
+}
+
+static void print_gfp_flags(void)
+{
+       int i;
+
+       printf("#\n");
+       printf("# GFP flags\n");
+       printf("# ---------\n");
+       for (i = 0; i < nr_gfps; i++) {
+               printf("# %08x: %*s: %s\n", gfps[i].flags,
+                      (int) max_gfp_len, gfps[i].compact_str,
+                      gfps[i].human_readable);
+       }
 }
 
 static void print_slab_summary(void)
@@ -640,9 +1141,9 @@ static void print_page_summary(void)
               nr_page_frees, total_page_free_bytes / 1024);
        printf("\n");
 
-       printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
+       printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
               nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
-       printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
+       printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
               nr_page_allocs - nr_alloc_freed,
               (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
@@ -682,8 +1183,12 @@ static void print_slab_result(struct perf_session *session)
 
 static void print_page_result(struct perf_session *session)
 {
+       if (caller_flag || alloc_flag)
+               print_gfp_flags();
+       if (caller_flag)
+               __print_page_caller_result(session, caller_lines);
        if (alloc_flag)
-               __print_page_result(&page_alloc_sorted, session, alloc_lines);
+               __print_page_alloc_result(session, alloc_lines);
        print_page_summary();
 }
 
@@ -695,14 +1200,10 @@ static void print_result(struct perf_session *session)
                print_page_result(session);
 }
 
-struct sort_dimension {
-       const char              name[20];
-       sort_fn_t               cmp;
-       struct list_head        list;
-};
-
-static LIST_HEAD(caller_sort);
-static LIST_HEAD(alloc_sort);
+static LIST_HEAD(slab_caller_sort);
+static LIST_HEAD(slab_alloc_sort);
+static LIST_HEAD(page_caller_sort);
+static LIST_HEAD(page_alloc_sort);
 
 static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
                             struct list_head *sort_list)
@@ -751,10 +1252,12 @@ static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted
        }
 }
 
-static void sort_page_insert(struct rb_root *root, struct page_stat *data)
+static void sort_page_insert(struct rb_root *root, struct page_stat *data,
+                            struct list_head *sort_list)
 {
        struct rb_node **new = &root->rb_node;
        struct rb_node *parent = NULL;
+       struct sort_dimension *sort;
 
        while (*new) {
                struct page_stat *this;
@@ -763,8 +1266,11 @@ static void sort_page_insert(struct rb_root *root, struct page_stat *data)
                this = rb_entry(*new, struct page_stat, node);
                parent = *new;
 
-               /* TODO: support more sort key */
-               cmp = data->alloc_bytes - this->alloc_bytes;
+               list_for_each_entry(sort, sort_list, list) {
+                       cmp = sort->cmp(data, this);
+                       if (cmp)
+                               break;
+               }
 
                if (cmp > 0)
                        new = &parent->rb_left;
@@ -776,7 +1282,8 @@ static void sort_page_insert(struct rb_root *root, struct page_stat *data)
        rb_insert_color(&data->node, root);
 }
 
-static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted)
+static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
+                              struct list_head *sort_list)
 {
        struct rb_node *node;
        struct page_stat *data;
@@ -788,7 +1295,7 @@ static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted
 
                rb_erase(node, root);
                data = rb_entry(node, struct page_stat, node);
-               sort_page_insert(root_sorted, data);
+               sort_page_insert(root_sorted, data, sort_list);
        }
 }
 
@@ -796,12 +1303,20 @@ static void sort_result(void)
 {
        if (kmem_slab) {
                __sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
-                                  &alloc_sort);
+                                  &slab_alloc_sort);
                __sort_slab_result(&root_caller_stat, &root_caller_sorted,
-                                  &caller_sort);
+                                  &slab_caller_sort);
        }
        if (kmem_page) {
-               __sort_page_result(&page_alloc_tree, &page_alloc_sorted);
+               if (live_page)
+                       __sort_page_result(&page_live_tree, &page_alloc_sorted,
+                                          &page_alloc_sort);
+               else
+                       __sort_page_result(&page_alloc_tree, &page_alloc_sorted,
+                                          &page_alloc_sort);
+
+               __sort_page_result(&page_caller_tree, &page_caller_sorted,
+                                  &page_caller_sort);
        }
 }
 
@@ -850,8 +1365,12 @@ out:
        return err;
 }
 
-static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
+/* slab sort keys */
+static int ptr_cmp(void *a, void *b)
 {
+       struct alloc_stat *l = a;
+       struct alloc_stat *r = b;
+
        if (l->ptr < r->ptr)
                return -1;
        else if (l->ptr > r->ptr)
@@ -864,8 +1383,11 @@ static struct sort_dimension ptr_sort_dimension = {
        .cmp    = ptr_cmp,
 };
 
-static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int slab_callsite_cmp(void *a, void *b)
 {
+       struct alloc_stat *l = a;
+       struct alloc_stat *r = b;
+
        if (l->call_site < r->call_site)
                return -1;
        else if (l->call_site > r->call_site)
@@ -875,11 +1397,14 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
 
 static struct sort_dimension callsite_sort_dimension = {
        .name   = "callsite",
-       .cmp    = callsite_cmp,
+       .cmp    = slab_callsite_cmp,
 };
 
-static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int hit_cmp(void *a, void *b)
 {
+       struct alloc_stat *l = a;
+       struct alloc_stat *r = b;
+
        if (l->hit < r->hit)
                return -1;
        else if (l->hit > r->hit)
@@ -892,8 +1417,11 @@ static struct sort_dimension hit_sort_dimension = {
        .cmp    = hit_cmp,
 };
 
-static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int bytes_cmp(void *a, void *b)
 {
+       struct alloc_stat *l = a;
+       struct alloc_stat *r = b;
+
        if (l->bytes_alloc < r->bytes_alloc)
                return -1;
        else if (l->bytes_alloc > r->bytes_alloc)
@@ -906,9 +1434,11 @@ static struct sort_dimension bytes_sort_dimension = {
        .cmp    = bytes_cmp,
 };
 
-static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int frag_cmp(void *a, void *b)
 {
        double x, y;
+       struct alloc_stat *l = a;
+       struct alloc_stat *r = b;
 
        x = fragmentation(l->bytes_req, l->bytes_alloc);
        y = fragmentation(r->bytes_req, r->bytes_alloc);
@@ -925,8 +1455,11 @@ static struct sort_dimension frag_sort_dimension = {
        .cmp    = frag_cmp,
 };
 
-static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int pingpong_cmp(void *a, void *b)
 {
+       struct alloc_stat *l = a;
+       struct alloc_stat *r = b;
+
        if (l->pingpong < r->pingpong)
                return -1;
        else if (l->pingpong > r->pingpong)
@@ -939,7 +1472,135 @@ static struct sort_dimension pingpong_sort_dimension = {
        .cmp    = pingpong_cmp,
 };
 
-static struct sort_dimension *avail_sorts[] = {
+/* page sort keys */
+static int page_cmp(void *a, void *b)
+{
+       struct page_stat *l = a;
+       struct page_stat *r = b;
+
+       if (l->page < r->page)
+               return -1;
+       else if (l->page > r->page)
+               return 1;
+       return 0;
+}
+
+static struct sort_dimension page_sort_dimension = {
+       .name   = "page",
+       .cmp    = page_cmp,
+};
+
+static int page_callsite_cmp(void *a, void *b)
+{
+       struct page_stat *l = a;
+       struct page_stat *r = b;
+
+       if (l->callsite < r->callsite)
+               return -1;
+       else if (l->callsite > r->callsite)
+               return 1;
+       return 0;
+}
+
+static struct sort_dimension page_callsite_sort_dimension = {
+       .name   = "callsite",
+       .cmp    = page_callsite_cmp,
+};
+
+static int page_hit_cmp(void *a, void *b)
+{
+       struct page_stat *l = a;
+       struct page_stat *r = b;
+
+       if (l->nr_alloc < r->nr_alloc)
+               return -1;
+       else if (l->nr_alloc > r->nr_alloc)
+               return 1;
+       return 0;
+}
+
+static struct sort_dimension page_hit_sort_dimension = {
+       .name   = "hit",
+       .cmp    = page_hit_cmp,
+};
+
+static int page_bytes_cmp(void *a, void *b)
+{
+       struct page_stat *l = a;
+       struct page_stat *r = b;
+
+       if (l->alloc_bytes < r->alloc_bytes)
+               return -1;
+       else if (l->alloc_bytes > r->alloc_bytes)
+               return 1;
+       return 0;
+}
+
+static struct sort_dimension page_bytes_sort_dimension = {
+       .name   = "bytes",
+       .cmp    = page_bytes_cmp,
+};
+
+static int page_order_cmp(void *a, void *b)
+{
+       struct page_stat *l = a;
+       struct page_stat *r = b;
+
+       if (l->order < r->order)
+               return -1;
+       else if (l->order > r->order)
+               return 1;
+       return 0;
+}
+
+static struct sort_dimension page_order_sort_dimension = {
+       .name   = "order",
+       .cmp    = page_order_cmp,
+};
+
+static int migrate_type_cmp(void *a, void *b)
+{
+       struct page_stat *l = a;
+       struct page_stat *r = b;
+
+       /* for internal use to find free'd page */
+       if (l->migrate_type == -1U)
+               return 0;
+
+       if (l->migrate_type < r->migrate_type)
+               return -1;
+       else if (l->migrate_type > r->migrate_type)
+               return 1;
+       return 0;
+}
+
+static struct sort_dimension migrate_type_sort_dimension = {
+       .name   = "migtype",
+       .cmp    = migrate_type_cmp,
+};
+
+static int gfp_flags_cmp(void *a, void *b)
+{
+       struct page_stat *l = a;
+       struct page_stat *r = b;
+
+       /* for internal use to find free'd page */
+       if (l->gfp_flags == -1U)
+               return 0;
+
+       if (l->gfp_flags < r->gfp_flags)
+               return -1;
+       else if (l->gfp_flags > r->gfp_flags)
+               return 1;
+       return 0;
+}
+
+static struct sort_dimension gfp_flags_sort_dimension = {
+       .name   = "gfp",
+       .cmp    = gfp_flags_cmp,
+};
+
+static struct sort_dimension *slab_sorts[] = {
        &ptr_sort_dimension,
        &callsite_sort_dimension,
        &hit_sort_dimension,
@@ -948,16 +1609,44 @@ static struct sort_dimension *avail_sorts[] = {
        &pingpong_sort_dimension,
 };
 
-#define NUM_AVAIL_SORTS        ((int)ARRAY_SIZE(avail_sorts))
+static struct sort_dimension *page_sorts[] = {
+       &page_sort_dimension,
+       &page_callsite_sort_dimension,
+       &page_hit_sort_dimension,
+       &page_bytes_sort_dimension,
+       &page_order_sort_dimension,
+       &migrate_type_sort_dimension,
+       &gfp_flags_sort_dimension,
+};
+
+static int slab_sort_dimension__add(const char *tok, struct list_head *list)
+{
+       struct sort_dimension *sort;
+       int i;
+
+       for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
+               if (!strcmp(slab_sorts[i]->name, tok)) {
+                       sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
+                       if (!sort) {
+                               pr_err("%s: memdup failed\n", __func__);
+                               return -1;
+                       }
+                       list_add_tail(&sort->list, list);
+                       return 0;
+               }
+       }
+
+       return -1;
+}
 
-static int sort_dimension__add(const char *tok, struct list_head *list)
+static int page_sort_dimension__add(const char *tok, struct list_head *list)
 {
        struct sort_dimension *sort;
        int i;
 
-       for (i = 0; i < NUM_AVAIL_SORTS; i++) {
-               if (!strcmp(avail_sorts[i]->name, tok)) {
-                       sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
+       for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
+               if (!strcmp(page_sorts[i]->name, tok)) {
+                       sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
                        if (!sort) {
                                pr_err("%s: memdup failed\n", __func__);
                                return -1;
@@ -970,7 +1659,7 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
        return -1;
 }
 
-static int setup_sorting(struct list_head *sort_list, const char *arg)
+static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
 {
        char *tok;
        char *str = strdup(arg);
@@ -985,8 +1674,34 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
                tok = strsep(&pos, ",");
                if (!tok)
                        break;
-               if (sort_dimension__add(tok, sort_list) < 0) {
-                       error("Unknown --sort key: '%s'", tok);
+               if (slab_sort_dimension__add(tok, sort_list) < 0) {
+                       error("Unknown slab --sort key: '%s'", tok);
+                       free(str);
+                       return -1;
+               }
+       }
+
+       free(str);
+       return 0;
+}
+
+static int setup_page_sorting(struct list_head *sort_list, const char *arg)
+{
+       char *tok;
+       char *str = strdup(arg);
+       char *pos = str;
+
+       if (!str) {
+               pr_err("%s: strdup failed\n", __func__);
+               return -1;
+       }
+
+       while (true) {
+               tok = strsep(&pos, ",");
+               if (!tok)
+                       break;
+               if (page_sort_dimension__add(tok, sort_list) < 0) {
+                       error("Unknown page --sort key: '%s'", tok);
                        free(str);
                        return -1;
                }
@@ -1002,10 +1717,18 @@ static int parse_sort_opt(const struct option *opt __maybe_unused,
        if (!arg)
                return -1;
 
-       if (caller_flag > alloc_flag)
-               return setup_sorting(&caller_sort, arg);
-       else
-               return setup_sorting(&alloc_sort, arg);
+       if (kmem_page > kmem_slab ||
+           (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
+               if (caller_flag > alloc_flag)
+                       return setup_page_sorting(&page_caller_sort, arg);
+               else
+                       return setup_page_sorting(&page_alloc_sort, arg);
+       } else {
+               if (caller_flag > alloc_flag)
+                       return setup_slab_sorting(&slab_caller_sort, arg);
+               else
+                       return setup_slab_sorting(&slab_alloc_sort, arg);
+       }
 
        return 0;
 }
@@ -1084,7 +1807,7 @@ static int __cmd_record(int argc, const char **argv)
        if (kmem_slab)
                rec_argc += ARRAY_SIZE(slab_events);
        if (kmem_page)
-               rec_argc += ARRAY_SIZE(page_events);
+               rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
 
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
@@ -1099,6 +1822,8 @@ static int __cmd_record(int argc, const char **argv)
                        rec_argv[i] = strdup(slab_events[j]);
        }
        if (kmem_page) {
+               rec_argv[i++] = strdup("-g");
+
                for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
                        rec_argv[i] = strdup(page_events[j]);
        }
@@ -1109,9 +1834,26 @@ static int __cmd_record(int argc, const char **argv)
        return cmd_record(i, rec_argv, NULL);
 }
 
+static int kmem_config(const char *var, const char *value, void *cb)
+{
+       if (!strcmp(var, "kmem.default")) {
+               if (!strcmp(value, "slab"))
+                       kmem_default = KMEM_SLAB;
+               else if (!strcmp(value, "page"))
+                       kmem_default = KMEM_PAGE;
+               else
+                       pr_err("invalid default value ('slab' or 'page' required): %s\n",
+                              value);
+               return 0;
+       }
+
+       return perf_default_config(var, value, cb);
+}
+
 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-       const char * const default_sort_order = "frag,hit,bytes";
+       const char * const default_slab_sort = "frag,hit,bytes";
+       const char * const default_page_sort = "bytes,hit";
        struct perf_data_file file = {
                .mode = PERF_DATA_MODE_READ,
        };
@@ -1124,8 +1866,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
                           "show per-allocation statistics", parse_alloc_opt),
        OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
-                    "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
-                    parse_sort_opt),
+                    "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
+                    "page, order, migtype, gfp", parse_sort_opt),
        OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
        OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
        OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
@@ -1133,6 +1875,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
                           parse_slab_opt),
        OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
                           parse_page_opt),
+       OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
        OPT_END()
        };
        const char *const kmem_subcommands[] = { "record", "stat", NULL };
@@ -1142,15 +1885,21 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
        };
        struct perf_session *session;
        int ret = -1;
+       const char errmsg[] = "No %s allocation events found.  Have you run 'perf kmem record --%s'?\n";
 
+       perf_config(kmem_config, NULL);
        argc = parse_options_subcommand(argc, argv, kmem_options,
                                        kmem_subcommands, kmem_usage, 0);
 
        if (!argc)
                usage_with_options(kmem_usage, kmem_options);
 
-       if (kmem_slab == 0 && kmem_page == 0)
-               kmem_slab = 1;  /* for backward compatibility */
+       if (kmem_slab == 0 && kmem_page == 0) {
+               if (kmem_default == KMEM_SLAB)
+                       kmem_slab = 1;
+               else
+                       kmem_page = 1;
+       }
 
        if (!strncmp(argv[0], "rec", 3)) {
                symbol__init(NULL);
@@ -1159,19 +1908,30 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 
        file.path = input_name;
 
-       session = perf_session__new(&file, false, &perf_kmem);
+       kmem_session = session = perf_session__new(&file, false, &perf_kmem);
        if (session == NULL)
                return -1;
 
+       if (kmem_slab) {
+               if (!perf_evlist__find_tracepoint_by_name(session->evlist,
+                                                         "kmem:kmalloc")) {
+                       pr_err(errmsg, "slab", "slab");
+                       return -1;
+               }
+       }
+
        if (kmem_page) {
-               struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+               struct perf_evsel *evsel;
 
-               if (evsel == NULL || evsel->tp_format == NULL) {
-                       pr_err("invalid event found.. aborting\n");
+               evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+                                                            "kmem:mm_page_alloc");
+               if (evsel == NULL) {
+                       pr_err(errmsg, "page", "page");
                        return -1;
                }
 
                kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
+               symbol_conf.use_callchain = true;
        }
 
        symbol__init(&session->header.env);
@@ -1182,11 +1942,21 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
                if (cpu__setup_cpunode_map())
                        goto out_delete;
 
-               if (list_empty(&caller_sort))
-                       setup_sorting(&caller_sort, default_sort_order);
-               if (list_empty(&alloc_sort))
-                       setup_sorting(&alloc_sort, default_sort_order);
-
+               if (list_empty(&slab_caller_sort))
+                       setup_slab_sorting(&slab_caller_sort, default_slab_sort);
+               if (list_empty(&slab_alloc_sort))
+                       setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
+               if (list_empty(&page_caller_sort))
+                       setup_page_sorting(&page_caller_sort, default_page_sort);
+               if (list_empty(&page_alloc_sort))
+                       setup_page_sorting(&page_alloc_sort, default_page_sort);
+
+               if (kmem_page) {
+                       setup_page_sorting(&page_alloc_sort_input,
+                                          "page,order,migtype,gfp");
+                       setup_page_sorting(&page_caller_sort_input,
+                                          "callsite,order,migtype,gfp");
+               }
                ret = __cmd_kmem(session);
        } else
                usage_with_options(kmem_usage, kmem_options);
index 1f9338f..74878cd 100644 (file)
@@ -651,6 +651,7 @@ static int process_sample_event(struct perf_tool *tool,
                                struct perf_evsel *evsel,
                                struct machine *machine)
 {
+       int err = 0;
        struct thread *thread;
        struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
                                                 tool);
@@ -666,9 +667,10 @@ static int process_sample_event(struct perf_tool *tool,
        }
 
        if (!handle_kvm_event(kvm, thread, evsel, sample))
-               return -1;
+               err = -1;
 
-       return 0;
+       thread__put(thread);
+       return err;
 }
 
 static int cpu_isa_config(struct perf_kvm_stat *kvm)
@@ -1309,6 +1311,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
                        "show events other than"
                        " HLT (x86 only) or Wait state (s390 only)"
                        " that take longer than duration usecs"),
+               OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout,
+                               "per thread proc mmap processing timeout in ms"),
                OPT_END()
        };
        const char * const live_usage[] = {
@@ -1336,6 +1340,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
        kvm->opts.target.uses_mmap = false;
        kvm->opts.target.uid_str = NULL;
        kvm->opts.target.uid = UINT_MAX;
+       kvm->opts.proc_map_timeout = 500;
 
        symbol__init(NULL);
        disable_buildid_cache();
@@ -1391,7 +1396,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
        perf_session__set_id_hdr_size(kvm->session);
        ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
        machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
-                                   kvm->evlist->threads, false);
+                                   kvm->evlist->threads, false, kvm->opts.proc_map_timeout);
        err = kvm_live_open_events(kvm);
        if (err)
                goto out;
index d49c2ab..de16aae 100644 (file)
@@ -769,6 +769,7 @@ static void dump_threads(void)
                t = perf_session__findnew(session, st->tid);
                pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
                node = rb_next(node);
+               thread__put(t);
        };
 }
 
@@ -810,6 +811,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
                                struct perf_evsel *evsel,
                                struct machine *machine)
 {
+       int err = 0;
        struct thread *thread = machine__findnew_thread(machine, sample->pid,
                                                        sample->tid);
 
@@ -821,10 +823,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 
        if (evsel->handler != NULL) {
                tracepoint_handler f = evsel->handler;
-               return f(evsel, sample);
+               err = f(evsel, sample);
        }
 
-       return 0;
+       thread__put(thread);
+
+       return err;
 }
 
 static void sort_result(void)
index 675216e..da2ec06 100644 (file)
@@ -74,7 +74,7 @@ dump_raw_samples(struct perf_tool *tool,
        }
 
        if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
-               return 0;
+               goto out_put;
 
        if (al.map != NULL)
                al.map->dso->hit = 1;
@@ -103,7 +103,8 @@ dump_raw_samples(struct perf_tool *tool,
                symbol_conf.field_sep,
                al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
                al.sym ? al.sym->name : "???");
-
+out_put:
+       addr_location__put(&al);
        return 0;
 }
 
index f7b1af6..1272559 100644 (file)
 
 #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
 #define DEFAULT_FUNC_FILTER "!_*"
+#define DEFAULT_LIST_FILTER "*:*"
 
 /* Session management structure */
 static struct {
+       int command;    /* Command short_name */
        bool list_events;
-       bool force_add;
-       bool show_lines;
-       bool show_vars;
-       bool show_ext_vars;
-       bool show_funcs;
-       bool mod_events;
        bool uprobes;
        bool quiet;
        bool target_used;
        int nevents;
        struct perf_probe_event events[MAX_PROBES];
-       struct strlist *dellist;
        struct line_range line_range;
        char *target;
-       int max_probe_points;
        struct strfilter *filter;
 } params;
 
@@ -93,6 +87,28 @@ static int parse_probe_event(const char *str)
        return ret;
 }
 
+static int params_add_filter(const char *str)
+{
+       const char *err = NULL;
+       int ret = 0;
+
+       pr_debug2("Add filter: %s\n", str);
+       if (!params.filter) {
+               params.filter = strfilter__new(str, &err);
+               if (!params.filter)
+                       ret = err ? -EINVAL : -ENOMEM;
+       } else
+               ret = strfilter__or(params.filter, str, &err);
+
+       if (ret == -EINVAL) {
+               pr_err("Filter parse error at %td.\n", err - str + 1);
+               pr_err("Source: \"%s\"\n", str);
+               pr_err("         %*c\n", (int)(err - str + 1), '^');
+       }
+
+       return ret;
+}
+
 static int set_target(const char *ptr)
 {
        int found = 0;
@@ -152,34 +168,11 @@ static int parse_probe_event_argv(int argc, const char **argv)
 
                len += sprintf(&buf[len], "%s ", argv[i]);
        }
-       params.mod_events = true;
        ret = parse_probe_event(buf);
        free(buf);
        return ret;
 }
 
-static int opt_add_probe_event(const struct option *opt __maybe_unused,
-                             const char *str, int unset __maybe_unused)
-{
-       if (str) {
-               params.mod_events = true;
-               return parse_probe_event(str);
-       } else
-               return 0;
-}
-
-static int opt_del_probe_event(const struct option *opt __maybe_unused,
-                              const char *str, int unset __maybe_unused)
-{
-       if (str) {
-               params.mod_events = true;
-               if (!params.dellist)
-                       params.dellist = strlist__new(true, NULL);
-               strlist__add(params.dellist, str);
-       }
-       return 0;
-}
-
 static int opt_set_target(const struct option *opt, const char *str,
                        int unset __maybe_unused)
 {
@@ -217,8 +210,10 @@ static int opt_set_target(const struct option *opt, const char *str,
        return ret;
 }
 
+/* Command option callbacks */
+
 #ifdef HAVE_DWARF_SUPPORT
-static int opt_show_lines(const struct option *opt __maybe_unused,
+static int opt_show_lines(const struct option *opt,
                          const char *str, int unset __maybe_unused)
 {
        int ret = 0;
@@ -226,19 +221,19 @@ static int opt_show_lines(const struct option *opt __maybe_unused,
        if (!str)
                return 0;
 
-       if (params.show_lines) {
+       if (params.command == 'L') {
                pr_warning("Warning: more than one --line options are"
                           " detected. Only the first one is valid.\n");
                return 0;
        }
 
-       params.show_lines = true;
+       params.command = opt->short_name;
        ret = parse_line_range_desc(str, &params.line_range);
 
        return ret;
 }
 
-static int opt_show_vars(const struct option *opt __maybe_unused,
+static int opt_show_vars(const struct option *opt,
                         const char *str, int unset __maybe_unused)
 {
        struct perf_probe_event *pev = &params.events[params.nevents];
@@ -252,29 +247,39 @@ static int opt_show_vars(const struct option *opt __maybe_unused,
                pr_err("  Error: '--vars' doesn't accept arguments.\n");
                return -EINVAL;
        }
-       params.show_vars = true;
+       params.command = opt->short_name;
 
        return ret;
 }
 #endif
+static int opt_add_probe_event(const struct option *opt,
+                             const char *str, int unset __maybe_unused)
+{
+       if (str) {
+               params.command = opt->short_name;
+               return parse_probe_event(str);
+       }
+
+       return 0;
+}
+
+static int opt_set_filter_with_command(const struct option *opt,
+                                      const char *str, int unset)
+{
+       if (!unset)
+               params.command = opt->short_name;
+
+       if (str)
+               return params_add_filter(str);
+
+       return 0;
+}
 
 static int opt_set_filter(const struct option *opt __maybe_unused,
                          const char *str, int unset __maybe_unused)
 {
-       const char *err;
-
-       if (str) {
-               pr_debug2("Set filter: %s\n", str);
-               if (params.filter)
-                       strfilter__delete(params.filter);
-               params.filter = strfilter__new(str, &err);
-               if (!params.filter) {
-                       pr_err("Filter parse error at %td.\n", err - str + 1);
-                       pr_err("Source: \"%s\"\n", str);
-                       pr_err("         %*c\n", (int)(err - str + 1), '^');
-                       return -EINVAL;
-               }
-       }
+       if (str)
+               return params_add_filter(str);
 
        return 0;
 }
@@ -290,8 +295,6 @@ static void cleanup_params(void)
 
        for (i = 0; i < params.nevents; i++)
                clear_perf_probe_event(params.events + i);
-       if (params.dellist)
-               strlist__delete(params.dellist);
        line_range__clear(&params.line_range);
        free(params.target);
        if (params.filter)
@@ -316,22 +319,24 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
                "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
                "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
                "perf probe [<options>] --del '[GROUP:]EVENT' ...",
-               "perf probe --list",
+               "perf probe --list [GROUP:]EVENT ...",
 #ifdef HAVE_DWARF_SUPPORT
                "perf probe [<options>] --line 'LINEDESC'",
                "perf probe [<options>] --vars 'PROBEPOINT'",
 #endif
+               "perf probe [<options>] --funcs",
                NULL
-};
+       };
        struct option options[] = {
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show parsed arguments, etc)"),
        OPT_BOOLEAN('q', "quiet", &params.quiet,
                    "be quiet (do not show any mesages)"),
-       OPT_BOOLEAN('l', "list", &params.list_events,
-                   "list up current probe events"),
+       OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT",
+                            "list up probe events",
+                            opt_set_filter_with_command, DEFAULT_LIST_FILTER),
        OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
-               opt_del_probe_event),
+                    opt_set_filter_with_command),
        OPT_CALLBACK('a', "add", NULL,
 #ifdef HAVE_DWARF_SUPPORT
                "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
@@ -356,7 +361,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
                "\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n",
 #endif
                opt_add_probe_event),
-       OPT_BOOLEAN('f', "force", &params.force_add, "forcibly add events"
+       OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
                    " with existing name"),
 #ifdef HAVE_DWARF_SUPPORT
        OPT_CALLBACK('L', "line", NULL,
@@ -365,8 +370,10 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_CALLBACK('V', "vars", NULL,
                     "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
                     "Show accessible variables on PROBEDEF", opt_show_vars),
-       OPT_BOOLEAN('\0', "externs", &params.show_ext_vars,
+       OPT_BOOLEAN('\0', "externs", &probe_conf.show_ext_vars,
                    "Show external variables too (with --vars only)"),
+       OPT_BOOLEAN('\0', "range", &probe_conf.show_location_range,
+               "Show variables location range in scope (with --vars only)"),
        OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
        OPT_STRING('s', "source", &symbol_conf.source_prefix,
@@ -374,12 +381,15 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_CALLBACK('m', "module", NULL, "modname|path",
                "target module name (for online) or path (for offline)",
                opt_set_target),
+       OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
+               "Don't search inlined functions"),
 #endif
        OPT__DRY_RUN(&probe_event_dry_run),
-       OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
+       OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
                 "Set how many probe points can be found for a probe."),
-       OPT_BOOLEAN('F', "funcs", &params.show_funcs,
-                   "Show potential probe-able functions."),
+       OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+                            "Show potential probe-able functions.",
+                            opt_set_filter_with_command, DEFAULT_FUNC_FILTER),
        OPT_CALLBACK('\0', "filter", NULL,
                     "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
                     "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
@@ -402,6 +412,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
        set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
        set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
 #endif
+       set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);
 
        argc = parse_options(argc, argv, options, probe_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
@@ -410,11 +421,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
                        pr_warning("  Error: '-' is not supported.\n");
                        usage_with_options(probe_usage, options);
                }
+               if (params.command && params.command != 'a') {
+                       pr_warning("  Error: another command except --add is set.\n");
+                       usage_with_options(probe_usage, options);
+               }
                ret = parse_probe_event_argv(argc, argv);
                if (ret < 0) {
                        pr_err_with_code("  Error: Command Parse Error.", ret);
                        return ret;
                }
+               params.command = 'a';
        }
 
        if (params.quiet) {
@@ -425,89 +441,70 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
                verbose = -1;
        }
 
-       if (params.max_probe_points == 0)
-               params.max_probe_points = MAX_PROBES;
-
-       if ((!params.nevents && !params.dellist && !params.list_events &&
-            !params.show_lines && !params.show_funcs))
-               usage_with_options(probe_usage, options);
+       if (probe_conf.max_probes == 0)
+               probe_conf.max_probes = MAX_PROBES;
 
        /*
         * Only consider the user's kernel image path if given.
         */
        symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
 
-       if (params.list_events) {
+       switch (params.command) {
+       case 'l':
                if (params.uprobes) {
                        pr_warning("  Error: Don't use --list with --exec.\n");
                        usage_with_options(probe_usage, options);
                }
-               ret = show_perf_probe_events();
+               ret = show_perf_probe_events(params.filter);
                if (ret < 0)
                        pr_err_with_code("  Error: Failed to show event list.", ret);
                return ret;
-       }
-       if (params.show_funcs) {
-               if (!params.filter)
-                       params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
-                                                      NULL);
+       case 'F':
                ret = show_available_funcs(params.target, params.filter,
                                        params.uprobes);
-               strfilter__delete(params.filter);
-               params.filter = NULL;
                if (ret < 0)
                        pr_err_with_code("  Error: Failed to show functions.", ret);
                return ret;
-       }
-
 #ifdef HAVE_DWARF_SUPPORT
-       if (params.show_lines) {
+       case 'L':
                ret = show_line_range(&params.line_range, params.target,
                                      params.uprobes);
                if (ret < 0)
                        pr_err_with_code("  Error: Failed to show lines.", ret);
                return ret;
-       }
-       if (params.show_vars) {
+       case 'V':
                if (!params.filter)
                        params.filter = strfilter__new(DEFAULT_VAR_FILTER,
                                                       NULL);
 
                ret = show_available_vars(params.events, params.nevents,
-                                         params.max_probe_points,
-                                         params.target,
-                                         params.filter,
-                                         params.show_ext_vars);
-               strfilter__delete(params.filter);
-               params.filter = NULL;
+                                         params.filter);
                if (ret < 0)
                        pr_err_with_code("  Error: Failed to show vars.", ret);
                return ret;
-       }
 #endif
-
-       if (params.dellist) {
-               ret = del_perf_probe_events(params.dellist);
+       case 'd':
+               ret = del_perf_probe_events(params.filter);
                if (ret < 0) {
                        pr_err_with_code("  Error: Failed to delete events.", ret);
                        return ret;
                }
-       }
-
-       if (params.nevents) {
+               break;
+       case 'a':
                /* Ensure the last given target is used */
                if (params.target && !params.target_used) {
                        pr_warning("  Error: -x/-m must follow the probe definitions.\n");
                        usage_with_options(probe_usage, options);
                }
 
-               ret = add_perf_probe_events(params.events, params.nevents,
-                                           params.max_probe_points,
-                                           params.force_add);
+               ret = add_perf_probe_events(params.events, params.nevents);
                if (ret < 0) {
                        pr_err_with_code("  Error: Failed to add events.", ret);
                        return ret;
                }
+               break;
+       default:
+               usage_with_options(probe_usage, options);
        }
        return 0;
 }
@@ -522,5 +519,5 @@ int cmd_probe(int argc, const char **argv, const char *prefix)
                cleanup_params();
        }
 
-       return ret;
+       return ret < 0 ? ret : 0;
 }
index c3efdfb..de165a1 100644 (file)
@@ -27,6 +27,8 @@
 #include "util/cpumap.h"
 #include "util/thread_map.h"
 #include "util/data.h"
+#include "util/auxtrace.h"
+#include "util/parse-branch-options.h"
 
 #include <unistd.h>
 #include <sched.h>
@@ -38,6 +40,7 @@ struct record {
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data_file   file;
+       struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
@@ -110,9 +113,12 @@ out:
        return rc;
 }
 
-static volatile int done = 0;
+static volatile int done;
 static volatile int signr = -1;
-static volatile int child_finished = 0;
+static volatile int child_finished;
+static volatile int auxtrace_snapshot_enabled;
+static volatile int auxtrace_snapshot_err;
+static volatile int auxtrace_record__snapshot_started;
 
 static void sig_handler(int sig)
 {
@@ -133,6 +139,133 @@ static void record__sig_exit(void)
        raise(signr);
 }
 
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+static int record__process_auxtrace(struct perf_tool *tool,
+                                   union perf_event *event, void *data1,
+                                   size_t len1, void *data2, size_t len2)
+{
+       struct record *rec = container_of(tool, struct record, tool);
+       struct perf_data_file *file = &rec->file;
+       size_t padding;
+       u8 pad[8] = {0};
+
+       if (!perf_data_file__is_pipe(file)) {
+               off_t file_offset;
+               int fd = perf_data_file__fd(file);
+               int err;
+
+               file_offset = lseek(fd, 0, SEEK_CUR);
+               if (file_offset == -1)
+                       return -1;
+               err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
+                                                    event, file_offset);
+               if (err)
+                       return err;
+       }
+
+       /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
+       padding = (len1 + len2) & 7;
+       if (padding)
+               padding = 8 - padding;
+
+       record__write(rec, event, event->header.size);
+       record__write(rec, data1, len1);
+       if (len2)
+               record__write(rec, data2, len2);
+       record__write(rec, &pad, padding);
+
+       return 0;
+}
+
+static int record__auxtrace_mmap_read(struct record *rec,
+                                     struct auxtrace_mmap *mm)
+{
+       int ret;
+
+       ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
+                                 record__process_auxtrace);
+       if (ret < 0)
+               return ret;
+
+       if (ret)
+               rec->samples++;
+
+       return 0;
+}
+
+static int record__auxtrace_mmap_read_snapshot(struct record *rec,
+                                              struct auxtrace_mmap *mm)
+{
+       int ret;
+
+       ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
+                                          record__process_auxtrace,
+                                          rec->opts.auxtrace_snapshot_size);
+       if (ret < 0)
+               return ret;
+
+       if (ret)
+               rec->samples++;
+
+       return 0;
+}
+
+static int record__auxtrace_read_snapshot_all(struct record *rec)
+{
+       int i;
+       int rc = 0;
+
+       for (i = 0; i < rec->evlist->nr_mmaps; i++) {
+               struct auxtrace_mmap *mm =
+                               &rec->evlist->mmap[i].auxtrace_mmap;
+
+               if (!mm->base)
+                       continue;
+
+               if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
+                       rc = -1;
+                       goto out;
+               }
+       }
+out:
+       return rc;
+}
+
+static void record__read_auxtrace_snapshot(struct record *rec)
+{
+       pr_debug("Recording AUX area tracing snapshot\n");
+       if (record__auxtrace_read_snapshot_all(rec) < 0) {
+               auxtrace_snapshot_err = -1;
+       } else {
+               auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
+               if (!auxtrace_snapshot_err)
+                       auxtrace_snapshot_enabled = 1;
+       }
+}
+
+#else
+
+static inline
+int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
+                              struct auxtrace_mmap *mm __maybe_unused)
+{
+       return 0;
+}
+
+static inline
+void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
+{
+       return 0;
+}
+
+#endif
+
 static int record__open(struct record *rec)
 {
        char msg[512];
@@ -169,13 +302,16 @@ try_again:
                goto out;
        }
 
-       if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+       if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
+                                opts->auxtrace_mmap_pages,
+                                opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
-                              "(current value: %u)\n", opts->mmap_pages);
+                              "(current value: %u,%u)\n",
+                              opts->mmap_pages, opts->auxtrace_mmap_pages);
                        rc = -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
@@ -209,12 +345,9 @@ static int process_buildids(struct record *rec)
        struct perf_data_file *file  = &rec->file;
        struct perf_session *session = rec->session;
 
-       u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
-       if (size == 0)
+       if (file->size == 0)
                return 0;
 
-       file->size = size;
-
        /*
         * During this process, it'll load kernel map and replace the
         * dso->long_name to a real pathname it found.  In this case
@@ -270,12 +403,20 @@ static int record__mmap_read_all(struct record *rec)
        int rc = 0;
 
        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
+               struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
+
                if (rec->evlist->mmap[i].base) {
                        if (record__mmap_read(rec, i) != 0) {
                                rc = -1;
                                goto out;
                        }
                }
+
+               if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
+                   record__auxtrace_mmap_read(rec, mm) != 0) {
+                       rc = -1;
+                       goto out;
+               }
        }
 
        /*
@@ -305,6 +446,9 @@ static void record__init_features(struct record *rec)
 
        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+
+       if (!rec->opts.full_auxtrace)
+               perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 }
 
 static volatile int workload_exec_errno;
@@ -323,6 +467,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
        child_finished = 1;
 }
 
+static void snapshot_sig_handler(int sig);
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
        int err;
@@ -343,6 +489,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
+       if (rec->opts.auxtrace_snapshot_mode)
+               signal(SIGUSR2, snapshot_sig_handler);
+       else
+               signal(SIGUSR2, SIG_IGN);
 
        session = perf_session__new(file, false, tool);
        if (session == NULL) {
@@ -421,6 +571,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                }
        }
 
+       if (rec->opts.full_auxtrace) {
+               err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+                                       session, process_synthesized_event);
+               if (err)
+                       goto out_delete_session;
+       }
+
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
@@ -441,7 +598,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        }
 
        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
-                                           process_synthesized_event, opts->sample_address);
+                                           process_synthesized_event, opts->sample_address,
+                                           opts->proc_map_timeout);
        if (err != 0)
                goto out_child;
 
@@ -475,14 +633,27 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                perf_evlist__enable(rec->evlist);
        }
 
+       auxtrace_snapshot_enabled = 1;
        for (;;) {
                int hits = rec->samples;
 
                if (record__mmap_read_all(rec) < 0) {
+                       auxtrace_snapshot_enabled = 0;
                        err = -1;
                        goto out_child;
                }
 
+               if (auxtrace_record__snapshot_started) {
+                       auxtrace_record__snapshot_started = 0;
+                       if (!auxtrace_snapshot_err)
+                               record__read_auxtrace_snapshot(rec);
+                       if (auxtrace_snapshot_err) {
+                               pr_err("AUX area tracing snapshot failed\n");
+                               err = -1;
+                               goto out_child;
+                       }
+               }
+
                if (hits == rec->samples) {
                        if (done || draining)
                                break;
@@ -505,10 +676,12 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
+                       auxtrace_snapshot_enabled = 0;
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
+       auxtrace_snapshot_enabled = 0;
 
        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
@@ -544,16 +717,25 @@ out_child:
 
        if (!err && !file->is_pipe) {
                rec->session->header.data_size += rec->bytes_written;
+               file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
 
-               if (!rec->no_buildid)
+               if (!rec->no_buildid) {
                        process_buildids(rec);
+                       /*
+                        * We take all buildids when the file contains
+                        * AUX area tracing data because we do not decode the
+                        * trace because it would take too long.
+                        */
+                       if (rec->opts.full_auxtrace)
+                               dsos__hit_all(rec->session);
+               }
                perf_session__write_header(rec->session, rec->evlist, fd, true);
        }
 
        if (!err && !quiet) {
                char samples[128];
 
-               if (rec->samples)
+               if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
@@ -569,94 +751,6 @@ out_delete_session:
        return status;
 }
 
-#define BRANCH_OPT(n, m) \
-       { .name = n, .mode = (m) }
-
-#define BRANCH_END { .name = NULL }
-
-struct branch_mode {
-       const char *name;
-       int mode;
-};
-
-static const struct branch_mode branch_modes[] = {
-       BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
-       BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
-       BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
-       BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
-       BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
-       BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
-       BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
-       BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
-       BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
-       BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
-       BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
-       BRANCH_END
-};
-
-static int
-parse_branch_stack(const struct option *opt, const char *str, int unset)
-{
-#define ONLY_PLM \
-       (PERF_SAMPLE_BRANCH_USER        |\
-        PERF_SAMPLE_BRANCH_KERNEL      |\
-        PERF_SAMPLE_BRANCH_HV)
-
-       uint64_t *mode = (uint64_t *)opt->value;
-       const struct branch_mode *br;
-       char *s, *os = NULL, *p;
-       int ret = -1;
-
-       if (unset)
-               return 0;
-
-       /*
-        * cannot set it twice, -b + --branch-filter for instance
-        */
-       if (*mode)
-               return -1;
-
-       /* str may be NULL in case no arg is passed to -b */
-       if (str) {
-               /* because str is read-only */
-               s = os = strdup(str);
-               if (!s)
-                       return -1;
-
-               for (;;) {
-                       p = strchr(s, ',');
-                       if (p)
-                               *p = '\0';
-
-                       for (br = branch_modes; br->name; br++) {
-                               if (!strcasecmp(s, br->name))
-                                       break;
-                       }
-                       if (!br->name) {
-                               ui__warning("unknown branch filter %s,"
-                                           " check man page\n", s);
-                               goto error;
-                       }
-
-                       *mode |= br->mode;
-
-                       if (!p)
-                               break;
-
-                       s = p + 1;
-               }
-       }
-       ret = 0;
-
-       /* default to any branch */
-       if ((*mode & ~ONLY_PLM) == 0) {
-               *mode = PERF_SAMPLE_BRANCH_ANY;
-       }
-error:
-       free(os);
-       return ret;
-}
-
 static void callchain_debug(void)
 {
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
@@ -795,6 +889,49 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
        return -1;
 }
 
+static int record__parse_mmap_pages(const struct option *opt,
+                                   const char *str,
+                                   int unset __maybe_unused)
+{
+       struct record_opts *opts = opt->value;
+       char *s, *p;
+       unsigned int mmap_pages;
+       int ret;
+
+       if (!str)
+               return -EINVAL;
+
+       s = strdup(str);
+       if (!s)
+               return -ENOMEM;
+
+       p = strchr(s, ',');
+       if (p)
+               *p = '\0';
+
+       if (*s) {
+               ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
+               if (ret)
+                       goto out_free;
+               opts->mmap_pages = mmap_pages;
+       }
+
+       if (!p) {
+               ret = 0;
+               goto out_free;
+       }
+
+       ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
+       if (ret)
+               goto out_free;
+
+       opts->auxtrace_mmap_pages = mmap_pages;
+
+out_free:
+       free(s);
+       return ret;
+}
+
 static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
@@ -823,6 +960,7 @@ static struct record record = {
                        .uses_mmap   = true,
                        .default_per_cpu = true,
                },
+               .proc_map_timeout     = 500,
        },
        .tool = {
                .sample         = process_sample_event,
@@ -875,9 +1013,9 @@ struct option __record_options[] = {
                        &record.opts.no_inherit_set,
                        "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
-       OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
-                    "number of mmap data pages",
-                    perf_evlist__parse_mmap_pages),
+       OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
+                    "number of mmap data pages and AUX area tracing mmap pages",
+                    record__parse_mmap_pages),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
@@ -891,10 +1029,9 @@ struct option __record_options[] = {
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
-       OPT_BOOLEAN('d', "data", &record.opts.sample_address,
-                   "Sample addresses"),
-       OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
-       OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
+       OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
+       OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Record the sample timestamps"),
+       OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
@@ -929,6 +1066,10 @@ struct option __record_options[] = {
        OPT_CALLBACK('k', "clockid", &record.opts,
        "clockid", "clockid to use for events, see clock_gettime()",
        parse_clockid),
+       OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
+                         "opts", "AUX area tracing Snapshot Mode", ""),
+       OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
+                       "per thread proc mmap processing timeout in ms"),
        OPT_END()
 };
 
@@ -936,7 +1077,7 @@ struct option *record_options = __record_options;
 
 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-       int err = -ENOMEM;
+       int err;
        struct record *rec = &record;
        char errbuf[BUFSIZ];
 
@@ -957,6 +1098,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
                usage_with_options(record_usage, record_options);
        }
 
+       if (!rec->itr) {
+               rec->itr = auxtrace_record__init(rec->evlist, &err);
+               if (err)
+                       return err;
+       }
+
+       err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
+                                             rec->opts.auxtrace_snapshot_opts);
+       if (err)
+               return err;
+
+       err = -ENOMEM;
+
        symbol__init(NULL);
 
        if (symbol_conf.kptr_restrict)
@@ -1002,6 +1156,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);
 
+       err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
+       if (err)
+               goto out_symbol_exit;
+
        if (record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out_symbol_exit;
@@ -1011,5 +1169,15 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 out_symbol_exit:
        perf_evlist__delete(rec->evlist);
        symbol__exit();
+       auxtrace_record__free(rec->itr);
        return err;
 }
+
+static void snapshot_sig_handler(int sig __maybe_unused)
+{
+       if (!auxtrace_snapshot_enabled)
+               return;
+       auxtrace_snapshot_enabled = 0;
+       auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
+       auxtrace_record__snapshot_started = 1;
+}
index 476cdf7..32626ea 100644 (file)
@@ -36,6 +36,8 @@
 #include "util/data.h"
 #include "arch/common.h"
 
+#include "util/auxtrace.h"
+
 #include <dlfcn.h>
 #include <linux/bitmap.h>
 
@@ -137,10 +139,12 @@ static int process_sample_event(struct perf_tool *tool,
        struct report *rep = container_of(tool, struct report, tool);
        struct addr_location al;
        struct hist_entry_iter iter = {
-               .hide_unresolved = rep->hide_unresolved,
-               .add_entry_cb = hist_iter__report_callback,
+               .evsel                  = evsel,
+               .sample                 = sample,
+               .hide_unresolved        = rep->hide_unresolved,
+               .add_entry_cb           = hist_iter__report_callback,
        };
-       int ret;
+       int ret = 0;
 
        if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
                pr_debug("problem processing %d event, skipping it.\n",
@@ -149,10 +153,10 @@ static int process_sample_event(struct perf_tool *tool,
        }
 
        if (rep->hide_unresolved && al.sym == NULL)
-               return 0;
+               goto out_put;
 
        if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
-               return 0;
+               goto out_put;
 
        if (sort__mode == SORT_MODE__BRANCH)
                iter.ops = &hist_iter_branch;
@@ -166,11 +170,11 @@ static int process_sample_event(struct perf_tool *tool,
        if (al.map != NULL)
                al.map->dso->hit = 1;
 
-       ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
-                                  rep);
+       ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
        if (ret < 0)
                pr_debug("problem adding hist entry, skipping event\n");
-
+out_put:
+       addr_location__put(&al);
        return ret;
 }
 
@@ -316,6 +320,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 {
        struct perf_evsel *pos;
 
+       fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples);
        evlist__for_each(evlist, pos) {
                struct hists *hists = evsel__hists(pos);
                const char *evname = perf_evsel__name(pos);
@@ -329,16 +334,15 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
                fprintf(stdout, "\n\n");
        }
 
-       if (sort_order == default_sort_order &&
-           parent_pattern == default_parent_pattern) {
+       if (sort_order == NULL &&
+           parent_pattern == default_parent_pattern)
                fprintf(stdout, "#\n# (%s)\n#\n", help);
 
-               if (rep->show_threads) {
-                       bool style = !strcmp(rep->pretty_printing_style, "raw");
-                       perf_read_values_display(stdout, &rep->show_threads_values,
-                                                style);
-                       perf_read_values_destroy(&rep->show_threads_values);
-               }
+       if (rep->show_threads) {
+               bool style = !strcmp(rep->pretty_printing_style, "raw");
+               perf_read_values_display(stdout, &rep->show_threads_values,
+                                        style);
+               perf_read_values_destroy(&rep->show_threads_values);
        }
 
        return 0;
@@ -585,6 +589,7 @@ parse_percent_limit(const struct option *opt, const char *str,
 int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 {
        struct perf_session *session;
+       struct itrace_synth_opts itrace_synth_opts = { .set = 0, };
        struct stat st;
        bool has_br_stack = false;
        int branch_mode = -1;
@@ -607,6 +612,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                        .attr            = perf_event__process_attr,
                        .tracing_data    = perf_event__process_tracing_data,
                        .build_id        = perf_event__process_build_id,
+                       .id_index        = perf_event__process_id_index,
+                       .auxtrace_info   = perf_event__process_auxtrace_info,
+                       .auxtrace        = perf_event__process_auxtrace,
                        .ordered_events  = true,
                        .ordering_requires_timestamps = true,
                },
@@ -717,6 +725,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                     "Don't show entries under that percent", parse_percent_limit),
        OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
                     "how to display percentage of filtered entries", parse_filter_percentage),
+       OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+                           "Instruction Tracing options",
+                           itrace_parse_synth_opts),
        OPT_END()
        };
        struct perf_data_file file = {
@@ -761,6 +772,8 @@ repeat:
                                               report.queue_size);
        }
 
+       session->itrace_synth_opts = &itrace_synth_opts;
+
        report.session = session;
 
        has_br_stack = perf_header__has_feat(&session->header,
@@ -803,8 +816,8 @@ repeat:
                goto error;
        }
 
-       /* Force tty output for header output. */
-       if (report.header || report.header_only)
+       /* Force tty output for header output and per-thread stat. */
+       if (report.header || report.header_only || report.show_threads)
                use_browser = 0;
 
        if (strcmp(input_name, "-") != 0)
index 5275bab..3396261 100644 (file)
@@ -95,6 +95,7 @@ struct work_atoms {
        u64                     total_lat;
        u64                     nb_atoms;
        u64                     total_runtime;
+       int                     num_merged;
 };
 
 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
@@ -168,9 +169,10 @@ struct perf_sched {
        u64              all_runtime;
        u64              all_count;
        u64              cpu_last_switched[MAX_CPUS];
-       struct rb_root   atom_root, sorted_atom_root;
+       struct rb_root   atom_root, sorted_atom_root, merged_atom_root;
        struct list_head sort_list, cmp_pid;
        bool force;
+       bool skip_merge;
 };
 
 static u64 get_nsecs(void)
@@ -770,7 +772,7 @@ static int replay_fork_event(struct perf_sched *sched,
        if (child == NULL || parent == NULL) {
                pr_debug("thread does not exist on fork event: child %p, parent %p\n",
                                 child, parent);
-               return 0;
+               goto out_put;
        }
 
        if (verbose) {
@@ -781,6 +783,9 @@ static int replay_fork_event(struct perf_sched *sched,
 
        register_pid(sched, parent->tid, thread__comm_str(parent));
        register_pid(sched, child->tid, thread__comm_str(child));
+out_put:
+       thread__put(child);
+       thread__put(parent);
        return 0;
 }
 
@@ -957,7 +962,7 @@ static int latency_switch_event(struct perf_sched *sched,
        struct work_atoms *out_events, *in_events;
        struct thread *sched_out, *sched_in;
        u64 timestamp0, timestamp = sample->time;
-       int cpu = sample->cpu;
+       int cpu = sample->cpu, err = -1;
        s64 delta;
 
        BUG_ON(cpu >= MAX_CPUS || cpu < 0);
@@ -976,15 +981,17 @@ static int latency_switch_event(struct perf_sched *sched,
 
        sched_out = machine__findnew_thread(machine, -1, prev_pid);
        sched_in = machine__findnew_thread(machine, -1, next_pid);
+       if (sched_out == NULL || sched_in == NULL)
+               goto out_put;
 
        out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
        if (!out_events) {
                if (thread_atoms_insert(sched, sched_out))
-                       return -1;
+                       goto out_put;
                out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
                if (!out_events) {
                        pr_err("out-event: Internal tree error");
-                       return -1;
+                       goto out_put;
                }
        }
        if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
@@ -993,22 +1000,25 @@ static int latency_switch_event(struct perf_sched *sched,
        in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
        if (!in_events) {
                if (thread_atoms_insert(sched, sched_in))
-                       return -1;
+                       goto out_put;
                in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
                if (!in_events) {
                        pr_err("in-event: Internal tree error");
-                       return -1;
+                       goto out_put;
                }
                /*
                 * Take came in we have not heard about yet,
                 * add in an initial atom in runnable state:
                 */
                if (add_sched_out_event(in_events, 'R', timestamp))
-                       return -1;
+                       goto out_put;
        }
        add_sched_in_event(in_events, timestamp);
-
-       return 0;
+       err = 0;
+out_put:
+       thread__put(sched_out);
+       thread__put(sched_in);
+       return err;
 }
 
 static int latency_runtime_event(struct perf_sched *sched,
@@ -1021,23 +1031,29 @@ static int latency_runtime_event(struct perf_sched *sched,
        struct thread *thread = machine__findnew_thread(machine, -1, pid);
        struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
        u64 timestamp = sample->time;
-       int cpu = sample->cpu;
+       int cpu = sample->cpu, err = -1;
+
+       if (thread == NULL)
+               return -1;
 
        BUG_ON(cpu >= MAX_CPUS || cpu < 0);
        if (!atoms) {
                if (thread_atoms_insert(sched, thread))
-                       return -1;
+                       goto out_put;
                atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
                if (!atoms) {
                        pr_err("in-event: Internal tree error");
-                       return -1;
+                       goto out_put;
                }
                if (add_sched_out_event(atoms, 'R', timestamp))
-                       return -1;
+                       goto out_put;
        }
 
        add_runtime_event(atoms, runtime, timestamp);
-       return 0;
+       err = 0;
+out_put:
+       thread__put(thread);
+       return err;
 }
 
 static int latency_wakeup_event(struct perf_sched *sched,
@@ -1050,19 +1066,22 @@ static int latency_wakeup_event(struct perf_sched *sched,
        struct work_atom *atom;
        struct thread *wakee;
        u64 timestamp = sample->time;
+       int err = -1;
 
        wakee = machine__findnew_thread(machine, -1, pid);
+       if (wakee == NULL)
+               return -1;
        atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
        if (!atoms) {
                if (thread_atoms_insert(sched, wakee))
-                       return -1;
+                       goto out_put;
                atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
                if (!atoms) {
                        pr_err("wakeup-event: Internal tree error");
-                       return -1;
+                       goto out_put;
                }
                if (add_sched_out_event(atoms, 'S', timestamp))
-                       return -1;
+                       goto out_put;
        }
 
        BUG_ON(list_empty(&atoms->work_list));
@@ -1081,17 +1100,21 @@ static int latency_wakeup_event(struct perf_sched *sched,
         * skip in this case.
         */
        if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
-               return 0;
+               goto out_ok;
 
        sched->nr_timestamps++;
        if (atom->sched_out_time > timestamp) {
                sched->nr_unordered_timestamps++;
-               return 0;
+               goto out_ok;
        }
 
        atom->state = THREAD_WAIT_CPU;
        atom->wake_up_time = timestamp;
-       return 0;
+out_ok:
+       err = 0;
+out_put:
+       thread__put(wakee);
+       return err;
 }
 
 static int latency_migrate_task_event(struct perf_sched *sched,
@@ -1104,6 +1127,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
        struct work_atoms *atoms;
        struct work_atom *atom;
        struct thread *migrant;
+       int err = -1;
 
        /*
         * Only need to worry about migration when profiling one CPU.
@@ -1112,18 +1136,20 @@ static int latency_migrate_task_event(struct perf_sched *sched,
                return 0;
 
        migrant = machine__findnew_thread(machine, -1, pid);
+       if (migrant == NULL)
+               return -1;
        atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
        if (!atoms) {
                if (thread_atoms_insert(sched, migrant))
-                       return -1;
+                       goto out_put;
                register_pid(sched, migrant->tid, thread__comm_str(migrant));
                atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
                if (!atoms) {
                        pr_err("migration-event: Internal tree error");
-                       return -1;
+                       goto out_put;
                }
                if (add_sched_out_event(atoms, 'R', timestamp))
-                       return -1;
+                       goto out_put;
        }
 
        BUG_ON(list_empty(&atoms->work_list));
@@ -1135,8 +1161,10 @@ static int latency_migrate_task_event(struct perf_sched *sched,
 
        if (atom->sched_out_time > timestamp)
                sched->nr_unordered_timestamps++;
-
-       return 0;
+       err = 0;
+out_put:
+       thread__put(migrant);
+       return err;
 }
 
 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
@@ -1156,7 +1184,10 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
        sched->all_runtime += work_list->total_runtime;
        sched->all_count   += work_list->nb_atoms;
 
-       ret = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
+       if (work_list->num_merged > 1)
+               ret = printf("  %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
+       else
+               ret = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
 
        for (i = 0; i < 24 - ret; i++)
                printf(" ");
@@ -1276,17 +1307,22 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
 static void perf_sched__sort_lat(struct perf_sched *sched)
 {
        struct rb_node *node;
-
+       struct rb_root *root = &sched->atom_root;
+again:
        for (;;) {
                struct work_atoms *data;
-               node = rb_first(&sched->atom_root);
+               node = rb_first(root);
                if (!node)
                        break;
 
-               rb_erase(node, &sched->atom_root);
+               rb_erase(node, root);
                data = rb_entry(node, struct work_atoms, node);
                __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
        }
+       if (root == &sched->atom_root) {
+               root = &sched->merged_atom_root;
+               goto again;
+       }
 }
 
 static int process_sched_wakeup_event(struct perf_tool *tool,
@@ -1330,8 +1366,10 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
        }
 
        sched_in = machine__findnew_thread(machine, -1, next_pid);
+       if (sched_in == NULL)
+               return -1;
 
-       sched->curr_thread[this_cpu] = sched_in;
+       sched->curr_thread[this_cpu] = thread__get(sched_in);
 
        printf("  ");
 
@@ -1381,6 +1419,8 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
                printf("\n");
        }
 
+       thread__put(sched_in);
+
        return 0;
 }
 
@@ -1542,6 +1582,59 @@ static void print_bad_events(struct perf_sched *sched)
        }
 }
 
+static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
+{
+       struct rb_node **new = &(root->rb_node), *parent = NULL;
+       struct work_atoms *this;
+       const char *comm = thread__comm_str(data->thread), *this_comm;
+
+       while (*new) {
+               int cmp;
+
+               this = container_of(*new, struct work_atoms, node);
+               parent = *new;
+
+               this_comm = thread__comm_str(this->thread);
+               cmp = strcmp(comm, this_comm);
+               if (cmp > 0) {
+                       new = &((*new)->rb_left);
+               } else if (cmp < 0) {
+                       new = &((*new)->rb_right);
+               } else {
+                       this->num_merged++;
+                       this->total_runtime += data->total_runtime;
+                       this->nb_atoms += data->nb_atoms;
+                       this->total_lat += data->total_lat;
+                       list_splice(&data->work_list, &this->work_list);
+                       if (this->max_lat < data->max_lat) {
+                               this->max_lat = data->max_lat;
+                               this->max_lat_at = data->max_lat_at;
+                       }
+                       zfree(&data);
+                       return;
+               }
+       }
+
+       data->num_merged++;
+       rb_link_node(&data->node, parent, new);
+       rb_insert_color(&data->node, root);
+}
+
+static void perf_sched__merge_lat(struct perf_sched *sched)
+{
+       struct work_atoms *data;
+       struct rb_node *node;
+
+       if (sched->skip_merge)
+               return;
+
+       while ((node = rb_first(&sched->atom_root))) {
+               rb_erase(node, &sched->atom_root);
+               data = rb_entry(node, struct work_atoms, node);
+               __merge_work_atoms(&sched->merged_atom_root, data);
+       }
+}
+
 static int perf_sched__lat(struct perf_sched *sched)
 {
        struct rb_node *next;
@@ -1551,6 +1644,7 @@ static int perf_sched__lat(struct perf_sched *sched)
        if (perf_sched__read_events(sched))
                return -1;
 
+       perf_sched__merge_lat(sched);
        perf_sched__sort_lat(sched);
 
        printf("\n -----------------------------------------------------------------------------------------------------------------\n");
@@ -1702,6 +1796,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
                .profile_cpu          = -1,
                .next_shortname1      = 'A',
                .next_shortname2      = '0',
+               .skip_merge           = 0,
        };
        const struct option latency_options[] = {
        OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
@@ -1712,6 +1807,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
                    "CPU to profile on"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
+       OPT_BOOLEAN('p', "pids", &sched.skip_merge,
+                   "latency stats per pid instead of per comm"),
        OPT_END()
        };
        const struct option replay_options[] = {
index 58f10b8..2480978 100644 (file)
@@ -16,6 +16,7 @@
 #include "util/evsel.h"
 #include "util/sort.h"
 #include "util/data.h"
+#include "util/auxtrace.h"
 #include <linux/bitmap.h>
 
 static char const              *script_name;
@@ -26,6 +27,7 @@ static u64                    nr_unordered;
 static bool                    no_callchain;
 static bool                    latency_format;
 static bool                    system_wide;
+static bool                    print_flags;
 static const char              *cpu_list;
 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 
@@ -146,9 +148,10 @@ static const char *output_field2str(enum perf_output_field field)
 
 #define PRINT_FIELD(x)  (output[attr->type].fields & PERF_OUTPUT_##x)
 
-static int perf_evsel__check_stype(struct perf_evsel *evsel,
-                                  u64 sample_type, const char *sample_msg,
-                                  enum perf_output_field field)
+static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
+                                     u64 sample_type, const char *sample_msg,
+                                     enum perf_output_field field,
+                                     bool allow_user_set)
 {
        struct perf_event_attr *attr = &evsel->attr;
        int type = attr->type;
@@ -158,6 +161,8 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
                return 0;
 
        if (output[type].user_set) {
+               if (allow_user_set)
+                       return 0;
                evname = perf_evsel__name(evsel);
                pr_err("Samples for '%s' event do not have %s attribute set. "
                       "Cannot print '%s' field.\n",
@@ -175,10 +180,22 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
        return 0;
 }
 
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+                                  u64 sample_type, const char *sample_msg,
+                                  enum perf_output_field field)
+{
+       return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field,
+                                         false);
+}
+
 static int perf_evsel__check_attr(struct perf_evsel *evsel,
                                  struct perf_session *session)
 {
        struct perf_event_attr *attr = &evsel->attr;
+       bool allow_user_set;
+
+       allow_user_set = perf_header__has_feat(&session->header,
+                                              HEADER_AUXTRACE);
 
        if (PRINT_FIELD(TRACE) &&
                !perf_session__has_traces(session, "record -R"))
@@ -191,8 +208,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
        }
 
        if (PRINT_FIELD(ADDR) &&
-               perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
-                                       PERF_OUTPUT_ADDR))
+               perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
+                                          PERF_OUTPUT_ADDR, allow_user_set))
                return -EINVAL;
 
        if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
@@ -229,8 +246,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
                return -EINVAL;
 
        if (PRINT_FIELD(CPU) &&
-               perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
-                                       PERF_OUTPUT_CPU))
+               perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
+                                          PERF_OUTPUT_CPU, allow_user_set))
                return -EINVAL;
 
        if (PRINT_FIELD(PERIOD) &&
@@ -445,6 +462,25 @@ static void print_sample_bts(union perf_event *event,
        printf("\n");
 }
 
+static void print_sample_flags(u32 flags)
+{
+       const char *chars = PERF_IP_FLAG_CHARS;
+       const int n = strlen(PERF_IP_FLAG_CHARS);
+       char str[33];
+       int i, pos = 0;
+
+       for (i = 0; i < n; i++, flags >>= 1) {
+               if (flags & 1)
+                       str[pos++] = chars[i];
+       }
+       for (; i < 32; i++, flags >>= 1) {
+               if (flags & 1)
+                       str[pos++] = '?';
+       }
+       str[pos] = 0;
+       printf("  %-4s ", str);
+}
+
 static void process_event(union perf_event *event, struct perf_sample *sample,
                          struct perf_evsel *evsel, struct addr_location *al)
 {
@@ -464,6 +500,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
                printf("%s: ", evname ? evname : "[unknown]");
        }
 
+       if (print_flags)
+               print_sample_flags(sample->flags);
+
        if (is_bts_event(attr)) {
                print_sample_bts(event, sample, evsel, thread, al);
                return;
@@ -568,13 +607,14 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
        }
 
        if (al.filtered)
-               return 0;
+               goto out_put;
 
        if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
-               return 0;
+               goto out_put;
 
        scripting_ops->process_event(event, sample, evsel, &al);
-
+out_put:
+       addr_location__put(&al);
        return 0;
 }
 
@@ -642,8 +682,8 @@ static int process_comm_event(struct perf_tool *tool,
        print_sample_start(sample, thread, evsel);
        perf_event__fprintf(event, stdout);
        ret = 0;
-
 out:
+       thread__put(thread);
        return ret;
 }
 
@@ -674,6 +714,7 @@ static int process_fork_event(struct perf_tool *tool,
        }
        print_sample_start(sample, thread, evsel);
        perf_event__fprintf(event, stdout);
+       thread__put(thread);
 
        return 0;
 }
@@ -682,6 +723,7 @@ static int process_exit_event(struct perf_tool *tool,
                              struct perf_sample *sample,
                              struct machine *machine)
 {
+       int err = 0;
        struct thread *thread;
        struct perf_script *script = container_of(tool, struct perf_script, tool);
        struct perf_session *session = script->session;
@@ -703,9 +745,10 @@ static int process_exit_event(struct perf_tool *tool,
        perf_event__fprintf(event, stdout);
 
        if (perf_event__process_exit(tool, event, sample, machine) < 0)
-               return -1;
+               err = -1;
 
-       return 0;
+       thread__put(thread);
+       return err;
 }
 
 static int process_mmap_event(struct perf_tool *tool,
@@ -735,7 +778,7 @@ static int process_mmap_event(struct perf_tool *tool,
        }
        print_sample_start(sample, thread, evsel);
        perf_event__fprintf(event, stdout);
-
+       thread__put(thread);
        return 0;
 }
 
@@ -766,7 +809,7 @@ static int process_mmap2_event(struct perf_tool *tool,
        }
        print_sample_start(sample, thread, evsel);
        perf_event__fprintf(event, stdout);
-
+       thread__put(thread);
        return 0;
 }
 
@@ -999,12 +1042,15 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
                }
        }
 
-       tok = strtok(tok, ",");
-       while (tok) {
+       for (tok = strtok(tok, ","); tok; tok = strtok(NULL, ",")) {
                for (i = 0; i < imax; ++i) {
                        if (strcmp(tok, all_output_options[i].str) == 0)
                                break;
                }
+               if (i == imax && strcmp(tok, "flags") == 0) {
+                       print_flags = true;
+                       continue;
+               }
                if (i == imax) {
                        fprintf(stderr, "Invalid field requested.\n");
                        rc = -EINVAL;
@@ -1032,8 +1078,6 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
                        }
                        output[type].fields |= all_output_options[i].field;
                }
-
-               tok = strtok(NULL, ",");
        }
 
        if (type >= 0) {
@@ -1497,6 +1541,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
        char *rec_script_path = NULL;
        char *rep_script_path = NULL;
        struct perf_session *session;
+       struct itrace_synth_opts itrace_synth_opts = { .set = false, };
        char *script_path = NULL;
        const char **__argv;
        int i, j, err = 0;
@@ -1511,6 +1556,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                        .attr            = process_attr,
                        .tracing_data    = perf_event__process_tracing_data,
                        .build_id        = perf_event__process_build_id,
+                       .id_index        = perf_event__process_id_index,
+                       .auxtrace_info   = perf_event__process_auxtrace_info,
+                       .auxtrace        = perf_event__process_auxtrace,
+                       .auxtrace_error  = perf_event__process_auxtrace_error,
                        .ordered_events  = true,
                        .ordering_requires_timestamps = true,
                },
@@ -1549,7 +1598,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                     "comma separated output fields prepend with 'type:'. "
                     "Valid types: hw,sw,trace,raw. "
                     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
-                    "addr,symoff,period", parse_output_fields),
+                    "addr,symoff,period,flags", parse_output_fields),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
@@ -1570,6 +1619,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
                    "Show the mmap events"),
        OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
+       OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+                           "Instruction Tracing options",
+                           itrace_parse_synth_opts),
        OPT_END()
        };
        const char * const script_subcommands[] = { "record", "report", NULL };
@@ -1765,6 +1817,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 
        script.session = session;
 
+       session->itrace_synth_opts = &itrace_synth_opts;
+
        if (cpu_list) {
                err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
                if (err < 0)
index f7b8218..fcf99bd 100644 (file)
@@ -73,8 +73,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
 /* Default events used for perf stat -T */
-static const char * const transaction_attrs[] = {
-       "task-clock",
+static const char *transaction_attrs = {
+       "task-clock,"
        "{"
        "instructions,"
        "cycles,"
@@ -86,8 +86,8 @@ static const char * const transaction_attrs[] = {
 };
 
 /* More limited version when the CPU does not have all events. */
-static const char * const transaction_limited_attrs[] = {
-       "task-clock",
+static const char * transaction_limited_attrs = {
+       "task-clock,"
        "{"
        "instructions,"
        "cycles,"
@@ -96,30 +96,12 @@ static const char * const transaction_limited_attrs[] = {
        "}"
 };
 
-/* must match transaction_attrs and the beginning limited_attrs */
-enum {
-       T_TASK_CLOCK,
-       T_INSTRUCTIONS,
-       T_CYCLES,
-       T_CYCLES_IN_TX,
-       T_TRANSACTION_START,
-       T_ELISION_START,
-       T_CYCLES_IN_TX_CP,
-};
-
 static struct perf_evlist      *evsel_list;
 
 static struct target target = {
        .uid    = UINT_MAX,
 };
 
-enum aggr_mode {
-       AGGR_NONE,
-       AGGR_GLOBAL,
-       AGGR_SOCKET,
-       AGGR_CORE,
-};
-
 static int                     run_count                       =  1;
 static bool                    no_inherit                      = false;
 static bool                    scale                           =  true;
@@ -147,10 +129,6 @@ static int                 (*aggr_get_id)(struct cpu_map *m, int cpu);
 
 static volatile int done = 0;
 
-struct perf_stat {
-       struct stats      res_stats[3];
-};
-
 static inline void diff_timespec(struct timespec *r, struct timespec *a,
                                 struct timespec *b)
 {
@@ -180,6 +158,8 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 
        for (i = 0; i < 3; i++)
                init_stats(&ps->res_stats[i]);
+
+       perf_stat_evsel_id_init(evsel);
 }
 
 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
@@ -198,24 +178,19 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 
 static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
 {
-       void *addr;
-       size_t sz;
+       struct perf_counts *counts;
 
-       sz = sizeof(*evsel->counts) +
-            (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
+       counts = perf_counts__new(perf_evsel__nr_cpus(evsel));
+       if (counts)
+               evsel->prev_raw_counts = counts;
 
-       addr = zalloc(sz);
-       if (!addr)
-               return -ENOMEM;
-
-       evsel->prev_raw_counts =  addr;
-
-       return 0;
+       return counts ? 0 : -ENOMEM;
 }
 
 static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
 {
-       zfree(&evsel->prev_raw_counts);
+       perf_counts__delete(evsel->prev_raw_counts);
+       evsel->prev_raw_counts = NULL;
 }
 
 static void perf_evlist__free_stats(struct perf_evlist *evlist)
@@ -247,22 +222,6 @@ out_free:
        return -1;
 }
 
-static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_stats[MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
-static struct stats runtime_branches_stats[MAX_NR_CPUS];
-static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
-static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
-static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
-static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
-static struct stats walltime_nsecs_stats;
-static struct stats runtime_transaction_stats[MAX_NR_CPUS];
-static struct stats runtime_elision_stats[MAX_NR_CPUS];
-
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
        struct perf_evsel *evsel;
@@ -272,23 +231,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
                perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
        }
 
-       memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
-       memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
-       memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
-       memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
-       memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
-       memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
-       memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
-       memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
-       memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
-       memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
-       memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
-       memset(runtime_cycles_in_tx_stats, 0,
-                       sizeof(runtime_cycles_in_tx_stats));
-       memset(runtime_transaction_stats, 0,
-               sizeof(runtime_transaction_stats));
-       memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
-       memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+       perf_stat__reset_shadow_stats();
 }
 
 static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -325,70 +268,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
        return 0;
 }
 
-static struct perf_evsel *nth_evsel(int n)
-{
-       static struct perf_evsel **array;
-       static int array_len;
-       struct perf_evsel *ev;
-       int j;
-
-       /* Assumes this only called when evsel_list does not change anymore. */
-       if (!array) {
-               evlist__for_each(evsel_list, ev)
-                       array_len++;
-               array = malloc(array_len * sizeof(void *));
-               if (!array)
-                       exit(ENOMEM);
-               j = 0;
-               evlist__for_each(evsel_list, ev)
-                       array[j++] = ev;
-       }
-       if (n < array_len)
-               return array[n];
-       return NULL;
-}
-
-/*
- * Update various tracking values we maintain to print
- * more semantic information such as miss/hit ratios,
- * instruction rates, etc:
- */
-static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
-                               int cpu)
-{
-       if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
-               update_stats(&runtime_nsecs_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-               update_stats(&runtime_cycles_stats[cpu], count[0]);
-       else if (transaction_run &&
-                perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
-               update_stats(&runtime_cycles_in_tx_stats[cpu], count[0]);
-       else if (transaction_run &&
-                perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
-               update_stats(&runtime_transaction_stats[cpu], count[0]);
-       else if (transaction_run &&
-                perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
-               update_stats(&runtime_elision_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-               update_stats(&runtime_stalled_cycles_front_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-               update_stats(&runtime_stalled_cycles_back_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-               update_stats(&runtime_branches_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-               update_stats(&runtime_cacherefs_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-               update_stats(&runtime_l1_dcache_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-               update_stats(&runtime_l1_icache_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-               update_stats(&runtime_ll_cache_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-               update_stats(&runtime_dtlb_cache_stats[cpu], count[0]);
-       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-               update_stats(&runtime_itlb_cache_stats[cpu], count[0]);
-}
-
 static void zero_per_pkg(struct perf_evsel *counter)
 {
        if (counter->per_pkg_mask)
@@ -449,7 +328,7 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
                perf_counts_values__scale(count, scale, NULL);
                evsel->counts->cpu[cpu] = *count;
                if (aggr_mode == AGGR_NONE)
-                       update_shadow_stats(evsel, count->values, cpu);
+                       perf_stat__update_shadow_stats(evsel, count->values, cpu);
                break;
        case AGGR_GLOBAL:
                aggr->val += count->val;
@@ -497,7 +376,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
        /*
         * Save the full runtime - to allow normalization during printout:
         */
-       update_shadow_stats(counter, count, 0);
+       perf_stat__update_shadow_stats(counter, count, 0);
 
        return 0;
 }
@@ -665,7 +544,10 @@ static int __run_perf_stat(int argc, const char **argv)
                                        ui__warning("%s event is not supported by the kernel.\n",
                                                    perf_evsel__name(counter));
                                counter->supported = false;
-                               continue;
+
+                               if ((counter->leader != counter) ||
+                                   !(counter->leader->nr_members > 1))
+                                       continue;
                        }
 
                        perf_evsel__open_strerror(counter, &target,
@@ -875,188 +757,8 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
                fprintf(output, "                                   ");
 }
 
-/* used for get_ratio_color() */
-enum grc_type {
-       GRC_STALLED_CYCLES_FE,
-       GRC_STALLED_CYCLES_BE,
-       GRC_CACHE_MISSES,
-       GRC_MAX_NR
-};
-
-static const char *get_ratio_color(enum grc_type type, double ratio)
-{
-       static const double grc_table[GRC_MAX_NR][3] = {
-               [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
-               [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
-               [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
-       };
-       const char *color = PERF_COLOR_NORMAL;
-
-       if (ratio > grc_table[type][0])
-               color = PERF_COLOR_RED;
-       else if (ratio > grc_table[type][1])
-               color = PERF_COLOR_MAGENTA;
-       else if (ratio > grc_table[type][2])
-               color = PERF_COLOR_YELLOW;
-
-       return color;
-}
-
-static void print_stalled_cycles_frontend(int cpu,
-                                         struct perf_evsel *evsel
-                                         __maybe_unused, double avg)
-{
-       double total, ratio = 0.0;
-       const char *color;
-
-       total = avg_stats(&runtime_cycles_stats[cpu]);
-
-       if (total)
-               ratio = avg / total * 100.0;
-
-       color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
-
-       fprintf(output, " #  ");
-       color_fprintf(output, color, "%6.2f%%", ratio);
-       fprintf(output, " frontend cycles idle   ");
-}
-
-static void print_stalled_cycles_backend(int cpu,
-                                        struct perf_evsel *evsel
-                                        __maybe_unused, double avg)
-{
-       double total, ratio = 0.0;
-       const char *color;
-
-       total = avg_stats(&runtime_cycles_stats[cpu]);
-
-       if (total)
-               ratio = avg / total * 100.0;
-
-       color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
-
-       fprintf(output, " #  ");
-       color_fprintf(output, color, "%6.2f%%", ratio);
-       fprintf(output, " backend  cycles idle   ");
-}
-
-static void print_branch_misses(int cpu,
-                               struct perf_evsel *evsel __maybe_unused,
-                               double avg)
-{
-       double total, ratio = 0.0;
-       const char *color;
-
-       total = avg_stats(&runtime_branches_stats[cpu]);
-
-       if (total)
-               ratio = avg / total * 100.0;
-
-       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(output, " #  ");
-       color_fprintf(output, color, "%6.2f%%", ratio);
-       fprintf(output, " of all branches        ");
-}
-
-static void print_l1_dcache_misses(int cpu,
-                                  struct perf_evsel *evsel __maybe_unused,
-                                  double avg)
-{
-       double total, ratio = 0.0;
-       const char *color;
-
-       total = avg_stats(&runtime_l1_dcache_stats[cpu]);
-
-       if (total)
-               ratio = avg / total * 100.0;
-
-       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(output, " #  ");
-       color_fprintf(output, color, "%6.2f%%", ratio);
-       fprintf(output, " of all L1-dcache hits  ");
-}
-
-static void print_l1_icache_misses(int cpu,
-                                  struct perf_evsel *evsel __maybe_unused,
-                                  double avg)
-{
-       double total, ratio = 0.0;
-       const char *color;
-
-       total = avg_stats(&runtime_l1_icache_stats[cpu]);
-
-       if (total)
-               ratio = avg / total * 100.0;
-
-       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(output, " #  ");
-       color_fprintf(output, color, "%6.2f%%", ratio);
-       fprintf(output, " of all L1-icache hits  ");
-}
-
-static void print_dtlb_cache_misses(int cpu,
-                                   struct perf_evsel *evsel __maybe_unused,
-                                   double avg)
-{
-       double total, ratio = 0.0;
-       const char *color;
-
-       total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
-
-       if (total)
-               ratio = avg / total * 100.0;
-
-       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(output, " #  ");
-       color_fprintf(output, color, "%6.2f%%", ratio);
-       fprintf(output, " of all dTLB cache hits ");
-}
-
-static void print_itlb_cache_misses(int cpu,
-                                   struct perf_evsel *evsel __maybe_unused,
-                                   double avg)
-{
-       double total, ratio = 0.0;
-       const char *color;
-
-       total = avg_stats(&runtime_itlb_cache_stats[cpu]);
-
-       if (total)
-               ratio = avg / total * 100.0;
-
-       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(output, " #  ");
-       color_fprintf(output, color, "%6.2f%%", ratio);
-       fprintf(output, " of all iTLB cache hits ");
-}
-
-static void print_ll_cache_misses(int cpu,
-                                 struct perf_evsel *evsel __maybe_unused,
-                                 double avg)
-{
-       double total, ratio = 0.0;
-       const char *color;
-
-       total = avg_stats(&runtime_ll_cache_stats[cpu]);
-
-       if (total)
-               ratio = avg / total * 100.0;
-
-       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(output, " #  ");
-       color_fprintf(output, color, "%6.2f%%", ratio);
-       fprintf(output, " of all LL-cache hits   ");
-}
-
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
-       double total, ratio = 0.0, total2;
        double sc =  evsel->scale;
        const char *fmt;
        int cpu = cpu_map__id_to_cpu(id);
@@ -1090,138 +792,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
        if (csv_output || interval)
                return;
 
-       if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
-               total = avg_stats(&runtime_cycles_stats[cpu]);
-               if (total) {
-                       ratio = avg / total;
-                       fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
-               } else {
-                       fprintf(output, "                                   ");
-               }
-               total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
-               total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
-
-               if (total && avg) {
-                       ratio = total / avg;
-                       fprintf(output, "\n");
-                       if (aggr_mode == AGGR_NONE)
-                               fprintf(output, "        ");
-                       fprintf(output, "                                                  #   %5.2f  stalled cycles per insn", ratio);
-               }
-
-       } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
-                       runtime_branches_stats[cpu].n != 0) {
-               print_branch_misses(cpu, evsel, avg);
-       } else if (
-               evsel->attr.type == PERF_TYPE_HW_CACHE &&
-               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
-                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_l1_dcache_stats[cpu].n != 0) {
-               print_l1_dcache_misses(cpu, evsel, avg);
-       } else if (
-               evsel->attr.type == PERF_TYPE_HW_CACHE &&
-               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
-                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_l1_icache_stats[cpu].n != 0) {
-               print_l1_icache_misses(cpu, evsel, avg);
-       } else if (
-               evsel->attr.type == PERF_TYPE_HW_CACHE &&
-               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
-                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_dtlb_cache_stats[cpu].n != 0) {
-               print_dtlb_cache_misses(cpu, evsel, avg);
-       } else if (
-               evsel->attr.type == PERF_TYPE_HW_CACHE &&
-               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
-                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_itlb_cache_stats[cpu].n != 0) {
-               print_itlb_cache_misses(cpu, evsel, avg);
-       } else if (
-               evsel->attr.type == PERF_TYPE_HW_CACHE &&
-               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
-                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_ll_cache_stats[cpu].n != 0) {
-               print_ll_cache_misses(cpu, evsel, avg);
-       } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
-                       runtime_cacherefs_stats[cpu].n != 0) {
-               total = avg_stats(&runtime_cacherefs_stats[cpu]);
-
-               if (total)
-                       ratio = avg * 100 / total;
-
-               fprintf(output, " # %8.3f %% of all cache refs    ", ratio);
-
-       } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-               print_stalled_cycles_frontend(cpu, evsel, avg);
-       } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-               print_stalled_cycles_backend(cpu, evsel, avg);
-       } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
-               total = avg_stats(&runtime_nsecs_stats[cpu]);
-
-               if (total) {
-                       ratio = avg / total;
-                       fprintf(output, " # %8.3f GHz                    ", ratio);
-               } else {
-                       fprintf(output, "                                   ");
-               }
-       } else if (transaction_run &&
-                  perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
-               total = avg_stats(&runtime_cycles_stats[cpu]);
-               if (total)
-                       fprintf(output,
-                               " #   %5.2f%% transactional cycles   ",
-                               100.0 * (avg / total));
-       } else if (transaction_run &&
-                  perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
-               total = avg_stats(&runtime_cycles_stats[cpu]);
-               total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
-               if (total2 < avg)
-                       total2 = avg;
-               if (total)
-                       fprintf(output,
-                               " #   %5.2f%% aborted cycles         ",
-                               100.0 * ((total2-avg) / total));
-       } else if (transaction_run &&
-                  perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
-                  avg > 0 &&
-                  runtime_cycles_in_tx_stats[cpu].n != 0) {
-               total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
-
-               if (total)
-                       ratio = total / avg;
-
-               fprintf(output, " # %8.0f cycles / transaction   ", ratio);
-       } else if (transaction_run &&
-                  perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
-                  avg > 0 &&
-                  runtime_cycles_in_tx_stats[cpu].n != 0) {
-               total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
-
-               if (total)
-                       ratio = total / avg;
-
-               fprintf(output, " # %8.0f cycles / elision       ", ratio);
-       } else if (runtime_nsecs_stats[cpu].n != 0) {
-               char unit = 'M';
-
-               total = avg_stats(&runtime_nsecs_stats[cpu]);
-
-               if (total)
-                       ratio = 1000.0 * avg / total;
-               if (ratio < 0.001) {
-                       ratio *= 1000;
-                       unit = 'K';
-               }
-
-               fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
-       } else {
-               fprintf(output, "                                   ");
-       }
+       perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
 }
 
 static void print_aggr(char *prefix)
@@ -1536,17 +1107,6 @@ static int perf_stat_init_aggr_mode(void)
        return 0;
 }
 
-static int setup_events(const char * const *attrs, unsigned len)
-{
-       unsigned i;
-
-       for (i = 0; i < len; i++) {
-               if (parse_events(evsel_list, attrs[i]))
-                       return -1;
-       }
-       return 0;
-}
-
 /*
  * Add default attributes, if there were no attributes specified or
  * if -d/--detailed, -d -d or -d -d -d is used:
@@ -1668,12 +1228,10 @@ static int add_default_attributes(void)
                int err;
                if (pmu_have_event("cpu", "cycles-ct") &&
                    pmu_have_event("cpu", "el-start"))
-                       err = setup_events(transaction_attrs,
-                                       ARRAY_SIZE(transaction_attrs));
+                       err = parse_events(evsel_list, transaction_attrs, NULL);
                else
-                       err = setup_events(transaction_limited_attrs,
-                                ARRAY_SIZE(transaction_limited_attrs));
-               if (err < 0) {
+                       err = parse_events(evsel_list, transaction_limited_attrs, NULL);
+               if (err) {
                        fprintf(stderr, "Cannot set up transaction events\n");
                        return -1;
                }
index e50fe11..30e5962 100644 (file)
@@ -61,13 +61,13 @@ struct timechart {
                                tasks_only,
                                with_backtrace,
                                topology;
+       bool                    force;
        /* IO related settings */
-       u64                     io_events;
        bool                    io_only,
                                skip_eagain;
+       u64                     io_events;
        u64                     min_time,
                                merge_dist;
-       bool                    force;
 };
 
 struct per_pidcomm;
@@ -523,7 +523,7 @@ static const char *cat_backtrace(union perf_event *event,
                                 * Discard all.
                                 */
                                zfree(&p);
-                               goto exit;
+                               goto exit_put;
                        }
                        continue;
                }
@@ -538,7 +538,8 @@ static const char *cat_backtrace(union perf_event *event,
                else
                        fprintf(f, "..... %016" PRIx64 "\n", ip);
        }
-
+exit_put:
+       addr_location__put(&al);
 exit:
        fclose(f);
 
index 1cb3436..619a869 100644 (file)
@@ -235,10 +235,13 @@ static void perf_top__show_details(struct perf_top *top)
 
        more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel,
                                       0, top->sym_pcnt_filter, top->print_entries, 4);
-       if (top->zero)
-               symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
-       else
-               symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+
+       if (top->evlist->enabled) {
+               if (top->zero)
+                       symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
+               else
+                       symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+       }
        if (more != 0)
                printf("%d lines not displayed, maybe increase display entries [e]\n", more);
 out_unlock:
@@ -276,11 +279,13 @@ static void perf_top__print_sym_table(struct perf_top *top)
                return;
        }
 
-       if (top->zero) {
-               hists__delete_entries(hists);
-       } else {
-               hists__decay_entries(hists, top->hide_user_symbols,
-                                    top->hide_kernel_symbols);
+       if (top->evlist->enabled) {
+               if (top->zero) {
+                       hists__delete_entries(hists);
+               } else {
+                       hists__decay_entries(hists, top->hide_user_symbols,
+                                            top->hide_kernel_symbols);
+               }
        }
 
        hists__collapse_resort(hists, NULL);
@@ -545,11 +550,13 @@ static void perf_top__sort_new_samples(void *arg)
 
        hists = evsel__hists(t->sym_evsel);
 
-       if (t->zero) {
-               hists__delete_entries(hists);
-       } else {
-               hists__decay_entries(hists, t->hide_user_symbols,
-                                    t->hide_kernel_symbols);
+       if (t->evlist->enabled) {
+               if (t->zero) {
+                       hists__delete_entries(hists);
+               } else {
+                       hists__decay_entries(hists, t->hide_user_symbols,
+                                            t->hide_kernel_symbols);
+               }
        }
 
        hists__collapse_resort(hists, NULL);
@@ -579,8 +586,27 @@ static void *display_thread_tui(void *arg)
                hists->uid_filter_str = top->record_opts.target.uid_str;
        }
 
-       perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
-                                     &top->session->header.env);
+       while (true)  {
+               int key = perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+                                                       top->min_percent,
+                                                       &top->session->header.env);
+
+               if (key != 'f')
+                       break;
+
+               perf_evlist__toggle_enable(top->evlist);
+               /*
+                * No need to refresh, resort/decay histogram entries
+                * if we are not collecting samples:
+                */
+               if (top->evlist->enabled) {
+                       hbt.refresh = top->delay_secs;
+                       help = "Press 'f' to disable the events or 'h' to see other hotkeys";
+               } else {
+                       help = "Press 'f' again to re-enable the events";
+                       hbt.refresh = 0;
+               }
+       }
 
        done = 1;
        return NULL;
@@ -733,7 +759,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
 "Check /proc/sys/kernel/kptr_restrict.\n\n"
 "Kernel%s samples will not be resolved.\n",
-                         !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
+                         al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
                          " modules" : "");
                if (use_browser <= 0)
                        sleep(5);
@@ -775,7 +801,9 @@ static void perf_event__process_sample(struct perf_tool *tool,
        if (al.sym == NULL || !al.sym->ignore) {
                struct hists *hists = evsel__hists(evsel);
                struct hist_entry_iter iter = {
-                       .add_entry_cb = hist_iter__top_callback,
+                       .evsel          = evsel,
+                       .sample         = sample,
+                       .add_entry_cb   = hist_iter__top_callback,
                };
 
                if (symbol_conf.cumulate_callchain)
@@ -785,15 +813,14 @@ static void perf_event__process_sample(struct perf_tool *tool,
 
                pthread_mutex_lock(&hists->lock);
 
-               err = hist_entry_iter__add(&iter, &al, evsel, sample,
-                                          top->max_stack, top);
+               err = hist_entry_iter__add(&iter, &al, top->max_stack, top);
                if (err < 0)
                        pr_err("Problem incrementing symbol period, skipping event\n");
 
                pthread_mutex_unlock(&hists->lock);
        }
 
-       return;
+       addr_location__put(&al);
 }
 
 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
@@ -950,7 +977,7 @@ static int __cmd_top(struct perf_top *top)
                goto out_delete;
 
        machine__synthesize_threads(&top->session->machines.host, &opts->target,
-                                   top->evlist->threads, false);
+                                   top->evlist->threads, false, opts->proc_map_timeout);
        ret = perf_top__start_counters(top);
        if (ret)
                goto out_delete;
@@ -1060,6 +1087,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                        .target         = {
                                .uses_mmap   = true,
                        },
+                       .proc_map_timeout    = 500,
                },
                .max_stack           = PERF_MAX_STACK_DEPTH,
                .sym_pcnt_filter     = 5,
@@ -1159,6 +1187,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
                   "width[,width...]",
                   "don't try to adjust column width, use these fixed values"),
+       OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
+                       "per thread proc mmap processing timeout in ms"),
        OPT_END()
        };
        const char * const top_usage[] = {
index e124741..de5d277 100644 (file)
@@ -16,7 +16,6 @@
 
 #include <libaudit.h>
 #include <stdlib.h>
-#include <sys/eventfd.h>
 #include <sys/mman.h>
 #include <linux/futex.h>
 
 # define EFD_SEMAPHORE         1
 #endif
 
+#ifndef EFD_NONBLOCK
+# define EFD_NONBLOCK          00004000
+#endif
+
+#ifndef EFD_CLOEXEC
+# define EFD_CLOEXEC           02000000
+#endif
+
+#ifndef O_CLOEXEC
+# define O_CLOEXEC             02000000
+#endif
+
+#ifndef SOCK_DCCP
+# define SOCK_DCCP             6
+#endif
+
+#ifndef SOCK_CLOEXEC
+# define SOCK_CLOEXEC          02000000
+#endif
+
+#ifndef SOCK_NONBLOCK
+# define SOCK_NONBLOCK         00004000
+#endif
+
+#ifndef MSG_CMSG_CLOEXEC
+# define MSG_CMSG_CLOEXEC      0x40000000
+#endif
+
+#ifndef PERF_FLAG_FD_NO_GROUP
+# define PERF_FLAG_FD_NO_GROUP         (1UL << 0)
+#endif
+
+#ifndef PERF_FLAG_FD_OUTPUT
+# define PERF_FLAG_FD_OUTPUT           (1UL << 1)
+#endif
+
+#ifndef PERF_FLAG_PID_CGROUP
+# define PERF_FLAG_PID_CGROUP          (1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#endif
+
+#ifndef PERF_FLAG_FD_CLOEXEC
+# define PERF_FLAG_FD_CLOEXEC          (1UL << 3) /* O_CLOEXEC */
+#endif
+
+
 struct tp_field {
        int offset;
        union {
@@ -331,6 +375,14 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 
 #define SCA_HEX syscall_arg__scnprintf_hex
 
+static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
+                                        struct syscall_arg *arg)
+{
+       return scnprintf(bf, size, "%d", arg->val);
+}
+
+#define SCA_INT syscall_arg__scnprintf_int
+
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
                                               struct syscall_arg *arg)
 {
@@ -783,6 +835,34 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 
 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 
+static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
+                                               struct syscall_arg *arg)
+{
+       int printed = 0, flags = arg->val;
+
+       if (flags == 0)
+               return 0;
+
+#define        P_FLAG(n) \
+       if (flags & PERF_FLAG_##n) { \
+               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+               flags &= ~PERF_FLAG_##n; \
+       }
+
+       P_FLAG(FD_NO_GROUP);
+       P_FLAG(FD_OUTPUT);
+       P_FLAG(PID_CGROUP);
+       P_FLAG(FD_CLOEXEC);
+#undef P_FLAG
+
+       if (flags)
+               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+       return printed;
+}
+
+#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
+
 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
                                                   struct syscall_arg *arg)
 {
@@ -1050,6 +1130,11 @@ static struct syscall_fmt {
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
+       { .name     = "perf_event_open", .errmsg = true,
+         .arg_scnprintf = { [1] = SCA_INT, /* pid */
+                            [2] = SCA_INT, /* cpu */
+                            [3] = SCA_FD,  /* group_fd */
+                            [4] = SCA_PERF_FLAGS,  /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
@@ -1433,7 +1518,8 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
                return -ENOMEM;
 
        err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
-                                           evlist->threads, trace__tool_process, false);
+                                           evlist->threads, trace__tool_process, false,
+                                           trace->opts.proc_map_timeout);
        if (err)
                symbol__exit();
 
@@ -1712,7 +1798,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
        void *args;
        size_t printed = 0;
        struct thread *thread;
-       int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+       int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
        struct syscall *sc = trace__syscall_info(trace, evsel, id);
        struct thread_trace *ttrace;
 
@@ -1725,14 +1811,14 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
        ttrace = thread__trace(thread, trace->output);
        if (ttrace == NULL)
-               return -1;
+               goto out_put;
 
        args = perf_evsel__sc_tp_ptr(evsel, args, sample);
 
        if (ttrace->entry_str == NULL) {
                ttrace->entry_str = malloc(1024);
                if (!ttrace->entry_str)
-                       return -1;
+                       goto out_put;
        }
 
        if (!trace->summary_only)
@@ -1757,8 +1843,10 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
                thread__put(trace->current);
                trace->current = thread__get(thread);
        }
-
-       return 0;
+       err = 0;
+out_put:
+       thread__put(thread);
+       return err;
 }
 
 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
@@ -1768,7 +1856,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
        long ret;
        u64 duration = 0;
        struct thread *thread;
-       int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+       int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
        struct syscall *sc = trace__syscall_info(trace, evsel, id);
        struct thread_trace *ttrace;
 
@@ -1781,7 +1869,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
        ttrace = thread__trace(thread, trace->output);
        if (ttrace == NULL)
-               return -1;
+               goto out_put;
 
        if (trace->summary)
                thread__update_stats(ttrace, id, sample);
@@ -1835,8 +1923,10 @@ signed_print:
        fputc('\n', trace->output);
 out:
        ttrace->entry_pending = false;
-
-       return 0;
+       err = 0;
+out_put:
+       thread__put(thread);
+       return err;
 }
 
 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
@@ -1863,6 +1953,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs
 
        ttrace->runtime_ms += runtime_ms;
        trace->runtime_ms += runtime_ms;
+       thread__put(thread);
        return 0;
 
 out_dump:
@@ -1872,6 +1963,7 @@ out_dump:
               (pid_t)perf_evsel__intval(evsel, sample, "pid"),
               runtime,
               perf_evsel__intval(evsel, sample, "vruntime"));
+       thread__put(thread);
        return 0;
 }
 
@@ -1924,11 +2016,12 @@ static int trace__pgfault(struct trace *trace,
        struct addr_location al;
        char map_type = 'd';
        struct thread_trace *ttrace;
+       int err = -1;
 
        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
        ttrace = thread__trace(thread, trace->output);
        if (ttrace == NULL)
-               return -1;
+               goto out_put;
 
        if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
                ttrace->pfmaj++;
@@ -1936,7 +2029,7 @@ static int trace__pgfault(struct trace *trace,
                ttrace->pfmin++;
 
        if (trace->summary_only)
-               return 0;
+               goto out;
 
        thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
                              sample->ip, &al);
@@ -1967,8 +2060,11 @@ static int trace__pgfault(struct trace *trace,
        print_location(trace->output, sample, &al, true, false);
 
        fprintf(trace->output, " (%c%c)\n", map_type, al.level);
-
-       return 0;
+out:
+       err = 0;
+out_put:
+       thread__put(thread);
+       return err;
 }
 
 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
@@ -2241,10 +2337,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (err < 0)
                goto out_error_mmap;
 
+       if (!target__none(&trace->opts.target))
+               perf_evlist__enable(evlist);
+
        if (forks)
                perf_evlist__start_workload(evlist);
-       else
-               perf_evlist__enable(evlist);
 
        trace->multiple_threads = evlist->threads->map[0] == -1 ||
                                  evlist->threads->nr > 1 ||
@@ -2272,6 +2369,11 @@ next_event:
 
                        if (interrupted)
                                goto out_disable;
+
+                       if (done && !draining) {
+                               perf_evlist__disable(evlist);
+                               draining = true;
+                       }
                }
        }
 
@@ -2646,6 +2748,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                        .user_interval = ULLONG_MAX,
                        .no_buffering  = true,
                        .mmap_pages    = UINT_MAX,
+                       .proc_map_timeout  = 500,
                },
                .output = stdout,
                .show_comm = true,
@@ -2660,16 +2763,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_BOOLEAN(0, "comm", &trace.show_comm,
                    "show the thread COMM next to its id"),
        OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
-       OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
-                   "list of events to trace"),
+       OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
        OPT_STRING('o', "output", &output_name, "file", "output file name"),
        OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
        OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
                    "trace events on existing process id"),
        OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
                    "trace events on existing thread id"),
-       OPT_CALLBACK(0, "filter-pids", &trace, "float",
-                    "show only events with duration > N.M ms", trace__set_filter_pids),
+       OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
+                    "pids to filter (by the kernel)", trace__set_filter_pids),
        OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
@@ -2696,6 +2798,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                     "Trace pagefaults", parse_pagefaults, "maj"),
        OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
        OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+       OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
+                       "per thread proc mmap processing timeout in ms"),
        OPT_END()
        };
        const char * const trace_subcommands[] = { "record", NULL };
@@ -2706,11 +2810,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
        signal(SIGFPE, sighandler_dump_stack);
 
        trace.evlist = perf_evlist__new();
-       if (trace.evlist == NULL)
-               return -ENOMEM;
 
        if (trace.evlist == NULL) {
                pr_err("Not enough memory to run!\n");
+               err = -ENOMEM;
                goto out;
        }
 
index 59a98c6..317001c 100644 (file)
@@ -32,7 +32,7 @@ ifeq ($(ARCH),x86)
     LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
     $(call detected,CONFIG_X86_64)
   else
-    LIBUNWIND_LIBS = -lunwind -lunwind-x86
+    LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
   endif
   NO_PERF_REGS := 0
 endif
@@ -130,6 +130,8 @@ endif
 
 ifeq ($(DEBUG),0)
   CFLAGS += -O6
+else
+  CFLAGS += $(call cc-option,-Og,-O0)
 endif
 
 ifdef PARSER_DEBUG
@@ -268,6 +270,10 @@ else
   endif # libelf support
 endif # NO_LIBELF
 
+ifdef NO_DWARF
+  NO_LIBDW_DWARF_UNWIND := 1
+endif
+
 ifndef NO_LIBELF
   CFLAGS += -DHAVE_LIBELF_SUPPORT
   EXTLIBS += -lelf
@@ -610,6 +616,11 @@ ifdef LIBBABELTRACE
   endif
 endif
 
+ifndef NO_AUXTRACE
+  $(call detected,CONFIG_AUXTRACE)
+  CFLAGS += -DHAVE_AUXTRACE_SUPPORT
+endif
+
 # Among the variables below, these:
 #   perfexecdir
 #   template_dir
index c16ce83..0ebef09 100644 (file)
@@ -177,3 +177,22 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
 endef
 _ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2)))
 _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
+
+# try-run
+# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
+# Exit code chooses option. "$$TMP" is can be used as temporary file and
+# is automatically cleaned up.
+try-run = $(shell set -e;              \
+       TMP="$(TMPOUT).$$$$.tmp";       \
+       TMPO="$(TMPOUT).$$$$.o";        \
+       if ($(1)) >/dev/null 2>&1;      \
+       then echo "$(2)";               \
+       else echo "$(3)";               \
+       fi;                             \
+       rm -f "$$TMP" "$$TMPO")
+
+# cc-option
+# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
+
+cc-option = $(call try-run,\
+       $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
index 6ef6816..83a25ce 100644 (file)
@@ -6,11 +6,9 @@
 #include <sys/syscall.h>
 #include <linux/types.h>
 #include <linux/perf_event.h>
+#include <asm/barrier.h>
 
 #if defined(__i386__)
-#define mb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-#define wmb()          asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-#define rmb()          asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 #define cpu_relax()    asm volatile("rep; nop" ::: "memory");
 #define CPUINFO_PROC   {"model name"}
 #ifndef __NR_perf_event_open
@@ -25,9 +23,6 @@
 #endif
 
 #if defined(__x86_64__)
-#define mb()           asm volatile("mfence" ::: "memory")
-#define wmb()          asm volatile("sfence" ::: "memory")
-#define rmb()          asm volatile("lfence" ::: "memory")
 #define cpu_relax()    asm volatile("rep; nop" ::: "memory");
 #define CPUINFO_PROC   {"model name"}
 #ifndef __NR_perf_event_open
 
 #ifdef __powerpc__
 #include "../../arch/powerpc/include/uapi/asm/unistd.h"
-#define mb()           asm volatile ("sync" ::: "memory")
-#define wmb()          asm volatile ("sync" ::: "memory")
-#define rmb()          asm volatile ("sync" ::: "memory")
 #define CPUINFO_PROC   {"cpu"}
 #endif
 
 #ifdef __s390__
-#define mb()           asm volatile("bcr 15,0" ::: "memory")
-#define wmb()          asm volatile("bcr 15,0" ::: "memory")
-#define rmb()          asm volatile("bcr 15,0" ::: "memory")
 #define CPUINFO_PROC   {"vendor_id"}
 #endif
 
 #ifdef __sh__
-#if defined(__SH4A__) || defined(__SH5__)
-# define mb()          asm volatile("synco" ::: "memory")
-# define wmb()         asm volatile("synco" ::: "memory")
-# define rmb()         asm volatile("synco" ::: "memory")
-#else
-# define mb()          asm volatile("" ::: "memory")
-# define wmb()         asm volatile("" ::: "memory")
-# define rmb()         asm volatile("" ::: "memory")
-#endif
 #define CPUINFO_PROC   {"cpu type"}
 #endif
 
 #ifdef __hppa__
-#define mb()           asm volatile("" ::: "memory")
-#define wmb()          asm volatile("" ::: "memory")
-#define rmb()          asm volatile("" ::: "memory")
 #define CPUINFO_PROC   {"cpu"}
 #endif
 
 #ifdef __sparc__
-#ifdef __LP64__
-#define mb()           asm volatile("ba,pt %%xcc, 1f\n"        \
-                                    "membar #StoreLoad\n"      \
-                                    "1:\n":::"memory")
-#else
-#define mb()           asm volatile("":::"memory")
-#endif
-#define wmb()          asm volatile("":::"memory")
-#define rmb()          asm volatile("":::"memory")
 #define CPUINFO_PROC   {"cpu"}
 #endif
 
 #ifdef __alpha__
-#define mb()           asm volatile("mb" ::: "memory")
-#define wmb()          asm volatile("wmb" ::: "memory")
-#define rmb()          asm volatile("mb" ::: "memory")
 #define CPUINFO_PROC   {"cpu model"}
 #endif
 
 #ifdef __ia64__
-#define mb()           asm volatile ("mf" ::: "memory")
-#define wmb()          asm volatile ("mf" ::: "memory")
-#define rmb()          asm volatile ("mf" ::: "memory")
 #define cpu_relax()    asm volatile ("hint @pause" ::: "memory")
 #define CPUINFO_PROC   {"model name"}
 #endif
 
 #ifdef __arm__
-/*
- * Use the __kuser_memory_barrier helper in the CPU helper page. See
- * arch/arm/kernel/entry-armv.S in the kernel source for details.
- */
-#define mb()           ((void(*)(void))0xffff0fa0)()
-#define wmb()          ((void(*)(void))0xffff0fa0)()
-#define rmb()          ((void(*)(void))0xffff0fa0)()
 #define CPUINFO_PROC   {"model name", "Processor"}
 #endif
 
 #ifdef __aarch64__
-#define mb()           asm volatile("dmb ish" ::: "memory")
-#define wmb()          asm volatile("dmb ishst" ::: "memory")
-#define rmb()          asm volatile("dmb ishld" ::: "memory")
 #define cpu_relax()    asm volatile("yield" ::: "memory")
 #endif
 
 #ifdef __mips__
-#define mb()           asm volatile(                                   \
-                               ".set   mips2\n\t"                      \
-                               "sync\n\t"                              \
-                               ".set   mips0"                          \
-                               : /* no output */                       \
-                               : /* no input */                        \
-                               : "memory")
-#define wmb()  mb()
-#define rmb()  mb()
 #define CPUINFO_PROC   {"cpu model"}
 #endif
 
 #ifdef __arc__
-#define mb()           asm volatile("" ::: "memory")
-#define wmb()          asm volatile("" ::: "memory")
-#define rmb()          asm volatile("" ::: "memory")
 #define CPUINFO_PROC   {"Processor"}
 #endif
 
 #ifdef __metag__
-#define mb()           asm volatile("" ::: "memory")
-#define wmb()          asm volatile("" ::: "memory")
-#define rmb()          asm volatile("" ::: "memory")
 #define CPUINFO_PROC   {"CPU"}
 #endif
 
 #ifdef __xtensa__
-#define mb()           asm volatile("memw" ::: "memory")
-#define wmb()          asm volatile("memw" ::: "memory")
-#define rmb()          asm volatile("" ::: "memory")
 #define CPUINFO_PROC   {"core ID"}
 #endif
 
 #ifdef __tile__
-#define mb()           asm volatile ("mf" ::: "memory")
-#define wmb()          asm volatile ("mf" ::: "memory")
-#define rmb()          asm volatile ("mf" ::: "memory")
 #define cpu_relax()    asm volatile ("mfspr zero, PASS" ::: "memory")
 #define CPUINFO_PROC    {"model name"}
 #endif
 
-#define barrier() asm volatile ("" ::: "memory")
-
 #ifndef cpu_relax
 #define cpu_relax() barrier()
 #endif
index e14bb63..4a5827f 100644 (file)
@@ -54,16 +54,22 @@ struct record_opts {
        bool         period;
        bool         sample_intr_regs;
        bool         running_time;
+       bool         full_auxtrace;
+       bool         auxtrace_snapshot_mode;
        unsigned int freq;
        unsigned int mmap_pages;
+       unsigned int auxtrace_mmap_pages;
        unsigned int user_freq;
        u64          branch_stack;
        u64          default_interval;
        u64          user_interval;
+       size_t       auxtrace_snapshot_size;
+       const char   *auxtrace_snapshot_opts;
        bool         sample_transaction;
        unsigned     initial_delay;
        bool         use_clockid;
        clockid_t    clockid;
+       unsigned int proc_map_timeout;
 };
 
 struct option;
index 6a8801b..ee41e70 100644 (file)
@@ -3,9 +3,9 @@ perf-y += parse-events.o
 perf-y += dso-data.o
 perf-y += attr.o
 perf-y += vmlinux-kallsyms.o
-perf-y += open-syscall.o
-perf-y += open-syscall-all-cpus.o
-perf-y += open-syscall-tp-fields.o
+perf-y += openat-syscall.o
+perf-y += openat-syscall-all-cpus.o
+perf-y += openat-syscall-tp-fields.o
 perf-y += mmap-basic.o
 perf-y += perf-record.o
 perf-y += rdpmc.o
@@ -34,7 +34,7 @@ perf-y += kmod-path.o
 
 perf-$(CONFIG_X86) += perf-time-to-tsc.o
 
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
 
index 4f40981..87b9961 100644 (file)
@@ -23,12 +23,12 @@ static struct test {
                .func = test__vmlinux_matches_kallsyms,
        },
        {
-               .desc = "detect open syscall event",
-               .func = test__open_syscall_event,
+               .desc = "detect openat syscall event",
+               .func = test__openat_syscall_event,
        },
        {
-               .desc = "detect open syscall event on all cpus",
-               .func = test__open_syscall_event_on_all_cpus,
+               .desc = "detect openat syscall event on all cpus",
+               .func = test__openat_syscall_event_on_all_cpus,
        },
        {
                .desc = "read samples using the mmap interface",
@@ -73,8 +73,8 @@ static struct test {
                .func = test__perf_evsel__tp_sched_test,
        },
        {
-               .desc = "Generate and check syscalls:sys_enter_open event fields",
-               .func = test__syscall_open_tp_fields,
+               .desc = "Generate and check syscalls:sys_enter_openat event fields",
+               .func = test__syscall_openat_tp_fields,
        },
        {
                .desc = "struct perf_event_attr setup",
@@ -126,7 +126,7 @@ static struct test {
                .desc = "Test parsing with no sample_id_all bit set",
                .func = test__parse_no_sample_id_all,
        },
-#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
        {
                .desc = "Test dwarf unwind",
@@ -219,7 +219,7 @@ static int run_test(struct test *test)
        wait(&status);
 
        if (WIFEXITED(status)) {
-               err = WEXITSTATUS(status);
+               err = (signed char)WEXITSTATUS(status);
                pr_debug("test child finished with %d\n", err);
        } else if (WIFSIGNALED(status)) {
                err = -1;
index f671ec3..22f8a00 100644 (file)
@@ -248,6 +248,7 @@ static int process_sample_event(struct machine *machine,
        struct perf_sample sample;
        struct thread *thread;
        u8 cpumode;
+       int ret;
 
        if (perf_evlist__parse_sample(evlist, event, &sample)) {
                pr_debug("perf_evlist__parse_sample failed\n");
@@ -262,7 +263,9 @@ static int process_sample_event(struct machine *machine,
 
        cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-       return read_object_code(sample.ip, READLEN, cpumode, thread, state);
+       ret = read_object_code(sample.ip, READLEN, cpumode, thread, state);
+       thread__put(thread);
+       return ret;
 }
 
 static int process_event(struct machine *machine, struct perf_evlist *evlist,
@@ -448,7 +451,7 @@ static int do_test_code_reading(bool try_kcore)
        }
 
        ret = perf_event__synthesize_thread_map(NULL, threads,
-                                               perf_event__process, machine, false);
+                                               perf_event__process, machine, false, 500);
        if (ret < 0) {
                pr_debug("perf_event__synthesize_thread_map failed\n");
                goto out_err;
@@ -457,13 +460,13 @@ static int do_test_code_reading(bool try_kcore)
        thread = machine__findnew_thread(machine, pid, pid);
        if (!thread) {
                pr_debug("machine__findnew_thread failed\n");
-               goto out_err;
+               goto out_put;
        }
 
        cpus = cpu_map__new(NULL);
        if (!cpus) {
                pr_debug("cpu_map__new failed\n");
-               goto out_err;
+               goto out_put;
        }
 
        while (1) {
@@ -472,7 +475,7 @@ static int do_test_code_reading(bool try_kcore)
                evlist = perf_evlist__new();
                if (!evlist) {
                        pr_debug("perf_evlist__new failed\n");
-                       goto out_err;
+                       goto out_put;
                }
 
                perf_evlist__set_maps(evlist, cpus, threads);
@@ -482,10 +485,10 @@ static int do_test_code_reading(bool try_kcore)
                else
                        str = "cycles";
                pr_debug("Parsing event '%s'\n", str);
-               ret = parse_events(evlist, str);
+               ret = parse_events(evlist, str, NULL);
                if (ret < 0) {
                        pr_debug("parse_events failed\n");
-                       goto out_err;
+                       goto out_put;
                }
 
                perf_evlist__config(evlist, &opts);
@@ -506,7 +509,7 @@ static int do_test_code_reading(bool try_kcore)
                                continue;
                        }
                        pr_debug("perf_evlist__open failed\n");
-                       goto out_err;
+                       goto out_put;
                }
                break;
        }
@@ -514,7 +517,7 @@ static int do_test_code_reading(bool try_kcore)
        ret = perf_evlist__mmap(evlist, UINT_MAX, false);
        if (ret < 0) {
                pr_debug("perf_evlist__mmap failed\n");
-               goto out_err;
+               goto out_put;
        }
 
        perf_evlist__enable(evlist);
@@ -525,7 +528,7 @@ static int do_test_code_reading(bool try_kcore)
 
        ret = process_events(machine, evlist, &state);
        if (ret < 0)
-               goto out_err;
+               goto out_put;
 
        if (!have_vmlinux && !have_kcore && !try_kcore)
                err = TEST_CODE_READING_NO_KERNEL_OBJ;
@@ -535,7 +538,10 @@ static int do_test_code_reading(bool try_kcore)
                err = TEST_CODE_READING_NO_ACCESS;
        else
                err = TEST_CODE_READING_OK;
+out_put:
+       thread__put(thread);
 out_err:
+
        if (evlist) {
                perf_evlist__delete(evlist);
        } else {
index 513e5fe..a218aea 100644 (file)
@@ -99,6 +99,17 @@ struct test_data_offset offsets[] = {
        },
 };
 
+/* move it from util/dso.c for compatibility */
+static int dso__data_fd(struct dso *dso, struct machine *machine)
+{
+       int fd = dso__data_get_fd(dso, machine);
+
+       if (fd >= 0)
+               dso__data_put_fd(dso);
+
+       return fd;
+}
+
 int test__dso_data(void)
 {
        struct machine machine;
@@ -155,7 +166,7 @@ int test__dso_data(void)
                free(buf);
        }
 
-       dso__delete(dso);
+       dso__put(dso);
        unlink(file);
        return 0;
 }
@@ -215,7 +226,7 @@ static void dsos__delete(int cnt)
                struct dso *dso = dsos[i];
 
                unlink(dso->name);
-               dso__delete(dso);
+               dso__put(dso);
        }
 
        free(dsos);
index 0bf06be..40b36c4 100644 (file)
@@ -28,7 +28,7 @@ static int init_live_machine(struct machine *machine)
        pid_t pid = getpid();
 
        return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
-                                                 mmap_handler, machine, true);
+                                                 mmap_handler, machine, true, 500);
 }
 
 #define MAX_STACK 8
@@ -170,6 +170,7 @@ int test__dwarf_unwind(void)
        }
 
        err = krava_1(thread);
+       thread__put(thread);
 
  out:
        machine__delete_threads(machine);
index b8d8341..3fa7159 100644 (file)
@@ -23,7 +23,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
                        for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
                                __perf_evsel__hw_cache_type_op_res_name(type, op, i,
                                                                        name, sizeof(name));
-                               err = parse_events(evlist, name);
+                               err = parse_events(evlist, name, NULL);
                                if (err)
                                        ret = err;
                        }
@@ -71,7 +71,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
                 return -ENOMEM;
 
        for (i = 0; i < nr_names; ++i) {
-               err = parse_events(evlist, names[i]);
+               err = parse_events(evlist, names[i], NULL);
                if (err) {
                        pr_debug("failed to parse event '%s', err %d\n",
                                 names[i], err);
index a62c091..ce80b27 100644 (file)
@@ -96,6 +96,7 @@ struct machine *setup_fake_machine(struct machines *machines)
                        goto out;
 
                thread__set_comm(thread, fake_threads[i].comm, 0);
+               thread__put(thread);
        }
 
        for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
@@ -120,8 +121,7 @@ struct machine *setup_fake_machine(struct machines *machines)
                size_t k;
                struct dso *dso;
 
-               dso = __dsos__findnew(&machine->user_dsos,
-                                     fake_symbols[i].dso_name);
+               dso = machine__findnew_dso(machine, fake_symbols[i].dso_name);
                if (dso == NULL)
                        goto out;
 
@@ -134,11 +134,15 @@ struct machine *setup_fake_machine(struct machines *machines)
 
                        sym = symbol__new(fsym->start, fsym->length,
                                          STB_GLOBAL, fsym->name);
-                       if (sym == NULL)
+                       if (sym == NULL) {
+                               dso__put(dso);
                                goto out;
+                       }
 
                        symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
                }
+
+               dso__put(dso);
        }
 
        return machine;
index 1861996..7d82c8b 100644 (file)
@@ -87,6 +87,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                        },
                };
                struct hist_entry_iter iter = {
+                       .evsel = evsel,
+                       .sample = &sample,
                        .hide_unresolved = false,
                };
 
@@ -104,9 +106,11 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                                                  &sample) < 0)
                        goto out;
 
-               if (hist_entry_iter__add(&iter, &al, evsel, &sample,
-                                        PERF_MAX_STACK_DEPTH, NULL) < 0)
+               if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+                                        NULL) < 0) {
+                       addr_location__put(&al);
                        goto out;
+               }
 
                fake_samples[i].thread = al.thread;
                fake_samples[i].map = al.map;
@@ -695,7 +699,7 @@ int test__hists_cumulate(void)
 
        TEST_ASSERT_VAL("No memory", evlist);
 
-       err = parse_events(evlist, "cpu-clock");
+       err = parse_events(evlist, "cpu-clock", NULL);
        if (err)
                goto out;
 
index 59e53db..ce48775 100644 (file)
@@ -63,6 +63,8 @@ static int add_hist_entries(struct perf_evlist *evlist,
                                },
                        };
                        struct hist_entry_iter iter = {
+                               .evsel = evsel,
+                               .sample = &sample,
                                .ops = &hist_iter_normal,
                                .hide_unresolved = false,
                        };
@@ -81,9 +83,11 @@ static int add_hist_entries(struct perf_evlist *evlist,
                                                          &sample) < 0)
                                goto out;
 
-                       if (hist_entry_iter__add(&iter, &al, evsel, &sample,
-                                                PERF_MAX_STACK_DEPTH, NULL) < 0)
+                       if (hist_entry_iter__add(&iter, &al,
+                                                PERF_MAX_STACK_DEPTH, NULL) < 0) {
+                               addr_location__put(&al);
                                goto out;
+                       }
 
                        fake_samples[i].thread = al.thread;
                        fake_samples[i].map = al.map;
@@ -108,10 +112,10 @@ int test__hists_filter(void)
 
        TEST_ASSERT_VAL("No memory", evlist);
 
-       err = parse_events(evlist, "cpu-clock");
+       err = parse_events(evlist, "cpu-clock", NULL);
        if (err)
                goto out;
-       err = parse_events(evlist, "task-clock");
+       err = parse_events(evlist, "task-clock", NULL);
        if (err)
                goto out;
 
index 278ba83..8c102b0 100644 (file)
@@ -91,8 +91,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 
                        he = __hists__add_entry(hists, &al, NULL,
                                                NULL, NULL, 1, 1, 0, true);
-                       if (he == NULL)
+                       if (he == NULL) {
+                               addr_location__put(&al);
                                goto out;
+                       }
 
                        fake_common_samples[k].thread = al.thread;
                        fake_common_samples[k].map = al.map;
@@ -115,8 +117,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 
                        he = __hists__add_entry(hists, &al, NULL,
                                                NULL, NULL, 1, 1, 0, true);
-                       if (he == NULL)
+                       if (he == NULL) {
+                               addr_location__put(&al);
                                goto out;
+                       }
 
                        fake_samples[i][k].thread = al.thread;
                        fake_samples[i][k].map = al.map;
@@ -282,10 +286,10 @@ int test__hists_link(void)
        if (evlist == NULL)
                 return -ENOMEM;
 
-       err = parse_events(evlist, "cpu-clock");
+       err = parse_events(evlist, "cpu-clock", NULL);
        if (err)
                goto out;
-       err = parse_events(evlist, "task-clock");
+       err = parse_events(evlist, "task-clock", NULL);
        if (err)
                goto out;
 
index b52c9fa..adbebc8 100644 (file)
@@ -57,6 +57,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                        },
                };
                struct hist_entry_iter iter = {
+                       .evsel = evsel,
+                       .sample = &sample,
                        .ops = &hist_iter_normal,
                        .hide_unresolved = false,
                };
@@ -70,9 +72,11 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                                                  &sample) < 0)
                        goto out;
 
-               if (hist_entry_iter__add(&iter, &al, evsel, &sample,
-                                        PERF_MAX_STACK_DEPTH, NULL) < 0)
+               if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+                                        NULL) < 0) {
+                       addr_location__put(&al);
                        goto out;
+               }
 
                fake_samples[i].thread = al.thread;
                fake_samples[i].map = al.map;
@@ -590,7 +594,7 @@ int test__hists_output(void)
 
        TEST_ASSERT_VAL("No memory", evlist);
 
-       err = parse_events(evlist, "cpu-clock");
+       err = parse_events(evlist, "cpu-clock", NULL);
        if (err)
                goto out;
 
index 7a5ab7b..5b171d1 100644 (file)
@@ -78,8 +78,8 @@ int test__keep_tracking(void)
 
        perf_evlist__set_maps(evlist, cpus, threads);
 
-       CHECK__(parse_events(evlist, "dummy:u"));
-       CHECK__(parse_events(evlist, "cycles:u"));
+       CHECK__(parse_events(evlist, "dummy:u", NULL));
+       CHECK__(parse_events(evlist, "cycles:u", NULL));
 
        perf_evlist__config(evlist, &opts);
 
index e8d7cbb..08c433b 100644 (file)
@@ -34,9 +34,21 @@ static int test(const char *path, bool alloc_name, bool alloc_ext,
        return 0;
 }
 
+static int test_is_kernel_module(const char *path, int cpumode, bool expect)
+{
+       TEST_ASSERT_VAL("is_kernel_module",
+                       (!!is_kernel_module(path, cpumode)) == (!!expect));
+       pr_debug("%s (cpumode: %d) - is_kernel_module: %s\n",
+                       path, cpumode, expect ? "true" : "false");
+       return 0;
+}
+
 #define T(path, an, ae, k, c, n, e) \
        TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e))
 
+#define M(path, c, e) \
+       TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
+
 int test__kmod_path__parse(void)
 {
        /* path                alloc_name  alloc_ext   kmod  comp   name     ext */
@@ -44,30 +56,90 @@ int test__kmod_path__parse(void)
        T("/xxxx/xxxx/x-x.ko", false     , true      , true, false, NULL   , NULL);
        T("/xxxx/xxxx/x-x.ko", true      , false     , true, false, "[x_x]", NULL);
        T("/xxxx/xxxx/x-x.ko", false     , false     , true, false, NULL   , NULL);
+       M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+       M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
+       M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
 
        /* path                alloc_name  alloc_ext   kmod  comp  name   ext */
        T("/xxxx/xxxx/x.ko.gz", true     , true      , true, true, "[x]", "gz");
        T("/xxxx/xxxx/x.ko.gz", false    , true      , true, true, NULL , "gz");
        T("/xxxx/xxxx/x.ko.gz", true     , false     , true, true, "[x]", NULL);
        T("/xxxx/xxxx/x.ko.gz", false    , false     , true, true, NULL , NULL);
+       M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+       M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+       M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_USER, false);
 
        /* path              alloc_name  alloc_ext  kmod   comp  name    ext */
        T("/xxxx/xxxx/x.gz", true      , true     , false, true, "x.gz" ,"gz");
        T("/xxxx/xxxx/x.gz", false     , true     , false, true, NULL   ,"gz");
        T("/xxxx/xxxx/x.gz", true      , false    , false, true, "x.gz" , NULL);
        T("/xxxx/xxxx/x.gz", false     , false    , false, true, NULL   , NULL);
+       M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+       M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_KERNEL, false);
+       M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_USER, false);
 
        /* path   alloc_name  alloc_ext  kmod   comp  name     ext */
        T("x.gz", true      , true     , false, true, "x.gz", "gz");
        T("x.gz", false     , true     , false, true, NULL  , "gz");
        T("x.gz", true      , false    , false, true, "x.gz", NULL);
        T("x.gz", false     , false    , false, true, NULL  , NULL);
+       M("x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+       M("x.gz", PERF_RECORD_MISC_KERNEL, false);
+       M("x.gz", PERF_RECORD_MISC_USER, false);
 
        /* path      alloc_name  alloc_ext  kmod  comp  name  ext */
        T("x.ko.gz", true      , true     , true, true, "[x]", "gz");
        T("x.ko.gz", false     , true     , true, true, NULL , "gz");
        T("x.ko.gz", true      , false    , true, true, "[x]", NULL);
        T("x.ko.gz", false     , false    , true, true, NULL , NULL);
+       M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+       M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+       M("x.ko.gz", PERF_RECORD_MISC_USER, false);
+
+       /* path            alloc_name  alloc_ext  kmod  comp   name             ext */
+       T("[test_module]", true      , true     , true, false, "[test_module]", NULL);
+       T("[test_module]", false     , true     , true, false, NULL           , NULL);
+       T("[test_module]", true      , false    , true, false, "[test_module]", NULL);
+       T("[test_module]", false     , false    , true, false, NULL           , NULL);
+       M("[test_module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+       M("[test_module]", PERF_RECORD_MISC_KERNEL, true);
+       M("[test_module]", PERF_RECORD_MISC_USER, false);
+
+       /* path            alloc_name  alloc_ext  kmod  comp   name             ext */
+       T("[test.module]", true      , true     , true, false, "[test.module]", NULL);
+       T("[test.module]", false     , true     , true, false, NULL           , NULL);
+       T("[test.module]", true      , false    , true, false, "[test.module]", NULL);
+       T("[test.module]", false     , false    , true, false, NULL           , NULL);
+       M("[test.module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+       M("[test.module]", PERF_RECORD_MISC_KERNEL, true);
+       M("[test.module]", PERF_RECORD_MISC_USER, false);
+
+       /* path     alloc_name  alloc_ext  kmod   comp   name      ext */
+       T("[vdso]", true      , true     , false, false, "[vdso]", NULL);
+       T("[vdso]", false     , true     , false, false, NULL    , NULL);
+       T("[vdso]", true      , false    , false, false, "[vdso]", NULL);
+       T("[vdso]", false     , false    , false, false, NULL    , NULL);
+       M("[vdso]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+       M("[vdso]", PERF_RECORD_MISC_KERNEL, false);
+       M("[vdso]", PERF_RECORD_MISC_USER, false);
+
+       /* path         alloc_name  alloc_ext  kmod   comp   name          ext */
+       T("[vsyscall]", true      , true     , false, false, "[vsyscall]", NULL);
+       T("[vsyscall]", false     , true     , false, false, NULL        , NULL);
+       T("[vsyscall]", true      , false    , false, false, "[vsyscall]", NULL);
+       T("[vsyscall]", false     , false    , false, false, NULL        , NULL);
+       M("[vsyscall]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+       M("[vsyscall]", PERF_RECORD_MISC_KERNEL, false);
+       M("[vsyscall]", PERF_RECORD_MISC_USER, false);
+
+       /* path                alloc_name  alloc_ext  kmod   comp   name      ext */
+       T("[kernel.kallsyms]", true      , true     , false, false, "[kernel.kallsyms]", NULL);
+       T("[kernel.kallsyms]", false     , true     , false, false, NULL               , NULL);
+       T("[kernel.kallsyms]", true      , false    , false, false, "[kernel.kallsyms]", NULL);
+       T("[kernel.kallsyms]", false     , false    , false, false, NULL               , NULL);
+       M("[kernel.kallsyms]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+       M("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL, false);
+       M("[kernel.kallsyms]", PERF_RECORD_MISC_USER, false);
 
        return 0;
 }
index bff8532..65280d2 100644 (file)
@@ -32,6 +32,7 @@ make_no_backtrace   := NO_BACKTRACE=1
 make_no_libnuma     := NO_LIBNUMA=1
 make_no_libaudit    := NO_LIBAUDIT=1
 make_no_libbionic   := NO_LIBBIONIC=1
+make_no_auxtrace    := NO_AUXTRACE=1
 make_tags           := tags
 make_cscope         := cscope
 make_help           := help
@@ -52,7 +53,7 @@ make_static         := LDFLAGS=-static
 make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
-make_minimal        += NO_LIBDW_DWARF_UNWIND=1
+make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1
 
 # $(run) contains all available tests
 run := make_pure
@@ -74,6 +75,7 @@ run += make_no_backtrace
 run += make_no_libnuma
 run += make_no_libaudit
 run += make_no_libbionic
+run += make_no_auxtrace
 run += make_help
 run += make_doc
 run += make_perf_o
@@ -223,7 +225,19 @@ tarpkg:
        echo "- $@: $$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1
 
-all: $(run) $(run_O) tarpkg
+make_kernelsrc:
+       @echo " - make -C <kernelsrc> tools/perf"
+       $(call clean); \
+       (make -C ../.. tools/perf) > $@ 2>&1 && \
+       test -x perf && rm -f $@ || (cat $@ ; false)
+
+make_kernelsrc_tools:
+       @echo " - make -C <kernelsrc>/tools perf"
+       $(call clean); \
+       (make -C ../../tools perf) > $@ 2>&1 && \
+       test -x perf && rm -f $@ || (cat $@ ; false)
+
+all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
        @echo OK
 
 out: $(run_O)
index 9b9622a..5855cf4 100644 (file)
@@ -23,10 +23,8 @@ int test__basic_mmap(void)
        struct cpu_map *cpus;
        struct perf_evlist *evlist;
        cpu_set_t cpu_set;
-       const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
-                                       "getpgid", };
-       pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
-                                     (void*)getpgid };
+       const char *syscall_names[] = { "getsid", "getppid", "getpgid", };
+       pid_t (*syscalls[])(void) = { (void *)getsid, getppid, (void*)getpgid };
 #define nsyscalls ARRAY_SIZE(syscall_names)
        unsigned int nr_events[nsyscalls],
                     expected_nr_events[nsyscalls], i, j;
index 2113f1c..7f48efa 100644 (file)
@@ -129,7 +129,7 @@ static int synth_all(struct machine *machine)
 {
        return perf_event__synthesize_threads(NULL,
                                              perf_event__process,
-                                             machine, 0);
+                                             machine, 0, 500);
 }
 
 static int synth_process(struct machine *machine)
@@ -141,7 +141,7 @@ static int synth_process(struct machine *machine)
 
        err = perf_event__synthesize_thread_map(NULL, map,
                                                perf_event__process,
-                                               machine, 0);
+                                               machine, 0, 500);
 
        thread_map__delete(map);
        return err;
@@ -191,6 +191,8 @@ static int mmap_events(synth_cb synth)
                                      PERF_RECORD_MISC_USER, MAP__FUNCTION,
                                      (unsigned long) (td->map + 1), &al);
 
+               thread__put(thread);
+
                if (!al.map) {
                        pr_debug("failed, couldn't find map\n");
                        err = -1;
similarity index 90%
rename from tools/perf/tests/open-syscall-all-cpus.c
rename to tools/perf/tests/openat-syscall-all-cpus.c
index 3ec885c..9a7a116 100644 (file)
@@ -3,13 +3,14 @@
 #include "thread_map.h"
 #include "cpumap.h"
 #include "debug.h"
+#include "stat.h"
 
-int test__open_syscall_event_on_all_cpus(void)
+int test__openat_syscall_event_on_all_cpus(void)
 {
        int err = -1, fd, cpu;
        struct cpu_map *cpus;
        struct perf_evsel *evsel;
-       unsigned int nr_open_calls = 111, i;
+       unsigned int nr_openat_calls = 111, i;
        cpu_set_t cpu_set;
        struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
        char sbuf[STRERR_BUFSIZE];
@@ -27,7 +28,7 @@ int test__open_syscall_event_on_all_cpus(void)
 
        CPU_ZERO(&cpu_set);
 
-       evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
+       evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
        if (evsel == NULL) {
                if (tracefs_configured())
                        pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
@@ -46,7 +47,7 @@ int test__open_syscall_event_on_all_cpus(void)
        }
 
        for (cpu = 0; cpu < cpus->nr; ++cpu) {
-               unsigned int ncalls = nr_open_calls + cpu;
+               unsigned int ncalls = nr_openat_calls + cpu;
                /*
                 * XXX eventually lift this restriction in a way that
                 * keeps perf building on older glibc installations
@@ -66,7 +67,7 @@ int test__open_syscall_event_on_all_cpus(void)
                        goto out_close_fd;
                }
                for (i = 0; i < ncalls; ++i) {
-                       fd = open("/etc/passwd", O_RDONLY);
+                       fd = openat(0, "/etc/passwd", O_RDONLY);
                        close(fd);
                }
                CPU_CLR(cpus->map[cpu], &cpu_set);
@@ -96,7 +97,7 @@ int test__open_syscall_event_on_all_cpus(void)
                        break;
                }
 
-               expected = nr_open_calls + cpu;
+               expected = nr_openat_calls + cpu;
                if (evsel->counts->cpu[cpu].val != expected) {
                        pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
                                 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
similarity index 94%
rename from tools/perf/tests/open-syscall-tp-fields.c
rename to tools/perf/tests/openat-syscall-tp-fields.c
index 127dcae..6245221 100644 (file)
@@ -5,7 +5,7 @@
 #include "tests.h"
 #include "debug.h"
 
-int test__syscall_open_tp_fields(void)
+int test__syscall_openat_tp_fields(void)
 {
        struct record_opts opts = {
                .target = {
@@ -29,7 +29,7 @@ int test__syscall_open_tp_fields(void)
                goto out;
        }
 
-       evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
+       evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
        if (evsel == NULL) {
                pr_debug("%s: perf_evsel__newtp\n", __func__);
                goto out_delete_evlist;
@@ -66,7 +66,7 @@ int test__syscall_open_tp_fields(void)
        /*
         * Generate the event:
         */
-       open(filename, flags);
+       openat(AT_FDCWD, filename, flags);
 
        while (1) {
                int before = nr_events;
similarity index 79%
rename from tools/perf/tests/open-syscall.c
rename to tools/perf/tests/openat-syscall.c
index 07aa319..9f9491b 100644 (file)
@@ -3,11 +3,11 @@
 #include "debug.h"
 #include "tests.h"
 
-int test__open_syscall_event(void)
+int test__openat_syscall_event(void)
 {
        int err = -1, fd;
        struct perf_evsel *evsel;
-       unsigned int nr_open_calls = 111, i;
+       unsigned int nr_openat_calls = 111, i;
        struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
        char sbuf[STRERR_BUFSIZE];
 
@@ -16,7 +16,7 @@ int test__open_syscall_event(void)
                return -1;
        }
 
-       evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
+       evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
        if (evsel == NULL) {
                if (tracefs_configured())
                        pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
@@ -34,8 +34,8 @@ int test__open_syscall_event(void)
                goto out_evsel_delete;
        }
 
-       for (i = 0; i < nr_open_calls; ++i) {
-               fd = open("/etc/passwd", O_RDONLY);
+       for (i = 0; i < nr_openat_calls; ++i) {
+               fd = openat(0, "/etc/passwd", O_RDONLY);
                close(fd);
        }
 
@@ -44,9 +44,9 @@ int test__open_syscall_event(void)
                goto out_close_fd;
        }
 
-       if (evsel->counts->cpu[0].val != nr_open_calls) {
+       if (evsel->counts->cpu[0].val != nr_openat_calls) {
                pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
-                        nr_open_calls, evsel->counts->cpu[0].val);
+                        nr_openat_calls, evsel->counts->cpu[0].val);
                goto out_close_fd;
        }
 
index 3de7449..d76963f 100644 (file)
@@ -427,7 +427,7 @@ static int test__checkevent_list(struct perf_evlist *evlist)
        TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
        TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 
-       /* syscalls:sys_enter_open:k */
+       /* syscalls:sys_enter_openat:k */
        evsel = perf_evsel__next(evsel);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
        TEST_ASSERT_VAL("wrong sample_type",
@@ -665,7 +665,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
        TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
        TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
-       /* group1 syscalls:sys_enter_open:H */
+       /* group1 syscalls:sys_enter_openat:H */
        evsel = leader = perf_evlist__first(evlist);
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
        TEST_ASSERT_VAL("wrong sample_type",
@@ -1293,7 +1293,7 @@ struct evlist_test {
 
 static struct evlist_test test__events[] = {
        {
-               .name  = "syscalls:sys_enter_open",
+               .name  = "syscalls:sys_enter_openat",
                .check = test__checkevent_tracepoint,
                .id    = 0,
        },
@@ -1353,7 +1353,7 @@ static struct evlist_test test__events[] = {
                .id    = 11,
        },
        {
-               .name  = "syscalls:sys_enter_open:k",
+               .name  = "syscalls:sys_enter_openat:k",
                .check = test__checkevent_tracepoint_modifier,
                .id    = 12,
        },
@@ -1408,7 +1408,7 @@ static struct evlist_test test__events[] = {
                .id    = 22,
        },
        {
-               .name  = "r1,syscalls:sys_enter_open:k,1:1:hp",
+               .name  = "r1,syscalls:sys_enter_openat:k,1:1:hp",
                .check = test__checkevent_list,
                .id    = 23,
        },
@@ -1443,7 +1443,7 @@ static struct evlist_test test__events[] = {
                .id    = 29,
        },
        {
-               .name  = "group1{syscalls:sys_enter_open:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
+               .name  = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
                .check = test__group3,
                .id    = 30,
        },
@@ -1571,7 +1571,7 @@ static int test_event(struct evlist_test *e)
        if (evlist == NULL)
                return -ENOMEM;
 
-       ret = parse_events(evlist, e->name);
+       ret = parse_events(evlist, e->name, NULL);
        if (ret) {
                pr_debug("failed to parse event '%s', err %d\n",
                         e->name, ret);
index f238442..5f49484 100644 (file)
@@ -68,7 +68,7 @@ int test__perf_time_to_tsc(void)
 
        perf_evlist__set_maps(evlist, cpus, threads);
 
-       CHECK__(parse_events(evlist, "cycles:u"));
+       CHECK__(parse_events(evlist, "cycles:u", NULL));
 
        perf_evlist__config(evlist, &opts);
 
index eeb68bb..faa04e9 100644 (file)
@@ -152,7 +152,8 @@ int test__pmu(void)
                if (ret)
                        break;
 
-               ret = perf_pmu__config_terms(&formats, &attr, terms, false);
+               ret = perf_pmu__config_terms(&formats, &attr, terms,
+                                            false, NULL);
                if (ret)
                        break;
 
index cc68648..0d31403 100644 (file)
@@ -347,7 +347,7 @@ int test__switch_tracking(void)
        perf_evlist__set_maps(evlist, cpus, threads);
 
        /* First event */
-       err = parse_events(evlist, "cpu-clock:u");
+       err = parse_events(evlist, "cpu-clock:u", NULL);
        if (err) {
                pr_debug("Failed to parse event dummy:u\n");
                goto out_err;
@@ -356,7 +356,7 @@ int test__switch_tracking(void)
        cpu_clocks_evsel = perf_evlist__last(evlist);
 
        /* Second event */
-       err = parse_events(evlist, "cycles:u");
+       err = parse_events(evlist, "cycles:u", NULL);
        if (err) {
                pr_debug("Failed to parse event cycles:u\n");
                goto out_err;
@@ -371,7 +371,7 @@ int test__switch_tracking(void)
                goto out;
        }
 
-       err = parse_events(evlist, sched_switch);
+       err = parse_events(evlist, sched_switch, NULL);
        if (err) {
                pr_debug("Failed to parse event %s\n", sched_switch);
                goto out_err;
@@ -401,7 +401,7 @@ int test__switch_tracking(void)
        perf_evsel__set_sample_bit(cycles_evsel, TIME);
 
        /* Fourth event */
-       err = parse_events(evlist, "dummy:u");
+       err = parse_events(evlist, "dummy:u", NULL);
        if (err) {
                pr_debug("Failed to parse event dummy:u\n");
                goto out_err;
index 52758a3..8e5038b 100644 (file)
@@ -9,6 +9,15 @@ do {                                                                    \
        }                                                                \
 } while (0)
 
+#define TEST_ASSERT_EQUAL(text, val, expected)                          \
+do {                                                                    \
+       if (val != expected) {                                           \
+               pr_debug("FAILED %s:%d %s (%d != %d)\n",                 \
+                        __FILE__, __LINE__, text, val, expected);       \
+               return -1;                                               \
+       }                                                                \
+} while (0)
+
 enum {
        TEST_OK   =  0,
        TEST_FAIL = -1,
@@ -17,14 +26,14 @@ enum {
 
 /* Tests */
 int test__vmlinux_matches_kallsyms(void);
-int test__open_syscall_event(void);
-int test__open_syscall_event_on_all_cpus(void);
+int test__openat_syscall_event(void);
+int test__openat_syscall_event_on_all_cpus(void);
 int test__basic_mmap(void);
 int test__PERF_RECORD(void);
 int test__rdpmc(void);
 int test__perf_evsel__roundtrip_name_test(void);
 int test__perf_evsel__tp_sched_test(void);
-int test__syscall_open_tp_fields(void);
+int test__syscall_openat_tp_fields(void);
 int test__pmu(void);
 int test__attr(void);
 int test__dso_data(void);
@@ -53,7 +62,7 @@ int test__fdarray__filter(void);
 int test__fdarray__add(void);
 int test__kmod_path__parse(void);
 
-#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
 struct perf_sample;
index b028499..01fabb1 100644 (file)
@@ -43,7 +43,7 @@ int test__thread_mg_share(void)
                        leader && t1 && t2 && t3 && other);
 
        mg = leader->mg;
-       TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 4);
+       TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 4);
 
        /* test the map groups pointer is shared */
        TEST_ASSERT_VAL("map groups don't match", mg == t1->mg);
@@ -58,33 +58,40 @@ int test__thread_mg_share(void)
        other_leader = machine__find_thread(machine, 4, 4);
        TEST_ASSERT_VAL("failed to find other leader", other_leader);
 
+       /*
+        * Ok, now that all the rbtree related operations were done,
+        * lets remove all of them from there so that we can do the
+        * refcounting tests.
+        */
+       machine__remove_thread(machine, leader);
+       machine__remove_thread(machine, t1);
+       machine__remove_thread(machine, t2);
+       machine__remove_thread(machine, t3);
+       machine__remove_thread(machine, other);
+       machine__remove_thread(machine, other_leader);
+
        other_mg = other->mg;
-       TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 2);
+       TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 2);
 
        TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg);
 
        /* release thread group */
-       thread__delete(leader);
-       TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 3);
+       thread__put(leader);
+       TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 3);
 
-       thread__delete(t1);
-       TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 2);
+       thread__put(t1);
+       TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 2);
 
-       thread__delete(t2);
-       TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 1);
+       thread__put(t2);
+       TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 1);
 
-       thread__delete(t3);
+       thread__put(t3);
 
        /* release other group  */
-       thread__delete(other_leader);
-       TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 1);
+       thread__put(other_leader);
+       TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 1);
 
-       thread__delete(other);
-
-       /*
-        * Cannot call machine__delete_threads(machine) now,
-        * because we've already released all the threads.
-        */
+       thread__put(other);
 
        machines__exit(&machines);
        return 0;
index 3d90880..b34c5fc 100644 (file)
@@ -23,9 +23,10 @@ int test__vmlinux_matches_kallsyms(void)
        int err = -1;
        struct rb_node *nd;
        struct symbol *sym;
-       struct map *kallsyms_map, *vmlinux_map;
+       struct map *kallsyms_map, *vmlinux_map, *map;
        struct machine kallsyms, vmlinux;
        enum map_type type = MAP__FUNCTION;
+       struct maps *maps = &vmlinux.kmaps.maps[type];
        u64 mem_start, mem_end;
 
        /*
@@ -184,8 +185,8 @@ detour:
 
        pr_info("Maps only in vmlinux:\n");
 
-       for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
-               struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
+       for (map = maps__first(maps); map; map = map__next(map)) {
+               struct map *
                /*
                 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
                 * the kernel will have the path for the vmlinux file being used,
@@ -193,22 +194,22 @@ detour:
                 * both cases.
                 */
                pair = map_groups__find_by_name(&kallsyms.kmaps, type,
-                                               (pos->dso->kernel ?
-                                                       pos->dso->short_name :
-                                                       pos->dso->name));
+                                               (map->dso->kernel ?
+                                                       map->dso->short_name :
+                                                       map->dso->name));
                if (pair)
                        pair->priv = 1;
                else
-                       map__fprintf(pos, stderr);
+                       map__fprintf(map, stderr);
        }
 
        pr_info("Maps in vmlinux with a different name in kallsyms:\n");
 
-       for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
-               struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
+       for (map = maps__first(maps); map; map = map__next(map)) {
+               struct map *pair;
 
-               mem_start = vmlinux_map->unmap_ip(vmlinux_map, pos->start);
-               mem_end = vmlinux_map->unmap_ip(vmlinux_map, pos->end);
+               mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
+               mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
 
                pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
                if (pair == NULL || pair->priv)
@@ -217,7 +218,7 @@ detour:
                if (pair->start == mem_start) {
                        pair->priv = 1;
                        pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
-                               pos->start, pos->end, pos->pgoff, pos->dso->name);
+                               map->start, map->end, map->pgoff, map->dso->name);
                        if (mem_end != pair->end)
                                pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64,
                                        pair->start, pair->end, pair->pgoff);
@@ -228,12 +229,11 @@ detour:
 
        pr_info("Maps only in kallsyms:\n");
 
-       for (nd = rb_first(&kallsyms.kmaps.maps[type]);
-            nd; nd = rb_next(nd)) {
-               struct map *pos = rb_entry(nd, struct map, rb_node);
+       maps = &kallsyms.kmaps.maps[type];
 
-               if (!pos->priv)
-                       map__fprintf(pos, stderr);
+       for (map = maps__first(maps); map; map = map__next(map)) {
+               if (!map->priv)
+                       map__fprintf(map, stderr);
        }
 out:
        machine__exit(&kallsyms);
index e5250eb..5995a8b 100644 (file)
 #include "../../util/evsel.h"
 #include <pthread.h>
 
+struct disasm_line_samples {
+       double          percent;
+       u64             nr;
+};
+
 struct browser_disasm_line {
-       struct rb_node  rb_node;
-       u32             idx;
-       int             idx_asm;
-       int             jump_sources;
+       struct rb_node                  rb_node;
+       u32                             idx;
+       int                             idx_asm;
+       int                             jump_sources;
        /*
         * actual length of this array is saved on the nr_events field
         * of the struct annotate_browser
         */
-       double          percent[1];
+       struct disasm_line_samples      samples[1];
 };
 
 static struct annotate_browser_opt {
@@ -28,7 +33,8 @@ static struct annotate_browser_opt {
             use_offset,
             jump_arrows,
             show_linenr,
-            show_nr_jumps;
+            show_nr_jumps,
+            show_total_period;
 } annotate_browser__opts = {
        .use_offset     = true,
        .jump_arrows    = true,
@@ -105,15 +111,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
        char bf[256];
 
        for (i = 0; i < ab->nr_events; i++) {
-               if (bdl->percent[i] > percent_max)
-                       percent_max = bdl->percent[i];
+               if (bdl->samples[i].percent > percent_max)
+                       percent_max = bdl->samples[i].percent;
        }
 
        if (dl->offset != -1 && percent_max != 0.0) {
                for (i = 0; i < ab->nr_events; i++) {
-                       ui_browser__set_percent_color(browser, bdl->percent[i],
+                       ui_browser__set_percent_color(browser,
+                                                     bdl->samples[i].percent,
                                                      current_entry);
-                       slsmg_printf("%6.2f ", bdl->percent[i]);
+                       if (annotate_browser__opts.show_total_period)
+                               slsmg_printf("%6" PRIu64 " ",
+                                            bdl->samples[i].nr);
+                       else
+                               slsmg_printf("%6.2f ", bdl->samples[i].percent);
                }
        } else {
                ui_browser__set_percent_color(browser, 0, current_entry);
@@ -273,9 +284,9 @@ static int disasm__cmp(struct browser_disasm_line *a,
        int i;
 
        for (i = 0; i < nr_pcnt; i++) {
-               if (a->percent[i] == b->percent[i])
+               if (a->samples[i].percent == b->samples[i].percent)
                        continue;
-               return a->percent[i] < b->percent[i];
+               return a->samples[i].percent < b->samples[i].percent;
        }
        return 0;
 }
@@ -366,14 +377,17 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
                next = disasm__get_next_ip_line(&notes->src->source, pos);
 
                for (i = 0; i < browser->nr_events; i++) {
-                       bpos->percent[i] = disasm__calc_percent(notes,
+                       u64 nr_samples;
+
+                       bpos->samples[i].percent = disasm__calc_percent(notes,
                                                evsel->idx + i,
                                                pos->offset,
                                                next ? next->offset : len,
-                                               &path);
+                                               &path, &nr_samples);
+                       bpos->samples[i].nr = nr_samples;
 
-                       if (max_percent < bpos->percent[i])
-                               max_percent = bpos->percent[i];
+                       if (max_percent < bpos->samples[i].percent)
+                               max_percent = bpos->samples[i].percent;
                }
 
                if (max_percent < 0.01) {
@@ -737,6 +751,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
                "n             Search next string\n"
                "o             Toggle disassembler output/simplified view\n"
                "s             Toggle source code view\n"
+               "t             Toggle total period view\n"
                "/             Search string\n"
                "k             Toggle line numbers\n"
                "r             Run available scripts\n"
@@ -812,6 +827,11 @@ show_sup_ins:
                                ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions.");
                        }
                        continue;
+               case 't':
+                       annotate_browser__opts.show_total_period =
+                         !annotate_browser__opts.show_total_period;
+                       annotate_browser__update_addr_width(browser);
+                       continue;
                case K_LEFT:
                case K_ESC:
                case 'q':
@@ -832,12 +852,20 @@ out:
 int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
                             struct hist_browser_timer *hbt)
 {
+       /* Set default value for show_total_period.  */
+       annotate_browser__opts.show_total_period =
+         symbol_conf.show_total_period;
+
        return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt);
 }
 
 int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
                             struct hist_browser_timer *hbt)
 {
+       /* reset abort key so that it can get Ctrl-C as a key */
+       SLang_reset_tty();
+       SLang_init_tty(0, 0, 0);
+
        return map_symbol__tui_annotate(&he->ms, evsel, hbt);
 }
 
@@ -925,7 +953,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 
        if (perf_evsel__is_group_event(evsel)) {
                nr_pcnt = evsel->nr_members;
-               sizeof_bdl += sizeof(double) * (nr_pcnt - 1);
+               sizeof_bdl += sizeof(struct disasm_line_samples) *
+                 (nr_pcnt - 1);
        }
 
        if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
@@ -1002,6 +1031,7 @@ static struct annotate_config {
        ANNOTATE_CFG(show_linenr),
        ANNOTATE_CFG(show_nr_jumps),
        ANNOTATE_CFG(use_offset),
+       ANNOTATE_CFG(show_total_period),
 };
 
 #undef ANNOTATE_CFG
index 995b7a8..c42adb6 100644 (file)
@@ -25,6 +25,9 @@ struct hist_browser {
        struct hists        *hists;
        struct hist_entry   *he_selection;
        struct map_symbol   *selection;
+       struct hist_browser_timer *hbt;
+       struct pstack       *pstack;
+       struct perf_session_env *env;
        int                  print_seq;
        bool                 show_dso;
        bool                 show_headers;
@@ -60,7 +63,7 @@ static int hist_browser__get_folding(struct hist_browser *browser)
                struct hist_entry *he =
                        rb_entry(nd, struct hist_entry, rb_node);
 
-               if (he->ms.unfolded)
+               if (he->unfolded)
                        unfolded_rows += he->nr_rows;
        }
        return unfolded_rows;
@@ -136,24 +139,19 @@ static char tree__folded_sign(bool unfolded)
        return unfolded ? '-' : '+';
 }
 
-static char map_symbol__folded(const struct map_symbol *ms)
-{
-       return ms->has_children ? tree__folded_sign(ms->unfolded) : ' ';
-}
-
 static char hist_entry__folded(const struct hist_entry *he)
 {
-       return map_symbol__folded(&he->ms);
+       return he->has_children ? tree__folded_sign(he->unfolded) : ' ';
 }
 
 static char callchain_list__folded(const struct callchain_list *cl)
 {
-       return map_symbol__folded(&cl->ms);
+       return cl->has_children ? tree__folded_sign(cl->unfolded) : ' ';
 }
 
-static void map_symbol__set_folding(struct map_symbol *ms, bool unfold)
+static void callchain_list__set_folding(struct callchain_list *cl, bool unfold)
 {
-       ms->unfolded = unfold ? ms->has_children : false;
+       cl->unfolded = unfold ? cl->has_children : false;
 }
 
 static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
@@ -189,7 +187,7 @@ static int callchain_node__count_rows(struct callchain_node *node)
 
        list_for_each_entry(chain, &node->val, list) {
                ++n;
-               unfolded = chain->ms.unfolded;
+               unfolded = chain->unfolded;
        }
 
        if (unfolded)
@@ -211,15 +209,27 @@ static int callchain__count_rows(struct rb_root *chain)
        return n;
 }
 
-static bool map_symbol__toggle_fold(struct map_symbol *ms)
+static bool hist_entry__toggle_fold(struct hist_entry *he)
 {
-       if (!ms)
+       if (!he)
                return false;
 
-       if (!ms->has_children)
+       if (!he->has_children)
                return false;
 
-       ms->unfolded = !ms->unfolded;
+       he->unfolded = !he->unfolded;
+       return true;
+}
+
+static bool callchain_list__toggle_fold(struct callchain_list *cl)
+{
+       if (!cl)
+               return false;
+
+       if (!cl->has_children)
+               return false;
+
+       cl->unfolded = !cl->unfolded;
        return true;
 }
 
@@ -235,10 +245,10 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *no
                list_for_each_entry(chain, &child->val, list) {
                        if (first) {
                                first = false;
-                               chain->ms.has_children = chain->list.next != &child->val ||
+                               chain->has_children = chain->list.next != &child->val ||
                                                         !RB_EMPTY_ROOT(&child->rb_root);
                        } else
-                               chain->ms.has_children = chain->list.next == &child->val &&
+                               chain->has_children = chain->list.next == &child->val &&
                                                         !RB_EMPTY_ROOT(&child->rb_root);
                }
 
@@ -252,11 +262,11 @@ static void callchain_node__init_have_children(struct callchain_node *node,
        struct callchain_list *chain;
 
        chain = list_entry(node->val.next, struct callchain_list, list);
-       chain->ms.has_children = has_sibling;
+       chain->has_children = has_sibling;
 
        if (!list_empty(&node->val)) {
                chain = list_entry(node->val.prev, struct callchain_list, list);
-               chain->ms.has_children = !RB_EMPTY_ROOT(&node->rb_root);
+               chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
        }
 
        callchain_node__init_have_children_rb_tree(node);
@@ -276,7 +286,7 @@ static void callchain__init_have_children(struct rb_root *root)
 static void hist_entry__init_have_children(struct hist_entry *he)
 {
        if (!he->init_have_children) {
-               he->ms.has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
+               he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
                callchain__init_have_children(&he->sorted_chain);
                he->init_have_children = true;
        }
@@ -284,14 +294,22 @@ static void hist_entry__init_have_children(struct hist_entry *he)
 
 static bool hist_browser__toggle_fold(struct hist_browser *browser)
 {
-       if (map_symbol__toggle_fold(browser->selection)) {
-               struct hist_entry *he = browser->he_selection;
+       struct hist_entry *he = browser->he_selection;
+       struct map_symbol *ms = browser->selection;
+       struct callchain_list *cl = container_of(ms, struct callchain_list, ms);
+       bool has_children;
 
+       if (ms == &he->ms)
+               has_children = hist_entry__toggle_fold(he);
+       else
+               has_children = callchain_list__toggle_fold(cl);
+
+       if (has_children) {
                hist_entry__init_have_children(he);
                browser->b.nr_entries -= he->nr_rows;
                browser->nr_callchain_rows -= he->nr_rows;
 
-               if (he->ms.unfolded)
+               if (he->unfolded)
                        he->nr_rows = callchain__count_rows(&he->sorted_chain);
                else
                        he->nr_rows = 0;
@@ -318,8 +336,8 @@ static int callchain_node__set_folding_rb_tree(struct callchain_node *node, bool
 
                list_for_each_entry(chain, &child->val, list) {
                        ++n;
-                       map_symbol__set_folding(&chain->ms, unfold);
-                       has_children = chain->ms.has_children;
+                       callchain_list__set_folding(chain, unfold);
+                       has_children = chain->has_children;
                }
 
                if (has_children)
@@ -337,8 +355,8 @@ static int callchain_node__set_folding(struct callchain_node *node, bool unfold)
 
        list_for_each_entry(chain, &node->val, list) {
                ++n;
-               map_symbol__set_folding(&chain->ms, unfold);
-               has_children = chain->ms.has_children;
+               callchain_list__set_folding(chain, unfold);
+               has_children = chain->has_children;
        }
 
        if (has_children)
@@ -363,9 +381,9 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold)
 static void hist_entry__set_folding(struct hist_entry *he, bool unfold)
 {
        hist_entry__init_have_children(he);
-       map_symbol__set_folding(&he->ms, unfold);
+       he->unfolded = unfold ? he->has_children : false;
 
-       if (he->ms.has_children) {
+       if (he->has_children) {
                int n = callchain__set_folding(&he->sorted_chain, unfold);
                he->nr_rows = unfold ? n : 0;
        } else
@@ -406,11 +424,11 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser)
                "Or reduce the sampling frequency.");
 }
 
-static int hist_browser__run(struct hist_browser *browser,
-                            struct hist_browser_timer *hbt)
+static int hist_browser__run(struct hist_browser *browser, const char *help)
 {
        int key;
        char title[160];
+       struct hist_browser_timer *hbt = browser->hbt;
        int delay_secs = hbt ? hbt->refresh : 0;
 
        browser->b.entries = &browser->hists->entries;
@@ -418,8 +436,7 @@ static int hist_browser__run(struct hist_browser *browser,
 
        hists__browser_title(browser->hists, hbt, title, sizeof(title));
 
-       if (ui_browser__show(&browser->b, title,
-                            "Press '?' for help on key bindings") < 0)
+       if (ui_browser__show(&browser->b, title, help) < 0)
                return -1;
 
        while (1) {
@@ -1016,7 +1033,7 @@ do_offset:
        if (offset > 0) {
                do {
                        h = rb_entry(nd, struct hist_entry, rb_node);
-                       if (h->ms.unfolded) {
+                       if (h->unfolded) {
                                u16 remaining = h->nr_rows - h->row_offset;
                                if (offset > remaining) {
                                        offset -= remaining;
@@ -1037,7 +1054,7 @@ do_offset:
        } else if (offset < 0) {
                while (1) {
                        h = rb_entry(nd, struct hist_entry, rb_node);
-                       if (h->ms.unfolded) {
+                       if (h->unfolded) {
                                if (first) {
                                        if (-offset > h->row_offset) {
                                                offset += h->row_offset;
@@ -1074,7 +1091,7 @@ do_offset:
                                 * row_offset at its last entry.
                                 */
                                h = rb_entry(nd, struct hist_entry, rb_node);
-                               if (h->ms.unfolded)
+                               if (h->unfolded)
                                        h->row_offset = h->nr_rows;
                                break;
                        }
@@ -1195,7 +1212,9 @@ static int hist_browser__dump(struct hist_browser *browser)
        return 0;
 }
 
-static struct hist_browser *hist_browser__new(struct hists *hists)
+static struct hist_browser *hist_browser__new(struct hists *hists,
+                                             struct hist_browser_timer *hbt,
+                                             struct perf_session_env *env)
 {
        struct hist_browser *browser = zalloc(sizeof(*browser));
 
@@ -1206,6 +1225,8 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
                browser->b.seek = ui_browser__hists_seek;
                browser->b.use_navkeypressed = true;
                browser->show_headers = symbol_conf.show_hist_headers;
+               browser->hbt = hbt;
+               browser->env = env;
        }
 
        return browser;
@@ -1395,6 +1416,257 @@ close_file_and_continue:
        return ret;
 }
 
+struct popup_action {
+       struct thread           *thread;
+       struct dso              *dso;
+       struct map_symbol       ms;
+
+       int (*fn)(struct hist_browser *browser, struct popup_action *act);
+};
+
+static int
+do_annotate(struct hist_browser *browser, struct popup_action *act)
+{
+       struct perf_evsel *evsel;
+       struct annotation *notes;
+       struct hist_entry *he;
+       int err;
+
+       if (!objdump_path && perf_session_env__lookup_objdump(browser->env))
+               return 0;
+
+       notes = symbol__annotation(act->ms.sym);
+       if (!notes->src)
+               return 0;
+
+       evsel = hists_to_evsel(browser->hists);
+       err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
+       he = hist_browser__selected_entry(browser);
+       /*
+        * offer option to annotate the other branch source or target
+        * (if they exists) when returning from annotate
+        */
+       if ((err == 'q' || err == CTRL('c')) && he->branch_info)
+               return 1;
+
+       ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
+       if (err)
+               ui_browser__handle_resize(&browser->b);
+       return 0;
+}
+
+static int
+add_annotate_opt(struct hist_browser *browser __maybe_unused,
+                struct popup_action *act, char **optstr,
+                struct map *map, struct symbol *sym)
+{
+       if (sym == NULL || map->dso->annotate_warned)
+               return 0;
+
+       if (asprintf(optstr, "Annotate %s", sym->name) < 0)
+               return 0;
+
+       act->ms.map = map;
+       act->ms.sym = sym;
+       act->fn = do_annotate;
+       return 1;
+}
+
+static int
+do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
+{
+       struct thread *thread = act->thread;
+
+       if (browser->hists->thread_filter) {
+               pstack__remove(browser->pstack, &browser->hists->thread_filter);
+               perf_hpp__set_elide(HISTC_THREAD, false);
+               thread__zput(browser->hists->thread_filter);
+               ui_helpline__pop();
+       } else {
+               ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
+                                  thread->comm_set ? thread__comm_str(thread) : "",
+                                  thread->tid);
+               browser->hists->thread_filter = thread__get(thread);
+               perf_hpp__set_elide(HISTC_THREAD, false);
+               pstack__push(browser->pstack, &browser->hists->thread_filter);
+       }
+
+       hists__filter_by_thread(browser->hists);
+       hist_browser__reset(browser);
+       return 0;
+}
+
+static int
+add_thread_opt(struct hist_browser *browser, struct popup_action *act,
+              char **optstr, struct thread *thread)
+{
+       if (thread == NULL)
+               return 0;
+
+       if (asprintf(optstr, "Zoom %s %s(%d) thread",
+                    browser->hists->thread_filter ? "out of" : "into",
+                    thread->comm_set ? thread__comm_str(thread) : "",
+                    thread->tid) < 0)
+               return 0;
+
+       act->thread = thread;
+       act->fn = do_zoom_thread;
+       return 1;
+}
+
+static int
+do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
+{
+       struct dso *dso = act->dso;
+
+       if (browser->hists->dso_filter) {
+               pstack__remove(browser->pstack, &browser->hists->dso_filter);
+               perf_hpp__set_elide(HISTC_DSO, false);
+               browser->hists->dso_filter = NULL;
+               ui_helpline__pop();
+       } else {
+               if (dso == NULL)
+                       return 0;
+               ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
+                                  dso->kernel ? "the Kernel" : dso->short_name);
+               browser->hists->dso_filter = dso;
+               perf_hpp__set_elide(HISTC_DSO, true);
+               pstack__push(browser->pstack, &browser->hists->dso_filter);
+       }
+
+       hists__filter_by_dso(browser->hists);
+       hist_browser__reset(browser);
+       return 0;
+}
+
+static int
+add_dso_opt(struct hist_browser *browser, struct popup_action *act,
+           char **optstr, struct dso *dso)
+{
+       if (dso == NULL)
+               return 0;
+
+       if (asprintf(optstr, "Zoom %s %s DSO",
+                    browser->hists->dso_filter ? "out of" : "into",
+                    dso->kernel ? "the Kernel" : dso->short_name) < 0)
+               return 0;
+
+       act->dso = dso;
+       act->fn = do_zoom_dso;
+       return 1;
+}
+
+static int
+do_browse_map(struct hist_browser *browser __maybe_unused,
+             struct popup_action *act)
+{
+       map__browse(act->ms.map);
+       return 0;
+}
+
+static int
+add_map_opt(struct hist_browser *browser __maybe_unused,
+           struct popup_action *act, char **optstr, struct map *map)
+{
+       if (map == NULL)
+               return 0;
+
+       if (asprintf(optstr, "Browse map details") < 0)
+               return 0;
+
+       act->ms.map = map;
+       act->fn = do_browse_map;
+       return 1;
+}
+
+static int
+do_run_script(struct hist_browser *browser __maybe_unused,
+             struct popup_action *act)
+{
+       char script_opt[64];
+       memset(script_opt, 0, sizeof(script_opt));
+
+       if (act->thread) {
+               scnprintf(script_opt, sizeof(script_opt), " -c %s ",
+                         thread__comm_str(act->thread));
+       } else if (act->ms.sym) {
+               scnprintf(script_opt, sizeof(script_opt), " -S %s ",
+                         act->ms.sym->name);
+       }
+
+       script_browse(script_opt);
+       return 0;
+}
+
+static int
+add_script_opt(struct hist_browser *browser __maybe_unused,
+              struct popup_action *act, char **optstr,
+              struct thread *thread, struct symbol *sym)
+{
+       if (thread) {
+               if (asprintf(optstr, "Run scripts for samples of thread [%s]",
+                            thread__comm_str(thread)) < 0)
+                       return 0;
+       } else if (sym) {
+               if (asprintf(optstr, "Run scripts for samples of symbol [%s]",
+                            sym->name) < 0)
+                       return 0;
+       } else {
+               if (asprintf(optstr, "Run scripts for all samples") < 0)
+                       return 0;
+       }
+
+       act->thread = thread;
+       act->ms.sym = sym;
+       act->fn = do_run_script;
+       return 1;
+}
+
+static int
+do_switch_data(struct hist_browser *browser __maybe_unused,
+              struct popup_action *act __maybe_unused)
+{
+       if (switch_data_file()) {
+               ui__warning("Won't switch the data files due to\n"
+                           "no valid data file get selected!\n");
+               return 0;
+       }
+
+       return K_SWITCH_INPUT_DATA;
+}
+
+static int
+add_switch_opt(struct hist_browser *browser,
+              struct popup_action *act, char **optstr)
+{
+       if (!is_report_browser(browser->hbt))
+               return 0;
+
+       if (asprintf(optstr, "Switch to another data file in PWD") < 0)
+               return 0;
+
+       act->fn = do_switch_data;
+       return 1;
+}
+
+static int
+do_exit_browser(struct hist_browser *browser __maybe_unused,
+               struct popup_action *act __maybe_unused)
+{
+       return 0;
+}
+
+static int
+add_exit_opt(struct hist_browser *browser __maybe_unused,
+            struct popup_action *act, char **optstr)
+{
+       if (asprintf(optstr, "Exit") < 0)
+               return 0;
+
+       act->fn = do_exit_browser;
+       return 1;
+}
+
 static void hist_browser__update_nr_entries(struct hist_browser *hb)
 {
        u64 nr_entries = 0;
@@ -1421,14 +1693,14 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                                    struct perf_session_env *env)
 {
        struct hists *hists = evsel__hists(evsel);
-       struct hist_browser *browser = hist_browser__new(hists);
+       struct hist_browser *browser = hist_browser__new(hists, hbt, env);
        struct branch_info *bi;
-       struct pstack *fstack;
-       char *options[16];
+#define MAX_OPTIONS  16
+       char *options[MAX_OPTIONS];
+       struct popup_action actions[MAX_OPTIONS];
        int nr_options = 0;
        int key = -1;
        char buf[64];
-       char script_opt[64];
        int delay_secs = hbt ? hbt->refresh : 0;
        struct perf_hpp_fmt *fmt;
 
@@ -1463,23 +1735,29 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
        "t             Zoom into current Thread\n"
        "V             Verbose (DSO names in callchains, etc)\n"
        "z             Toggle zeroing of samples\n"
+       "f             Enable/Disable events\n"
        "/             Filter symbol by name";
 
        if (browser == NULL)
                return -1;
 
+       /* reset abort key so that it can get Ctrl-C as a key */
+       SLang_reset_tty();
+       SLang_init_tty(0, 0, 0);
+
        if (min_pcnt) {
                browser->min_pcnt = min_pcnt;
                hist_browser__update_nr_entries(browser);
        }
 
-       fstack = pstack__new(2);
-       if (fstack == NULL)
+       browser->pstack = pstack__new(2);
+       if (browser->pstack == NULL)
                goto out;
 
        ui_helpline__push(helpline);
 
        memset(options, 0, sizeof(options));
+       memset(actions, 0, sizeof(actions));
 
        perf_hpp__for_each_format(fmt)
                perf_hpp__reset_width(fmt, hists);
@@ -1489,16 +1767,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
        while (1) {
                struct thread *thread = NULL;
-               const struct dso *dso = NULL;
-               int choice = 0,
-                   annotate = -2, zoom_dso = -2, zoom_thread = -2,
-                   annotate_f = -2, annotate_t = -2, browse_map = -2;
-               int scripts_comm = -2, scripts_symbol = -2,
-                   scripts_all = -2, switch_data = -2;
+               struct dso *dso = NULL;
+               int choice = 0;
 
                nr_options = 0;
 
-               key = hist_browser__run(browser, hbt);
+               key = hist_browser__run(browser, helpline);
 
                if (browser->he_selection != NULL) {
                        thread = hist_browser__selected_thread(browser);
@@ -1526,17 +1800,25 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                            browser->selection->sym == NULL ||
                            browser->selection->map->dso->annotate_warned)
                                continue;
-                       goto do_annotate;
+
+                       actions->ms.map = browser->selection->map;
+                       actions->ms.sym = browser->selection->sym;
+                       do_annotate(browser, actions);
+                       continue;
                case 'P':
                        hist_browser__dump(browser);
                        continue;
                case 'd':
-                       goto zoom_dso;
+                       actions->dso = dso;
+                       do_zoom_dso(browser, actions);
+                       continue;
                case 'V':
                        browser->show_dso = !browser->show_dso;
                        continue;
                case 't':
-                       goto zoom_thread;
+                       actions->thread = thread;
+                       do_zoom_thread(browser, actions);
+                       continue;
                case '/':
                        if (ui_browser__input_window("Symbol to show",
                                        "Please enter the name of symbol you want to see",
@@ -1548,12 +1830,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                        }
                        continue;
                case 'r':
-                       if (is_report_browser(hbt))
-                               goto do_scripts;
+                       if (is_report_browser(hbt)) {
+                               actions->thread = NULL;
+                               actions->ms.sym = NULL;
+                               do_run_script(browser, actions);
+                       }
                        continue;
                case 's':
-                       if (is_report_browser(hbt))
-                               goto do_data_switch;
+                       if (is_report_browser(hbt)) {
+                               key = do_switch_data(browser, actions);
+                               if (key == K_SWITCH_INPUT_DATA)
+                                       goto out_free_stack;
+                       }
                        continue;
                case 'i':
                        /* env->arch is NULL for live-mode (i.e. perf top) */
@@ -1583,7 +1871,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                case K_LEFT: {
                        const void *top;
 
-                       if (pstack__empty(fstack)) {
+                       if (pstack__empty(browser->pstack)) {
                                /*
                                 * Go back to the perf_evsel_menu__run or other user
                                 */
@@ -1591,11 +1879,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                                        goto out_free_stack;
                                continue;
                        }
-                       top = pstack__pop(fstack);
-                       if (top == &browser->hists->dso_filter)
-                               goto zoom_out_dso;
+                       top = pstack__peek(browser->pstack);
+                       if (top == &browser->hists->dso_filter) {
+                               /*
+                                * No need to set actions->dso here since
+                                * it's just to remove the current filter.
+                                * Ditto for thread below.
+                                */
+                               do_zoom_dso(browser, actions);
+                       }
                        if (top == &browser->hists->thread_filter)
-                               goto zoom_out_thread;
+                               do_zoom_thread(browser, actions);
                        continue;
                }
                case K_ESC:
@@ -1607,7 +1901,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                case 'q':
                case CTRL('c'):
                        goto out_free_stack;
+               case 'f':
+                       if (!is_report_browser(hbt))
+                               goto out_free_stack;
+                       /* Fall thru */
                default:
+                       helpline = "Press '?' for help on key bindings";
                        continue;
                }
 
@@ -1623,196 +1922,71 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                        if (bi == NULL)
                                goto skip_annotation;
 
-                       if (bi->from.sym != NULL &&
-                           !bi->from.map->dso->annotate_warned &&
-                           asprintf(&options[nr_options], "Annotate %s", bi->from.sym->name) > 0) {
-                               annotate_f = nr_options++;
-                       }
-
-                       if (bi->to.sym != NULL &&
-                           !bi->to.map->dso->annotate_warned &&
-                           (bi->to.sym != bi->from.sym ||
-                            bi->to.map->dso != bi->from.map->dso) &&
-                           asprintf(&options[nr_options], "Annotate %s", bi->to.sym->name) > 0) {
-                               annotate_t = nr_options++;
-                       }
+                       nr_options += add_annotate_opt(browser,
+                                                      &actions[nr_options],
+                                                      &options[nr_options],
+                                                      bi->from.map,
+                                                      bi->from.sym);
+                       if (bi->to.sym != bi->from.sym)
+                               nr_options += add_annotate_opt(browser,
+                                                       &actions[nr_options],
+                                                       &options[nr_options],
+                                                       bi->to.map,
+                                                       bi->to.sym);
                } else {
-                       if (browser->selection->sym != NULL &&
-                           !browser->selection->map->dso->annotate_warned) {
-                               struct annotation *notes;
-
-                               notes = symbol__annotation(browser->selection->sym);
-
-                               if (notes->src &&
-                                   asprintf(&options[nr_options], "Annotate %s",
-                                                browser->selection->sym->name) > 0) {
-                                       annotate = nr_options++;
-                               }
-                       }
+                       nr_options += add_annotate_opt(browser,
+                                                      &actions[nr_options],
+                                                      &options[nr_options],
+                                                      browser->selection->map,
+                                                      browser->selection->sym);
                }
 skip_annotation:
-               if (thread != NULL &&
-                   asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
-                            (browser->hists->thread_filter ? "out of" : "into"),
-                            (thread->comm_set ? thread__comm_str(thread) : ""),
-                            thread->tid) > 0)
-                       zoom_thread = nr_options++;
-
-               if (dso != NULL &&
-                   asprintf(&options[nr_options], "Zoom %s %s DSO",
-                            (browser->hists->dso_filter ? "out of" : "into"),
-                            (dso->kernel ? "the Kernel" : dso->short_name)) > 0)
-                       zoom_dso = nr_options++;
-
-               if (browser->selection != NULL &&
-                   browser->selection->map != NULL &&
-                   asprintf(&options[nr_options], "Browse map details") > 0)
-                       browse_map = nr_options++;
+               nr_options += add_thread_opt(browser, &actions[nr_options],
+                                            &options[nr_options], thread);
+               nr_options += add_dso_opt(browser, &actions[nr_options],
+                                         &options[nr_options], dso);
+               nr_options += add_map_opt(browser, &actions[nr_options],
+                                         &options[nr_options],
+                                         browser->selection->map);
 
                /* perf script support */
                if (browser->he_selection) {
-                       struct symbol *sym;
-
-                       if (asprintf(&options[nr_options], "Run scripts for samples of thread [%s]",
-                                    thread__comm_str(browser->he_selection->thread)) > 0)
-                               scripts_comm = nr_options++;
-
-                       sym = browser->he_selection->ms.sym;
-                       if (sym && sym->namelen &&
-                               asprintf(&options[nr_options], "Run scripts for samples of symbol [%s]",
-                                               sym->name) > 0)
-                               scripts_symbol = nr_options++;
+                       nr_options += add_script_opt(browser,
+                                                    &actions[nr_options],
+                                                    &options[nr_options],
+                                                    thread, NULL);
+                       nr_options += add_script_opt(browser,
+                                                    &actions[nr_options],
+                                                    &options[nr_options],
+                                                    NULL, browser->selection->sym);
                }
-
-               if (asprintf(&options[nr_options], "Run scripts for all samples") > 0)
-                       scripts_all = nr_options++;
-
-               if (is_report_browser(hbt) && asprintf(&options[nr_options],
-                               "Switch to another data file in PWD") > 0)
-                       switch_data = nr_options++;
+               nr_options += add_script_opt(browser, &actions[nr_options],
+                                            &options[nr_options], NULL, NULL);
+               nr_options += add_switch_opt(browser, &actions[nr_options],
+                                            &options[nr_options]);
 add_exit_option:
-               options[nr_options++] = (char *)"Exit";
-retry_popup_menu:
-               choice = ui__popup_menu(nr_options, options);
-
-               if (choice == nr_options - 1)
-                       break;
-
-               if (choice == -1) {
-                       free_popup_options(options, nr_options - 1);
-                       continue;
-               }
-
-               if (choice == annotate || choice == annotate_t || choice == annotate_f) {
-                       struct hist_entry *he;
-                       struct annotation *notes;
-                       struct map_symbol ms;
-                       int err;
-do_annotate:
-                       if (!objdump_path && perf_session_env__lookup_objdump(env))
-                               continue;
-
-                       he = hist_browser__selected_entry(browser);
-                       if (he == NULL)
-                               continue;
-
-                       if (choice == annotate_f) {
-                               ms.map = he->branch_info->from.map;
-                               ms.sym = he->branch_info->from.sym;
-                       } else if (choice == annotate_t) {
-                               ms.map = he->branch_info->to.map;
-                               ms.sym = he->branch_info->to.sym;
-                       } else {
-                               ms = *browser->selection;
-                       }
+               nr_options += add_exit_opt(browser, &actions[nr_options],
+                                          &options[nr_options]);
 
-                       notes = symbol__annotation(ms.sym);
-                       if (!notes->src)
-                               continue;
-
-                       err = map_symbol__tui_annotate(&ms, evsel, hbt);
-                       /*
-                        * offer option to annotate the other branch source or target
-                        * (if they exists) when returning from annotate
-                        */
-                       if ((err == 'q' || err == CTRL('c'))
-                           && annotate_t != -2 && annotate_f != -2)
-                               goto retry_popup_menu;
-
-                       ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
-                       if (err)
-                               ui_browser__handle_resize(&browser->b);
-
-               } else if (choice == browse_map)
-                       map__browse(browser->selection->map);
-               else if (choice == zoom_dso) {
-zoom_dso:
-                       if (browser->hists->dso_filter) {
-                               pstack__remove(fstack, &browser->hists->dso_filter);
-zoom_out_dso:
-                               ui_helpline__pop();
-                               browser->hists->dso_filter = NULL;
-                               perf_hpp__set_elide(HISTC_DSO, false);
-                       } else {
-                               if (dso == NULL)
-                                       continue;
-                               ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
-                                                  dso->kernel ? "the Kernel" : dso->short_name);
-                               browser->hists->dso_filter = dso;
-                               perf_hpp__set_elide(HISTC_DSO, true);
-                               pstack__push(fstack, &browser->hists->dso_filter);
-                       }
-                       hists__filter_by_dso(hists);
-                       hist_browser__reset(browser);
-               } else if (choice == zoom_thread) {
-zoom_thread:
-                       if (browser->hists->thread_filter) {
-                               pstack__remove(fstack, &browser->hists->thread_filter);
-zoom_out_thread:
-                               ui_helpline__pop();
-                               thread__zput(browser->hists->thread_filter);
-                               perf_hpp__set_elide(HISTC_THREAD, false);
-                       } else {
-                               ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
-                                                  thread->comm_set ? thread__comm_str(thread) : "",
-                                                  thread->tid);
-                               browser->hists->thread_filter = thread__get(thread);
-                               perf_hpp__set_elide(HISTC_THREAD, false);
-                               pstack__push(fstack, &browser->hists->thread_filter);
-                       }
-                       hists__filter_by_thread(hists);
-                       hist_browser__reset(browser);
-               }
-               /* perf scripts support */
-               else if (choice == scripts_all || choice == scripts_comm ||
-                               choice == scripts_symbol) {
-do_scripts:
-                       memset(script_opt, 0, 64);
+               do {
+                       struct popup_action *act;
 
-                       if (choice == scripts_comm)
-                               sprintf(script_opt, " -c %s ", thread__comm_str(browser->he_selection->thread));
+                       choice = ui__popup_menu(nr_options, options);
+                       if (choice == -1 || choice >= nr_options)
+                               break;
 
-                       if (choice == scripts_symbol)
-                               sprintf(script_opt, " -S %s ", browser->he_selection->ms.sym->name);
+                       act = &actions[choice];
+                       key = act->fn(browser, act);
+               } while (key == 1);
 
-                       script_browse(script_opt);
-               }
-               /* Switch to another data file */
-               else if (choice == switch_data) {
-do_data_switch:
-                       if (!switch_data_file()) {
-                               key = K_SWITCH_INPUT_DATA;
-                               break;
-                       } else
-                               ui__warning("Won't switch the data files due to\n"
-                                       "no valid data file get selected!\n");
-               }
+               if (key == K_SWITCH_INPUT_DATA)
+                       break;
        }
 out_free_stack:
-       pstack__delete(fstack);
+       pstack__delete(browser->pstack);
 out:
        hist_browser__delete(browser);
-       free_popup_options(options, nr_options - 1);
+       free_popup_options(options, MAX_OPTIONS);
        return key;
 }
 
index b77e1d7..60d1f29 100644 (file)
@@ -129,7 +129,7 @@ int ui__init(void)
        err = SLsmg_init_smg();
        if (err < 0)
                goto out;
-       err = SLang_init_tty(0, 0, 0);
+       err = SLang_init_tty(-1, 0, 0);
        if (err < 0)
                goto out;
 
index 797490a..586a59d 100644 (file)
@@ -68,12 +68,15 @@ libperf-y += rblist.o
 libperf-y += intlist.o
 libperf-y += vdso.o
 libperf-y += stat.o
+libperf-y += stat-shadow.o
 libperf-y += record.o
 libperf-y += srcline.o
 libperf-y += data.o
 libperf-$(CONFIG_X86) += tsc.o
 libperf-y += cloexec.o
 libperf-y += thread-stack.o
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-y += parse-branch-options.o
 
 libperf-$(CONFIG_LIBELF) += symbol-elf.o
 libperf-$(CONFIG_LIBELF) += probe-event.o
@@ -101,23 +104,23 @@ CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="B
 
 $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
        $(call rule_mkdir)
-       @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+       $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
 
 $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
        $(call rule_mkdir)
-       @$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+       $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
 
 $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
        $(call rule_mkdir)
-       @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+       $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
 
 $(OUTPUT)util/pmu-bison.c: util/pmu.y
        $(call rule_mkdir)
-       @$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
+       $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
 
 CFLAGS_parse-events-flex.o  += -w
 CFLAGS_pmu-flex.o           += -w
-CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
 CFLAGS_pmu-bison.o          += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
 
 $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
index 7f5bdfc..03b7bc7 100644 (file)
@@ -506,6 +506,17 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
        return 0;
 }
 
+static struct annotation *symbol__get_annotation(struct symbol *sym)
+{
+       struct annotation *notes = symbol__annotation(sym);
+
+       if (notes->src == NULL) {
+               if (symbol__alloc_hist(sym) < 0)
+                       return NULL;
+       }
+       return notes;
+}
+
 static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
                                    int evidx, u64 addr)
 {
@@ -513,13 +524,9 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 
        if (sym == NULL)
                return 0;
-
-       notes = symbol__annotation(sym);
-       if (notes->src == NULL) {
-               if (symbol__alloc_hist(sym) < 0)
-                       return -ENOMEM;
-       }
-
+       notes = symbol__get_annotation(sym);
+       if (notes == NULL)
+               return -ENOMEM;
        return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
 }
 
@@ -647,14 +654,15 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
 }
 
 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
-                           s64 end, const char **path)
+                           s64 end, const char **path, u64 *nr_samples)
 {
        struct source_line *src_line = notes->src->lines;
        double percent = 0.0;
+       *nr_samples = 0;
 
        if (src_line) {
                size_t sizeof_src_line = sizeof(*src_line) +
-                               sizeof(src_line->p) * (src_line->nr_pcnt - 1);
+                               sizeof(src_line->samples) * (src_line->nr_pcnt - 1);
 
                while (offset < end) {
                        src_line = (void *)notes->src->lines +
@@ -663,7 +671,8 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
                        if (*path == NULL)
                                *path = src_line->path;
 
-                       percent += src_line->p[evidx].percent;
+                       percent += src_line->samples[evidx].percent;
+                       *nr_samples += src_line->samples[evidx].nr;
                        offset++;
                }
        } else {
@@ -673,8 +682,10 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
                while (offset < end)
                        hits += h->addr[offset++];
 
-               if (h->sum)
+               if (h->sum) {
+                       *nr_samples = hits;
                        percent = 100.0 * hits / h->sum;
+               }
        }
 
        return percent;
@@ -689,8 +700,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 
        if (dl->offset != -1) {
                const char *path = NULL;
+               u64 nr_samples;
                double percent, max_percent = 0.0;
                double *ppercents = &percent;
+               u64 *psamples = &nr_samples;
                int i, nr_percent = 1;
                const char *color;
                struct annotation *notes = symbol__annotation(sym);
@@ -703,8 +716,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
                if (perf_evsel__is_group_event(evsel)) {
                        nr_percent = evsel->nr_members;
                        ppercents = calloc(nr_percent, sizeof(double));
-                       if (ppercents == NULL)
+                       psamples = calloc(nr_percent, sizeof(u64));
+                       if (ppercents == NULL || psamples == NULL) {
                                return -1;
+                       }
                }
 
                for (i = 0; i < nr_percent; i++) {
@@ -712,9 +727,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
                                        notes->src->lines ? i : evsel->idx + i,
                                        offset,
                                        next ? next->offset : (s64) len,
-                                       &path);
+                                       &path, &nr_samples);
 
                        ppercents[i] = percent;
+                       psamples[i] = nr_samples;
                        if (percent > max_percent)
                                max_percent = percent;
                }
@@ -752,8 +768,14 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 
                for (i = 0; i < nr_percent; i++) {
                        percent = ppercents[i];
+                       nr_samples = psamples[i];
                        color = get_percent_color(percent);
-                       color_fprintf(stdout, color, " %7.2f", percent);
+
+                       if (symbol_conf.show_total_period)
+                               color_fprintf(stdout, color, " %7" PRIu64,
+                                             nr_samples);
+                       else
+                               color_fprintf(stdout, color, " %7.2f", percent);
                }
 
                printf(" :      ");
@@ -763,6 +785,9 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
                if (ppercents != &percent)
                        free(ppercents);
 
+               if (psamples != &nr_samples)
+                       free(psamples);
+
        } else if (max_lines && printed >= max_lines)
                return 1;
        else {
@@ -1096,7 +1121,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
                ret = strcmp(iter->path, src_line->path);
                if (ret == 0) {
                        for (i = 0; i < src_line->nr_pcnt; i++)
-                               iter->p[i].percent_sum += src_line->p[i].percent;
+                               iter->samples[i].percent_sum += src_line->samples[i].percent;
                        return;
                }
 
@@ -1107,7 +1132,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
        }
 
        for (i = 0; i < src_line->nr_pcnt; i++)
-               src_line->p[i].percent_sum = src_line->p[i].percent;
+               src_line->samples[i].percent_sum = src_line->samples[i].percent;
 
        rb_link_node(&src_line->node, parent, p);
        rb_insert_color(&src_line->node, root);
@@ -1118,9 +1143,9 @@ static int cmp_source_line(struct source_line *a, struct source_line *b)
        int i;
 
        for (i = 0; i < a->nr_pcnt; i++) {
-               if (a->p[i].percent_sum == b->p[i].percent_sum)
+               if (a->samples[i].percent_sum == b->samples[i].percent_sum)
                        continue;
-               return a->p[i].percent_sum > b->p[i].percent_sum;
+               return a->samples[i].percent_sum > b->samples[i].percent_sum;
        }
 
        return 0;
@@ -1172,7 +1197,7 @@ static void symbol__free_source_line(struct symbol *sym, int len)
        int i;
 
        sizeof_src_line = sizeof(*src_line) +
-                         (sizeof(src_line->p) * (src_line->nr_pcnt - 1));
+                         (sizeof(src_line->samples) * (src_line->nr_pcnt - 1));
 
        for (i = 0; i < len; i++) {
                free_srcline(src_line->path);
@@ -1204,7 +1229,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
                        h_sum += h->sum;
                }
                nr_pcnt = evsel->nr_members;
-               sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->p);
+               sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples);
        }
 
        if (!h_sum)
@@ -1224,10 +1249,10 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 
                for (k = 0; k < nr_pcnt; k++) {
                        h = annotation__histogram(notes, evidx + k);
-                       src_line->p[k].percent = 100.0 * h->addr[i] / h->sum;
+                       src_line->samples[k].percent = 100.0 * h->addr[i] / h->sum;
 
-                       if (src_line->p[k].percent > percent_max)
-                               percent_max = src_line->p[k].percent;
+                       if (src_line->samples[k].percent > percent_max)
+                               percent_max = src_line->samples[k].percent;
                }
 
                if (percent_max <= 0.5)
@@ -1267,7 +1292,7 @@ static void print_summary(struct rb_root *root, const char *filename)
 
                src_line = rb_entry(node, struct source_line, node);
                for (i = 0; i < src_line->nr_pcnt; i++) {
-                       percent = src_line->p[i].percent_sum;
+                       percent = src_line->samples[i].percent_sum;
                        color = get_percent_color(percent);
                        color_fprintf(stdout, color, " %7.2f", percent);
 
index cadbdc9..7e78e6c 100644 (file)
@@ -72,23 +72,24 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
 size_t disasm__fprintf(struct list_head *head, FILE *fp);
 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
-                           s64 end, const char **path);
+                           s64 end, const char **path, u64 *nr_samples);
 
 struct sym_hist {
        u64             sum;
        u64             addr[0];
 };
 
-struct source_line_percent {
+struct source_line_samples {
        double          percent;
        double          percent_sum;
+       double          nr;
 };
 
 struct source_line {
        struct rb_node  node;
        char            *path;
        int             nr_pcnt;
-       struct source_line_percent p[1];
+       struct source_line_samples samples[1];
 };
 
 /** struct annotated_source - symbols with hits have this attached as in sannotation
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
new file mode 100644 (file)
index 0000000..df66966
--- /dev/null
@@ -0,0 +1,1352 @@
+/*
+ * auxtrace.c: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+
+#include <sys/param.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <linux/list.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "evlist.h"
+#include "cpumap.h"
+#include "thread_map.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+
+#include <linux/hash.h>
+
+#include "event.h"
+#include "session.h"
+#include "debug.h"
+#include "parse-options.h"
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+                       struct auxtrace_mmap_params *mp,
+                       void *userpg, int fd)
+{
+       struct perf_event_mmap_page *pc = userpg;
+
+#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+       pr_err("Cannot use AUX area tracing mmaps\n");
+       return -1;
+#endif
+
+       WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n");
+
+       mm->userpg = userpg;
+       mm->mask = mp->mask;
+       mm->len = mp->len;
+       mm->prev = 0;
+       mm->idx = mp->idx;
+       mm->tid = mp->tid;
+       mm->cpu = mp->cpu;
+
+       if (!mp->len) {
+               mm->base = NULL;
+               return 0;
+       }
+
+       pc->aux_offset = mp->offset;
+       pc->aux_size = mp->len;
+
+       mm->base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset);
+       if (mm->base == MAP_FAILED) {
+               pr_debug2("failed to mmap AUX area\n");
+               mm->base = NULL;
+               return -1;
+       }
+
+       return 0;
+}
+
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm)
+{
+       if (mm->base) {
+               munmap(mm->base, mm->len);
+               mm->base = NULL;
+       }
+}
+
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+                               off_t auxtrace_offset,
+                               unsigned int auxtrace_pages,
+                               bool auxtrace_overwrite)
+{
+       if (auxtrace_pages) {
+               mp->offset = auxtrace_offset;
+               mp->len = auxtrace_pages * (size_t)page_size;
+               mp->mask = is_power_of_2(mp->len) ? mp->len - 1 : 0;
+               mp->prot = PROT_READ | (auxtrace_overwrite ? 0 : PROT_WRITE);
+               pr_debug2("AUX area mmap length %zu\n", mp->len);
+       } else {
+               mp->len = 0;
+       }
+}
+
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+                                  struct perf_evlist *evlist, int idx,
+                                  bool per_cpu)
+{
+       mp->idx = idx;
+
+       if (per_cpu) {
+               mp->cpu = evlist->cpus->map[idx];
+               if (evlist->threads)
+                       mp->tid = evlist->threads->map[0];
+               else
+                       mp->tid = -1;
+       } else {
+               mp->cpu = -1;
+               mp->tid = evlist->threads->map[idx];
+       }
+}
+
+#define AUXTRACE_INIT_NR_QUEUES        32
+
+static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues)
+{
+       struct auxtrace_queue *queue_array;
+       unsigned int max_nr_queues, i;
+
+       max_nr_queues = UINT_MAX / sizeof(struct auxtrace_queue);
+       if (nr_queues > max_nr_queues)
+               return NULL;
+
+       queue_array = calloc(nr_queues, sizeof(struct auxtrace_queue));
+       if (!queue_array)
+               return NULL;
+
+       for (i = 0; i < nr_queues; i++) {
+               INIT_LIST_HEAD(&queue_array[i].head);
+               queue_array[i].priv = NULL;
+       }
+
+       return queue_array;
+}
+
+int auxtrace_queues__init(struct auxtrace_queues *queues)
+{
+       queues->nr_queues = AUXTRACE_INIT_NR_QUEUES;
+       queues->queue_array = auxtrace_alloc_queue_array(queues->nr_queues);
+       if (!queues->queue_array)
+               return -ENOMEM;
+       return 0;
+}
+
+static int auxtrace_queues__grow(struct auxtrace_queues *queues,
+                                unsigned int new_nr_queues)
+{
+       unsigned int nr_queues = queues->nr_queues;
+       struct auxtrace_queue *queue_array;
+       unsigned int i;
+
+       if (!nr_queues)
+               nr_queues = AUXTRACE_INIT_NR_QUEUES;
+
+       while (nr_queues && nr_queues < new_nr_queues)
+               nr_queues <<= 1;
+
+       if (nr_queues < queues->nr_queues || nr_queues < new_nr_queues)
+               return -EINVAL;
+
+       queue_array = auxtrace_alloc_queue_array(nr_queues);
+       if (!queue_array)
+               return -ENOMEM;
+
+       for (i = 0; i < queues->nr_queues; i++) {
+               list_splice_tail(&queues->queue_array[i].head,
+                                &queue_array[i].head);
+               queue_array[i].priv = queues->queue_array[i].priv;
+       }
+
+       queues->nr_queues = nr_queues;
+       queues->queue_array = queue_array;
+
+       return 0;
+}
+
+static void *auxtrace_copy_data(u64 size, struct perf_session *session)
+{
+       int fd = perf_data_file__fd(session->file);
+       void *p;
+       ssize_t ret;
+
+       if (size > SSIZE_MAX)
+               return NULL;
+
+       p = malloc(size);
+       if (!p)
+               return NULL;
+
+       ret = readn(fd, p, size);
+       if (ret != (ssize_t)size) {
+               free(p);
+               return NULL;
+       }
+
+       return p;
+}
+
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+                                      unsigned int idx,
+                                      struct auxtrace_buffer *buffer)
+{
+       struct auxtrace_queue *queue;
+       int err;
+
+       if (idx >= queues->nr_queues) {
+               err = auxtrace_queues__grow(queues, idx + 1);
+               if (err)
+                       return err;
+       }
+
+       queue = &queues->queue_array[idx];
+
+       if (!queue->set) {
+               queue->set = true;
+               queue->tid = buffer->tid;
+               queue->cpu = buffer->cpu;
+       } else if (buffer->cpu != queue->cpu || buffer->tid != queue->tid) {
+               pr_err("auxtrace queue conflict: cpu %d, tid %d vs cpu %d, tid %d\n",
+                      queue->cpu, queue->tid, buffer->cpu, buffer->tid);
+               return -EINVAL;
+       }
+
+       buffer->buffer_nr = queues->next_buffer_nr++;
+
+       list_add_tail(&buffer->list, &queue->head);
+
+       queues->new_data = true;
+       queues->populated = true;
+
+       return 0;
+}
+
+/* Limit buffers to 32MiB on 32-bit */
+#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024)
+
+static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
+                                        unsigned int idx,
+                                        struct auxtrace_buffer *buffer)
+{
+       u64 sz = buffer->size;
+       bool consecutive = false;
+       struct auxtrace_buffer *b;
+       int err;
+
+       while (sz > BUFFER_LIMIT_FOR_32_BIT) {
+               b = memdup(buffer, sizeof(struct auxtrace_buffer));
+               if (!b)
+                       return -ENOMEM;
+               b->size = BUFFER_LIMIT_FOR_32_BIT;
+               b->consecutive = consecutive;
+               err = auxtrace_queues__add_buffer(queues, idx, b);
+               if (err) {
+                       auxtrace_buffer__free(b);
+                       return err;
+               }
+               buffer->data_offset += BUFFER_LIMIT_FOR_32_BIT;
+               sz -= BUFFER_LIMIT_FOR_32_BIT;
+               consecutive = true;
+       }
+
+       buffer->size = sz;
+       buffer->consecutive = consecutive;
+
+       return 0;
+}
+
+static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
+                                            struct perf_session *session,
+                                            unsigned int idx,
+                                            struct auxtrace_buffer *buffer)
+{
+       if (session->one_mmap) {
+               buffer->data = buffer->data_offset - session->one_mmap_offset +
+                              session->one_mmap_addr;
+       } else if (perf_data_file__is_pipe(session->file)) {
+               buffer->data = auxtrace_copy_data(buffer->size, session);
+               if (!buffer->data)
+                       return -ENOMEM;
+               buffer->data_needs_freeing = true;
+       } else if (BITS_PER_LONG == 32 &&
+                  buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
+               int err;
+
+               err = auxtrace_queues__split_buffer(queues, idx, buffer);
+               if (err)
+                       return err;
+       }
+
+       return auxtrace_queues__add_buffer(queues, idx, buffer);
+}
+
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+                              struct perf_session *session,
+                              union perf_event *event, off_t data_offset,
+                              struct auxtrace_buffer **buffer_ptr)
+{
+       struct auxtrace_buffer *buffer;
+       unsigned int idx;
+       int err;
+
+       buffer = zalloc(sizeof(struct auxtrace_buffer));
+       if (!buffer)
+               return -ENOMEM;
+
+       buffer->pid = -1;
+       buffer->tid = event->auxtrace.tid;
+       buffer->cpu = event->auxtrace.cpu;
+       buffer->data_offset = data_offset;
+       buffer->offset = event->auxtrace.offset;
+       buffer->reference = event->auxtrace.reference;
+       buffer->size = event->auxtrace.size;
+       idx = event->auxtrace.idx;
+
+       err = auxtrace_queues__add_event_buffer(queues, session, idx, buffer);
+       if (err)
+               goto out_err;
+
+       if (buffer_ptr)
+               *buffer_ptr = buffer;
+
+       return 0;
+
+out_err:
+       auxtrace_buffer__free(buffer);
+       return err;
+}
+
+static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
+                                             struct perf_session *session,
+                                             off_t file_offset, size_t sz)
+{
+       union perf_event *event;
+       int err;
+       char buf[PERF_SAMPLE_MAX_SIZE];
+
+       err = perf_session__peek_event(session, file_offset, buf,
+                                      PERF_SAMPLE_MAX_SIZE, &event, NULL);
+       if (err)
+               return err;
+
+       if (event->header.type == PERF_RECORD_AUXTRACE) {
+               if (event->header.size < sizeof(struct auxtrace_event) ||
+                   event->header.size != sz) {
+                       err = -EINVAL;
+                       goto out;
+               }
+               file_offset += event->header.size;
+               err = auxtrace_queues__add_event(queues, session, event,
+                                                file_offset, NULL);
+       }
+out:
+       return err;
+}
+
+void auxtrace_queues__free(struct auxtrace_queues *queues)
+{
+       unsigned int i;
+
+       for (i = 0; i < queues->nr_queues; i++) {
+               while (!list_empty(&queues->queue_array[i].head)) {
+                       struct auxtrace_buffer *buffer;
+
+                       buffer = list_entry(queues->queue_array[i].head.next,
+                                           struct auxtrace_buffer, list);
+                       list_del(&buffer->list);
+                       auxtrace_buffer__free(buffer);
+               }
+       }
+
+       zfree(&queues->queue_array);
+       queues->nr_queues = 0;
+}
+
+static void auxtrace_heapify(struct auxtrace_heap_item *heap_array,
+                            unsigned int pos, unsigned int queue_nr,
+                            u64 ordinal)
+{
+       unsigned int parent;
+
+       while (pos) {
+               parent = (pos - 1) >> 1;
+               if (heap_array[parent].ordinal <= ordinal)
+                       break;
+               heap_array[pos] = heap_array[parent];
+               pos = parent;
+       }
+       heap_array[pos].queue_nr = queue_nr;
+       heap_array[pos].ordinal = ordinal;
+}
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+                      u64 ordinal)
+{
+       struct auxtrace_heap_item *heap_array;
+
+       if (queue_nr >= heap->heap_sz) {
+               unsigned int heap_sz = AUXTRACE_INIT_NR_QUEUES;
+
+               while (heap_sz <= queue_nr)
+                       heap_sz <<= 1;
+               heap_array = realloc(heap->heap_array,
+                                    heap_sz * sizeof(struct auxtrace_heap_item));
+               if (!heap_array)
+                       return -ENOMEM;
+               heap->heap_array = heap_array;
+               heap->heap_sz = heap_sz;
+       }
+
+       auxtrace_heapify(heap->heap_array, heap->heap_cnt++, queue_nr, ordinal);
+
+       return 0;
+}
+
+void auxtrace_heap__free(struct auxtrace_heap *heap)
+{
+       zfree(&heap->heap_array);
+       heap->heap_cnt = 0;
+       heap->heap_sz = 0;
+}
+
+void auxtrace_heap__pop(struct auxtrace_heap *heap)
+{
+       unsigned int pos, last, heap_cnt = heap->heap_cnt;
+       struct auxtrace_heap_item *heap_array;
+
+       if (!heap_cnt)
+               return;
+
+       heap->heap_cnt -= 1;
+
+       heap_array = heap->heap_array;
+
+       pos = 0;
+       while (1) {
+               unsigned int left, right;
+
+               left = (pos << 1) + 1;
+               if (left >= heap_cnt)
+                       break;
+               right = left + 1;
+               if (right >= heap_cnt) {
+                       heap_array[pos] = heap_array[left];
+                       return;
+               }
+               if (heap_array[left].ordinal < heap_array[right].ordinal) {
+                       heap_array[pos] = heap_array[left];
+                       pos = left;
+               } else {
+                       heap_array[pos] = heap_array[right];
+                       pos = right;
+               }
+       }
+
+       last = heap_cnt - 1;
+       auxtrace_heapify(heap_array, pos, heap_array[last].queue_nr,
+                        heap_array[last].ordinal);
+}
+
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
+{
+       if (itr)
+               return itr->info_priv_size(itr);
+       return 0;
+}
+
+static int auxtrace_not_supported(void)
+{
+       pr_err("AUX area tracing is not supported on this architecture\n");
+       return -EINVAL;
+}
+
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+                              struct perf_session *session,
+                              struct auxtrace_info_event *auxtrace_info,
+                              size_t priv_size)
+{
+       if (itr)
+               return itr->info_fill(itr, session, auxtrace_info, priv_size);
+       return auxtrace_not_supported();
+}
+
+void auxtrace_record__free(struct auxtrace_record *itr)
+{
+       if (itr)
+               itr->free(itr);
+}
+
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr)
+{
+       if (itr && itr->snapshot_start)
+               return itr->snapshot_start(itr);
+       return 0;
+}
+
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr)
+{
+       if (itr && itr->snapshot_finish)
+               return itr->snapshot_finish(itr);
+       return 0;
+}
+
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+                                  struct auxtrace_mmap *mm,
+                                  unsigned char *data, u64 *head, u64 *old)
+{
+       if (itr && itr->find_snapshot)
+               return itr->find_snapshot(itr, idx, mm, data, head, old);
+       return 0;
+}
+
+int auxtrace_record__options(struct auxtrace_record *itr,
+                            struct perf_evlist *evlist,
+                            struct record_opts *opts)
+{
+       if (itr)
+               return itr->recording_options(itr, evlist, opts);
+       return 0;
+}
+
+u64 auxtrace_record__reference(struct auxtrace_record *itr)
+{
+       if (itr)
+               return itr->reference(itr);
+       return 0;
+}
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+                                   struct record_opts *opts, const char *str)
+{
+       if (!str)
+               return 0;
+
+       if (itr)
+               return itr->parse_snapshot_options(itr, opts, str);
+
+       pr_err("No AUX area tracing to snapshot\n");
+       return -EINVAL;
+}
+
+struct auxtrace_record *__weak
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
+{
+       *err = 0;
+       return NULL;
+}
+
+static int auxtrace_index__alloc(struct list_head *head)
+{
+       struct auxtrace_index *auxtrace_index;
+
+       auxtrace_index = malloc(sizeof(struct auxtrace_index));
+       if (!auxtrace_index)
+               return -ENOMEM;
+
+       auxtrace_index->nr = 0;
+       INIT_LIST_HEAD(&auxtrace_index->list);
+
+       list_add_tail(&auxtrace_index->list, head);
+
+       return 0;
+}
+
+void auxtrace_index__free(struct list_head *head)
+{
+       struct auxtrace_index *auxtrace_index, *n;
+
+       list_for_each_entry_safe(auxtrace_index, n, head, list) {
+               list_del(&auxtrace_index->list);
+               free(auxtrace_index);
+       }
+}
+
+static struct auxtrace_index *auxtrace_index__last(struct list_head *head)
+{
+       struct auxtrace_index *auxtrace_index;
+       int err;
+
+       if (list_empty(head)) {
+               err = auxtrace_index__alloc(head);
+               if (err)
+                       return NULL;
+       }
+
+       auxtrace_index = list_entry(head->prev, struct auxtrace_index, list);
+
+       if (auxtrace_index->nr >= PERF_AUXTRACE_INDEX_ENTRY_COUNT) {
+               err = auxtrace_index__alloc(head);
+               if (err)
+                       return NULL;
+               auxtrace_index = list_entry(head->prev, struct auxtrace_index,
+                                           list);
+       }
+
+       return auxtrace_index;
+}
+
+int auxtrace_index__auxtrace_event(struct list_head *head,
+                                  union perf_event *event, off_t file_offset)
+{
+       struct auxtrace_index *auxtrace_index;
+       size_t nr;
+
+       auxtrace_index = auxtrace_index__last(head);
+       if (!auxtrace_index)
+               return -ENOMEM;
+
+       nr = auxtrace_index->nr;
+       auxtrace_index->entries[nr].file_offset = file_offset;
+       auxtrace_index->entries[nr].sz = event->header.size;
+       auxtrace_index->nr += 1;
+
+       return 0;
+}
+
+static int auxtrace_index__do_write(int fd,
+                                   struct auxtrace_index *auxtrace_index)
+{
+       struct auxtrace_index_entry ent;
+       size_t i;
+
+       for (i = 0; i < auxtrace_index->nr; i++) {
+               ent.file_offset = auxtrace_index->entries[i].file_offset;
+               ent.sz = auxtrace_index->entries[i].sz;
+               if (writen(fd, &ent, sizeof(ent)) != sizeof(ent))
+                       return -errno;
+       }
+       return 0;
+}
+
+int auxtrace_index__write(int fd, struct list_head *head)
+{
+       struct auxtrace_index *auxtrace_index;
+       u64 total = 0;
+       int err;
+
+       list_for_each_entry(auxtrace_index, head, list)
+               total += auxtrace_index->nr;
+
+       if (writen(fd, &total, sizeof(total)) != sizeof(total))
+               return -errno;
+
+       list_for_each_entry(auxtrace_index, head, list) {
+               err = auxtrace_index__do_write(fd, auxtrace_index);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static int auxtrace_index__process_entry(int fd, struct list_head *head,
+                                        bool needs_swap)
+{
+       struct auxtrace_index *auxtrace_index;
+       struct auxtrace_index_entry ent;
+       size_t nr;
+
+       if (readn(fd, &ent, sizeof(ent)) != sizeof(ent))
+               return -1;
+
+       auxtrace_index = auxtrace_index__last(head);
+       if (!auxtrace_index)
+               return -1;
+
+       nr = auxtrace_index->nr;
+       if (needs_swap) {
+               auxtrace_index->entries[nr].file_offset =
+                                               bswap_64(ent.file_offset);
+               auxtrace_index->entries[nr].sz = bswap_64(ent.sz);
+       } else {
+               auxtrace_index->entries[nr].file_offset = ent.file_offset;
+               auxtrace_index->entries[nr].sz = ent.sz;
+       }
+
+       auxtrace_index->nr = nr + 1;
+
+       return 0;
+}
+
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+                           bool needs_swap)
+{
+       struct list_head *head = &session->auxtrace_index;
+       u64 nr;
+
+       if (readn(fd, &nr, sizeof(u64)) != sizeof(u64))
+               return -1;
+
+       if (needs_swap)
+               nr = bswap_64(nr);
+
+       if (sizeof(u64) + nr * sizeof(struct auxtrace_index_entry) > size)
+               return -1;
+
+       while (nr--) {
+               int err;
+
+               err = auxtrace_index__process_entry(fd, head, needs_swap);
+               if (err)
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int auxtrace_queues__process_index_entry(struct auxtrace_queues *queues,
+                                               struct perf_session *session,
+                                               struct auxtrace_index_entry *ent)
+{
+       return auxtrace_queues__add_indexed_event(queues, session,
+                                                 ent->file_offset, ent->sz);
+}
+
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+                                  struct perf_session *session)
+{
+       struct auxtrace_index *auxtrace_index;
+       struct auxtrace_index_entry *ent;
+       size_t i;
+       int err;
+
+       list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
+               for (i = 0; i < auxtrace_index->nr; i++) {
+                       ent = &auxtrace_index->entries[i];
+                       err = auxtrace_queues__process_index_entry(queues,
+                                                                  session,
+                                                                  ent);
+                       if (err)
+                               return err;
+               }
+       }
+       return 0;
+}
+
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+                                             struct auxtrace_buffer *buffer)
+{
+       if (buffer) {
+               if (list_is_last(&buffer->list, &queue->head))
+                       return NULL;
+               return list_entry(buffer->list.next, struct auxtrace_buffer,
+                                 list);
+       } else {
+               if (list_empty(&queue->head))
+                       return NULL;
+               return list_entry(queue->head.next, struct auxtrace_buffer,
+                                 list);
+       }
+}
+
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
+{
+       size_t adj = buffer->data_offset & (page_size - 1);
+       size_t size = buffer->size + adj;
+       off_t file_offset = buffer->data_offset - adj;
+       void *addr;
+
+       if (buffer->data)
+               return buffer->data;
+
+       addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, file_offset);
+       if (addr == MAP_FAILED)
+               return NULL;
+
+       buffer->mmap_addr = addr;
+       buffer->mmap_size = size;
+
+       buffer->data = addr + adj;
+
+       return buffer->data;
+}
+
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer)
+{
+       if (!buffer->data || !buffer->mmap_addr)
+               return;
+       munmap(buffer->mmap_addr, buffer->mmap_size);
+       buffer->mmap_addr = NULL;
+       buffer->mmap_size = 0;
+       buffer->data = NULL;
+       buffer->use_data = NULL;
+}
+
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer)
+{
+       auxtrace_buffer__put_data(buffer);
+       if (buffer->data_needs_freeing) {
+               buffer->data_needs_freeing = false;
+               zfree(&buffer->data);
+               buffer->use_data = NULL;
+               buffer->size = 0;
+       }
+}
+
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
+{
+       auxtrace_buffer__drop_data(buffer);
+       free(buffer);
+}
+
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+                         int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+                         const char *msg)
+{
+       size_t size;
+
+       memset(auxtrace_error, 0, sizeof(struct auxtrace_error_event));
+
+       auxtrace_error->header.type = PERF_RECORD_AUXTRACE_ERROR;
+       auxtrace_error->type = type;
+       auxtrace_error->code = code;
+       auxtrace_error->cpu = cpu;
+       auxtrace_error->pid = pid;
+       auxtrace_error->tid = tid;
+       auxtrace_error->ip = ip;
+       strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
+
+       size = (void *)auxtrace_error->msg - (void *)auxtrace_error +
+              strlen(auxtrace_error->msg) + 1;
+       auxtrace_error->header.size = PERF_ALIGN(size, sizeof(u64));
+}
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+                                        struct perf_tool *tool,
+                                        struct perf_session *session,
+                                        perf_event__handler_t process)
+{
+       union perf_event *ev;
+       size_t priv_size;
+       int err;
+
+       pr_debug2("Synthesizing auxtrace information\n");
+       priv_size = auxtrace_record__info_priv_size(itr);
+       ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size);
+       if (!ev)
+               return -ENOMEM;
+
+       ev->auxtrace_info.header.type = PERF_RECORD_AUXTRACE_INFO;
+       ev->auxtrace_info.header.size = sizeof(struct auxtrace_info_event) +
+                                       priv_size;
+       err = auxtrace_record__info_fill(itr, session, &ev->auxtrace_info,
+                                        priv_size);
+       if (err)
+               goto out_free;
+
+       err = process(tool, ev, NULL, NULL);
+out_free:
+       free(ev);
+       return err;
+}
+
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+       return !session->itrace_synth_opts ||
+              session->itrace_synth_opts->dont_decode;
+}
+
+int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
+                                     union perf_event *event,
+                                     struct perf_session *session __maybe_unused)
+{
+       enum auxtrace_type type = event->auxtrace_info.type;
+
+       if (dump_trace)
+               fprintf(stdout, " type: %u\n", type);
+
+       switch (type) {
+       case PERF_AUXTRACE_UNKNOWN:
+       default:
+               return -EINVAL;
+       }
+}
+
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+                                union perf_event *event,
+                                struct perf_session *session)
+{
+       s64 err;
+
+       if (dump_trace)
+               fprintf(stdout, " size: %#"PRIx64"  offset: %#"PRIx64"  ref: %#"PRIx64"  idx: %u  tid: %d  cpu: %d\n",
+                       event->auxtrace.size, event->auxtrace.offset,
+                       event->auxtrace.reference, event->auxtrace.idx,
+                       event->auxtrace.tid, event->auxtrace.cpu);
+
+       if (auxtrace__dont_decode(session))
+               return event->auxtrace.size;
+
+       if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE)
+               return -EINVAL;
+
+       err = session->auxtrace->process_auxtrace_event(session, event, tool);
+       if (err < 0)
+               return err;
+
+       return event->auxtrace.size;
+}
+
+#define PERF_ITRACE_DEFAULT_PERIOD_TYPE                PERF_ITRACE_PERIOD_NANOSECS
+#define PERF_ITRACE_DEFAULT_PERIOD             100000
+#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ       16
+#define PERF_ITRACE_MAX_CALLCHAIN_SZ           1024
+
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+{
+       synth_opts->instructions = true;
+       synth_opts->branches = true;
+       synth_opts->transactions = true;
+       synth_opts->errors = true;
+       synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+       synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+       synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+}
+
+/*
+ * Please check tools/perf/Documentation/perf-script.txt for information
+ * about the options parsed here, which is introduced after this cset,
+ * when support in 'perf script' for these options is introduced.
+ */
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+                           int unset)
+{
+       struct itrace_synth_opts *synth_opts = opt->value;
+       const char *p;
+       char *endptr;
+
+       synth_opts->set = true;
+
+       if (unset) {
+               synth_opts->dont_decode = true;
+               return 0;
+       }
+
+       if (!str) {
+               itrace_synth_opts__set_default(synth_opts);
+               return 0;
+       }
+
+       for (p = str; *p;) {
+               switch (*p++) {
+               case 'i':
+                       synth_opts->instructions = true;
+                       while (*p == ' ' || *p == ',')
+                               p += 1;
+                       if (isdigit(*p)) {
+                               synth_opts->period = strtoull(p, &endptr, 10);
+                               p = endptr;
+                               while (*p == ' ' || *p == ',')
+                                       p += 1;
+                               switch (*p++) {
+                               case 'i':
+                                       synth_opts->period_type =
+                                               PERF_ITRACE_PERIOD_INSTRUCTIONS;
+                                       break;
+                               case 't':
+                                       synth_opts->period_type =
+                                               PERF_ITRACE_PERIOD_TICKS;
+                                       break;
+                               case 'm':
+                                       synth_opts->period *= 1000;
+                                       /* Fall through */
+                               case 'u':
+                                       synth_opts->period *= 1000;
+                                       /* Fall through */
+                               case 'n':
+                                       if (*p++ != 's')
+                                               goto out_err;
+                                       synth_opts->period_type =
+                                               PERF_ITRACE_PERIOD_NANOSECS;
+                                       break;
+                               case '\0':
+                                       goto out;
+                               default:
+                                       goto out_err;
+                               }
+                       }
+                       break;
+               case 'b':
+                       synth_opts->branches = true;
+                       break;
+               case 'x':
+                       synth_opts->transactions = true;
+                       break;
+               case 'e':
+                       synth_opts->errors = true;
+                       break;
+               case 'd':
+                       synth_opts->log = true;
+                       break;
+               case 'c':
+                       synth_opts->branches = true;
+                       synth_opts->calls = true;
+                       break;
+               case 'r':
+                       synth_opts->branches = true;
+                       synth_opts->returns = true;
+                       break;
+               case 'g':
+                       synth_opts->callchain = true;
+                       synth_opts->callchain_sz =
+                                       PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+                       while (*p == ' ' || *p == ',')
+                               p += 1;
+                       if (isdigit(*p)) {
+                               unsigned int val;
+
+                               val = strtoul(p, &endptr, 10);
+                               p = endptr;
+                               if (!val || val > PERF_ITRACE_MAX_CALLCHAIN_SZ)
+                                       goto out_err;
+                               synth_opts->callchain_sz = val;
+                       }
+                       break;
+               case ' ':
+               case ',':
+                       break;
+               default:
+                       goto out_err;
+               }
+       }
+out:
+       if (synth_opts->instructions) {
+               if (!synth_opts->period_type)
+                       synth_opts->period_type =
+                                       PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+               if (!synth_opts->period)
+                       synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+       }
+
+       return 0;
+
+out_err:
+       pr_err("Bad Instruction Tracing options '%s'\n", str);
+       return -EINVAL;
+}
+
+static const char * const auxtrace_error_type_name[] = {
+       [PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace",
+};
+
+static const char *auxtrace_error_name(int type)
+{
+       const char *error_type_name = NULL;
+
+       if (type < PERF_AUXTRACE_ERROR_MAX)
+               error_type_name = auxtrace_error_type_name[type];
+       if (!error_type_name)
+               error_type_name = "unknown AUX";
+       return error_type_name;
+}
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
+{
+       struct auxtrace_error_event *e = &event->auxtrace_error;
+       int ret;
+
+       ret = fprintf(fp, " %s error type %u",
+                     auxtrace_error_name(e->type), e->type);
+       ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n",
+                      e->cpu, e->pid, e->tid, e->ip, e->code, e->msg);
+       return ret;
+}
+
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+                                     union perf_event *event)
+{
+       struct auxtrace_error_event *e = &event->auxtrace_error;
+
+       if (e->type < PERF_AUXTRACE_ERROR_MAX)
+               session->evlist->stats.nr_auxtrace_errors[e->type] += 1;
+}
+
+void events_stats__auxtrace_error_warn(const struct events_stats *stats)
+{
+       int i;
+
+       for (i = 0; i < PERF_AUXTRACE_ERROR_MAX; i++) {
+               if (!stats->nr_auxtrace_errors[i])
+                       continue;
+               ui__warning("%u %s errors\n",
+                           stats->nr_auxtrace_errors[i],
+                           auxtrace_error_name(i));
+       }
+}
+
+int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
+                                      union perf_event *event,
+                                      struct perf_session *session)
+{
+       if (auxtrace__dont_decode(session))
+               return 0;
+
+       perf_event__fprintf_auxtrace_error(event, stdout);
+       return 0;
+}
+
+static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
+                                struct auxtrace_record *itr,
+                                struct perf_tool *tool, process_auxtrace_t fn,
+                                bool snapshot, size_t snapshot_size)
+{
+       u64 head, old = mm->prev, offset, ref;
+       unsigned char *data = mm->base;
+       size_t size, head_off, old_off, len1, len2, padding;
+       union perf_event ev;
+       void *data1, *data2;
+
+       if (snapshot) {
+               head = auxtrace_mmap__read_snapshot_head(mm);
+               if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data,
+                                                  &head, &old))
+                       return -1;
+       } else {
+               head = auxtrace_mmap__read_head(mm);
+       }
+
+       if (old == head)
+               return 0;
+
+       pr_debug3("auxtrace idx %d old %#"PRIx64" head %#"PRIx64" diff %#"PRIx64"\n",
+                 mm->idx, old, head, head - old);
+
+       if (mm->mask) {
+               head_off = head & mm->mask;
+               old_off = old & mm->mask;
+       } else {
+               head_off = head % mm->len;
+               old_off = old % mm->len;
+       }
+
+       if (head_off > old_off)
+               size = head_off - old_off;
+       else
+               size = mm->len - (old_off - head_off);
+
+       if (snapshot && size > snapshot_size)
+               size = snapshot_size;
+
+       ref = auxtrace_record__reference(itr);
+
+       if (head > old || size <= head || mm->mask) {
+               offset = head - size;
+       } else {
+               /*
+                * When the buffer size is not a power of 2, 'head' wraps at the
+                * highest multiple of the buffer size, so we have to subtract
+                * the remainder here.
+                */
+               u64 rem = (0ULL - mm->len) % mm->len;
+
+               offset = head - size - rem;
+       }
+
+       if (size > head_off) {
+               len1 = size - head_off;
+               data1 = &data[mm->len - len1];
+               len2 = head_off;
+               data2 = &data[0];
+       } else {
+               len1 = size;
+               data1 = &data[head_off - len1];
+               len2 = 0;
+               data2 = NULL;
+       }
+
+       /* padding must be written by fn() e.g. record__process_auxtrace() */
+       padding = size & 7;
+       if (padding)
+               padding = 8 - padding;
+
+       memset(&ev, 0, sizeof(ev));
+       ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
+       ev.auxtrace.header.size = sizeof(ev.auxtrace);
+       ev.auxtrace.size = size + padding;
+       ev.auxtrace.offset = offset;
+       ev.auxtrace.reference = ref;
+       ev.auxtrace.idx = mm->idx;
+       ev.auxtrace.tid = mm->tid;
+       ev.auxtrace.cpu = mm->cpu;
+
+       if (fn(tool, &ev, data1, len1, data2, len2))
+               return -1;
+
+       mm->prev = head;
+
+       if (!snapshot) {
+               auxtrace_mmap__write_tail(mm, head);
+               if (itr->read_finish) {
+                       int err;
+
+                       err = itr->read_finish(itr, mm->idx);
+                       if (err < 0)
+                               return err;
+               }
+       }
+
+       return 1;
+}
+
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+                       struct perf_tool *tool, process_auxtrace_t fn)
+{
+       return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0);
+}
+
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+                                struct auxtrace_record *itr,
+                                struct perf_tool *tool, process_auxtrace_t fn,
+                                size_t snapshot_size)
+{
+       return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size);
+}
+
+/**
+ * struct auxtrace_cache - hash table to implement a cache
+ * @hashtable: the hashtable
+ * @sz: hashtable size (number of hlists)
+ * @entry_size: size of an entry
+ * @limit: limit the number of entries to this maximum, when reached the cache
+ *         is dropped and caching begins again with an empty cache
+ * @cnt: current number of entries
+ * @bits: hashtable size (@sz = 2^@bits)
+ */
+struct auxtrace_cache {
+       struct hlist_head *hashtable;
+       size_t sz;
+       size_t entry_size;
+       size_t limit;
+       size_t cnt;
+       unsigned int bits;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+                                          unsigned int limit_percent)
+{
+       struct auxtrace_cache *c;
+       struct hlist_head *ht;
+       size_t sz, i;
+
+       c = zalloc(sizeof(struct auxtrace_cache));
+       if (!c)
+               return NULL;
+
+       sz = 1UL << bits;
+
+       ht = calloc(sz, sizeof(struct hlist_head));
+       if (!ht)
+               goto out_free;
+
+       for (i = 0; i < sz; i++)
+               INIT_HLIST_HEAD(&ht[i]);
+
+       c->hashtable = ht;
+       c->sz = sz;
+       c->entry_size = entry_size;
+       c->limit = (c->sz * limit_percent) / 100;
+       c->bits = bits;
+
+       return c;
+
+out_free:
+       free(c);
+       return NULL;
+}
+
+static void auxtrace_cache__drop(struct auxtrace_cache *c)
+{
+       struct auxtrace_cache_entry *entry;
+       struct hlist_node *tmp;
+       size_t i;
+
+       if (!c)
+               return;
+
+       for (i = 0; i < c->sz; i++) {
+               hlist_for_each_entry_safe(entry, tmp, &c->hashtable[i], hash) {
+                       hlist_del(&entry->hash);
+                       auxtrace_cache__free_entry(c, entry);
+               }
+       }
+
+       c->cnt = 0;
+}
+
+void auxtrace_cache__free(struct auxtrace_cache *c)
+{
+       if (!c)
+               return;
+
+       auxtrace_cache__drop(c);
+       free(c->hashtable);
+       free(c);
+}
+
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c)
+{
+       return malloc(c->entry_size);
+}
+
+void auxtrace_cache__free_entry(struct auxtrace_cache *c __maybe_unused,
+                               void *entry)
+{
+       free(entry);
+}
+
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+                       struct auxtrace_cache_entry *entry)
+{
+       if (c->limit && ++c->cnt > c->limit)
+               auxtrace_cache__drop(c);
+
+       entry->key = key;
+       hlist_add_head(&entry->hash, &c->hashtable[hash_32(key, c->bits)]);
+
+       return 0;
+}
+
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key)
+{
+       struct auxtrace_cache_entry *entry;
+       struct hlist_head *hlist;
+
+       if (!c)
+               return NULL;
+
+       hlist = &c->hashtable[hash_32(key, c->bits)];
+       hlist_for_each_entry(entry, hlist, hash) {
+               if (entry->key == key)
+                       return entry;
+       }
+
+       return NULL;
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
new file mode 100644 (file)
index 0000000..a171abb
--- /dev/null
@@ -0,0 +1,643 @@
+/*
+ * auxtrace.h: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_AUXTRACE_H
+#define __PERF_AUXTRACE_H
+
+#include <sys/types.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "../perf.h"
+#include "event.h"
+#include "session.h"
+#include "debug.h"
+
+union perf_event;
+struct perf_session;
+struct perf_evlist;
+struct perf_tool;
+struct option;
+struct record_opts;
+struct auxtrace_info_event;
+struct events_stats;
+
+enum auxtrace_type {
+       PERF_AUXTRACE_UNKNOWN,
+};
+
+enum itrace_period_type {
+       PERF_ITRACE_PERIOD_INSTRUCTIONS,
+       PERF_ITRACE_PERIOD_TICKS,
+       PERF_ITRACE_PERIOD_NANOSECS,
+};
+
+/**
+ * struct itrace_synth_opts - AUX area tracing synthesis options.
+ * @set: indicates whether or not options have been set
+ * @inject: indicates the event (not just the sample) must be fully synthesized
+ *          because 'perf inject' will write it out
+ * @instructions: whether to synthesize 'instructions' events
+ * @branches: whether to synthesize 'branches' events
+ * @transactions: whether to synthesize events for transactions
+ * @errors: whether to synthesize decoder error events
+ * @dont_decode: whether to skip decoding entirely
+ * @log: write a decoding log
+ * @calls: limit branch samples to calls (can be combined with @returns)
+ * @returns: limit branch samples to returns (can be combined with @calls)
+ * @callchain: add callchain to 'instructions' events
+ * @callchain_sz: maximum callchain size
+ * @period: 'instructions' events period
+ * @period_type: 'instructions' events period type
+ */
+struct itrace_synth_opts {
+       bool                    set;
+       bool                    inject;
+       bool                    instructions;
+       bool                    branches;
+       bool                    transactions;
+       bool                    errors;
+       bool                    dont_decode;
+       bool                    log;
+       bool                    calls;
+       bool                    returns;
+       bool                    callchain;
+       unsigned int            callchain_sz;
+       unsigned long long      period;
+       enum itrace_period_type period_type;
+};
+
+/**
+ * struct auxtrace_index_entry - indexes a AUX area tracing event within a
+ *                               perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+       u64                     file_offset;
+       u64                     sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ *                         file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+       struct list_head        list;
+       size_t                  nr;
+       struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
+/**
+ * struct auxtrace - session callbacks to allow AUX area data decoding.
+ * @process_event: lets the decoder see all session events
+ * @flush_events: process any remaining data
+ * @free_events: free resources associated with event processing
+ * @free: free resources associated with the session
+ */
+struct auxtrace {
+       int (*process_event)(struct perf_session *session,
+                            union perf_event *event,
+                            struct perf_sample *sample,
+                            struct perf_tool *tool);
+       int (*process_auxtrace_event)(struct perf_session *session,
+                                     union perf_event *event,
+                                     struct perf_tool *tool);
+       int (*flush_events)(struct perf_session *session,
+                           struct perf_tool *tool);
+       void (*free_events)(struct perf_session *session);
+       void (*free)(struct perf_session *session);
+};
+
+/**
+ * struct auxtrace_buffer - a buffer containing AUX area tracing data.
+ * @list: buffers are queued in a list held by struct auxtrace_queue
+ * @size: size of the buffer in bytes
+ * @pid: in per-thread mode, the pid this buffer is associated with
+ * @tid: in per-thread mode, the tid this buffer is associated with
+ * @cpu: in per-cpu mode, the cpu this buffer is associated with
+ * @data: actual buffer data (can be null if the data has not been loaded)
+ * @data_offset: file offset at which the buffer can be read
+ * @mmap_addr: mmap address at which the buffer can be read
+ * @mmap_size: size of the mmap at @mmap_addr
+ * @data_needs_freeing: @data was malloc'd so free it when it is no longer
+ *                      needed
+ * @consecutive: the original data was split up and this buffer is consecutive
+ *               to the previous buffer
+ * @offset: offset as determined by aux_head / aux_tail members of struct
+ *          perf_event_mmap_page
+ * @reference: an implementation-specific reference determined when the data is
+ *             recorded
+ * @buffer_nr: used to number each buffer
+ * @use_size: implementation actually only uses this number of bytes
+ * @use_data: implementation actually only uses data starting at this address
+ */
+struct auxtrace_buffer {
+       struct list_head        list;
+       size_t                  size;
+       pid_t                   pid;
+       pid_t                   tid;
+       int                     cpu;
+       void                    *data;
+       off_t                   data_offset;
+       void                    *mmap_addr;
+       size_t                  mmap_size;
+       bool                    data_needs_freeing;
+       bool                    consecutive;
+       u64                     offset;
+       u64                     reference;
+       u64                     buffer_nr;
+       size_t                  use_size;
+       void                    *use_data;
+};
+
+/**
+ * struct auxtrace_queue - a queue of AUX area tracing data buffers.
+ * @head: head of buffer list
+ * @tid: in per-thread mode, the tid this queue is associated with
+ * @cpu: in per-cpu mode, the cpu this queue is associated with
+ * @set: %true once this queue has been dedicated to a specific thread or cpu
+ * @priv: implementation-specific data
+ */
+struct auxtrace_queue {
+       struct list_head        head;
+       pid_t                   tid;
+       int                     cpu;
+       bool                    set;
+       void                    *priv;
+};
+
+/**
+ * struct auxtrace_queues - an array of AUX area tracing queues.
+ * @queue_array: array of queues
+ * @nr_queues: number of queues
+ * @new_data: set whenever new data is queued
+ * @populated: queues have been fully populated using the auxtrace_index
+ * @next_buffer_nr: used to number each buffer
+ */
+struct auxtrace_queues {
+       struct auxtrace_queue   *queue_array;
+       unsigned int            nr_queues;
+       bool                    new_data;
+       bool                    populated;
+       u64                     next_buffer_nr;
+};
+
+/**
+ * struct auxtrace_heap_item - element of struct auxtrace_heap.
+ * @queue_nr: queue number
+ * @ordinal: value used for sorting (lowest ordinal is top of the heap) expected
+ *           to be a timestamp
+ */
+struct auxtrace_heap_item {
+       unsigned int            queue_nr;
+       u64                     ordinal;
+};
+
+/**
+ * struct auxtrace_heap - a heap suitable for sorting AUX area tracing queues.
+ * @heap_array: the heap
+ * @heap_cnt: the number of elements in the heap
+ * @heap_sz: maximum number of elements (grows as needed)
+ */
+struct auxtrace_heap {
+       struct auxtrace_heap_item       *heap_array;
+       unsigned int            heap_cnt;
+       unsigned int            heap_sz;
+};
+
+/**
+ * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
+ * @base: address of mapped area
+ * @userpg: pointer to buffer's perf_event_mmap_page
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @len: size of mapped area
+ * @prev: previous aux_head
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ *       mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap {
+       void            *base;
+       void            *userpg;
+       size_t          mask;
+       size_t          len;
+       u64             prev;
+       int             idx;
+       pid_t           tid;
+       int             cpu;
+};
+
+/**
+ * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap.
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @offset: file offset of mapped area
+ * @len: size of mapped area
+ * @prot: mmap memory protection
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ *       mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap_params {
+       size_t          mask;
+       off_t           offset;
+       size_t          len;
+       int             prot;
+       int             idx;
+       pid_t           tid;
+       int             cpu;
+};
+
+/**
+ * struct auxtrace_record - callbacks for recording AUX area data.
+ * @recording_options: validate and process recording options
+ * @info_priv_size: return the size of the private data in auxtrace_info_event
+ * @info_fill: fill-in the private data in auxtrace_info_event
+ * @free: free this auxtrace record structure
+ * @snapshot_start: starting a snapshot
+ * @snapshot_finish: finishing a snapshot
+ * @find_snapshot: find data to snapshot within auxtrace mmap
+ * @parse_snapshot_options: parse snapshot options
+ * @reference: provide a 64-bit reference number for auxtrace_event
+ * @read_finish: called after reading from an auxtrace mmap
+ */
+struct auxtrace_record {
+       int (*recording_options)(struct auxtrace_record *itr,
+                                struct perf_evlist *evlist,
+                                struct record_opts *opts);
+       size_t (*info_priv_size)(struct auxtrace_record *itr);
+       int (*info_fill)(struct auxtrace_record *itr,
+                        struct perf_session *session,
+                        struct auxtrace_info_event *auxtrace_info,
+                        size_t priv_size);
+       void (*free)(struct auxtrace_record *itr);
+       int (*snapshot_start)(struct auxtrace_record *itr);
+       int (*snapshot_finish)(struct auxtrace_record *itr);
+       int (*find_snapshot)(struct auxtrace_record *itr, int idx,
+                            struct auxtrace_mmap *mm, unsigned char *data,
+                            u64 *head, u64 *old);
+       int (*parse_snapshot_options)(struct auxtrace_record *itr,
+                                     struct record_opts *opts,
+                                     const char *str);
+       u64 (*reference)(struct auxtrace_record *itr);
+       int (*read_finish)(struct auxtrace_record *itr, int idx);
+};
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+/*
+ * In snapshot mode the mmapped page is read-only which makes using
+ * __sync_val_compare_and_swap() problematic.  However, snapshot mode expects
+ * the buffer is not updated while the snapshot is made (e.g. Intel PT disables
+ * the event) so there is not a race anyway.
+ */
+static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm)
+{
+       struct perf_event_mmap_page *pc = mm->userpg;
+       u64 head = ACCESS_ONCE(pc->aux_head);
+
+       /* Ensure all reads are done after we read the head */
+       rmb();
+       return head;
+}
+
+static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+       struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+       u64 head = ACCESS_ONCE(pc->aux_head);
+#else
+       u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0);
+#endif
+
+       /* Ensure all reads are done after we read the head */
+       rmb();
+       return head;
+}
+
+static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+       struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+       u64 old_tail;
+#endif
+
+       /* Ensure all reads are done before we write the tail out */
+       mb();
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+       pc->aux_tail = tail;
+#else
+       do {
+               old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0);
+       } while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail));
+#endif
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+                       struct auxtrace_mmap_params *mp,
+                       void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+                               off_t auxtrace_offset,
+                               unsigned int auxtrace_pages,
+                               bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+                                  struct perf_evlist *evlist, int idx,
+                                  bool per_cpu);
+
+typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+                                 union perf_event *event, void *data1,
+                                 size_t len1, void *data2, size_t len2);
+
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+                       struct perf_tool *tool, process_auxtrace_t fn);
+
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+                                struct auxtrace_record *itr,
+                                struct perf_tool *tool, process_auxtrace_t fn,
+                                size_t snapshot_size);
+
+int auxtrace_queues__init(struct auxtrace_queues *queues);
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+                              struct perf_session *session,
+                              union perf_event *event, off_t data_offset,
+                              struct auxtrace_buffer **buffer_ptr);
+void auxtrace_queues__free(struct auxtrace_queues *queues);
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+                                  struct perf_session *session);
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+                                             struct auxtrace_buffer *buffer);
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd);
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+                      u64 ordinal);
+void auxtrace_heap__pop(struct auxtrace_heap *heap);
+void auxtrace_heap__free(struct auxtrace_heap *heap);
+
+struct auxtrace_cache_entry {
+       struct hlist_node hash;
+       u32 key;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+                                          unsigned int limit_percent);
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache);
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c);
+void auxtrace_cache__free_entry(struct auxtrace_cache *c, void *entry);
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+                       struct auxtrace_cache_entry *entry);
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key);
+
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+                                             int *err);
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+                                   struct record_opts *opts,
+                                   const char *str);
+int auxtrace_record__options(struct auxtrace_record *itr,
+                            struct perf_evlist *evlist,
+                            struct record_opts *opts);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr);
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+                              struct perf_session *session,
+                              struct auxtrace_info_event *auxtrace_info,
+                              size_t priv_size);
+void auxtrace_record__free(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr);
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+                                  struct auxtrace_mmap *mm,
+                                  unsigned char *data, u64 *head, u64 *old);
+u64 auxtrace_record__reference(struct auxtrace_record *itr);
+
+int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
+                                  off_t file_offset);
+int auxtrace_index__write(int fd, struct list_head *head);
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+                           bool needs_swap);
+void auxtrace_index__free(struct list_head *head);
+
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+                         int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+                         const char *msg);
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+                                        struct perf_tool *tool,
+                                        struct perf_session *session,
+                                        perf_event__handler_t process);
+int perf_event__process_auxtrace_info(struct perf_tool *tool,
+                                     union perf_event *event,
+                                     struct perf_session *session);
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+                                union perf_event *event,
+                                struct perf_session *session);
+int perf_event__process_auxtrace_error(struct perf_tool *tool,
+                                      union perf_event *event,
+                                      struct perf_session *session);
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+                           int unset);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+                                     union perf_event *event);
+void events_stats__auxtrace_error_warn(const struct events_stats *stats);
+
+static inline int auxtrace__process_event(struct perf_session *session,
+                                         union perf_event *event,
+                                         struct perf_sample *sample,
+                                         struct perf_tool *tool)
+{
+       if (!session->auxtrace)
+               return 0;
+
+       return session->auxtrace->process_event(session, event, sample, tool);
+}
+
+static inline int auxtrace__flush_events(struct perf_session *session,
+                                        struct perf_tool *tool)
+{
+       if (!session->auxtrace)
+               return 0;
+
+       return session->auxtrace->flush_events(session, tool);
+}
+
+static inline void auxtrace__free_events(struct perf_session *session)
+{
+       if (!session->auxtrace)
+               return;
+
+       return session->auxtrace->free_events(session);
+}
+
+static inline void auxtrace__free(struct perf_session *session)
+{
+       if (!session->auxtrace)
+               return;
+
+       return session->auxtrace->free(session);
+}
+
+#else
+
+static inline struct auxtrace_record *
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
+                     int *err __maybe_unused)
+{
+       *err = 0;
+       return NULL;
+}
+
+static inline
+void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
+{
+}
+
+static inline int
+perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
+                                    struct perf_tool *tool __maybe_unused,
+                                    struct perf_session *session __maybe_unused,
+                                    perf_event__handler_t process __maybe_unused)
+{
+       return -EINVAL;
+}
+
+static inline
+int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
+                            struct perf_evlist *evlist __maybe_unused,
+                            struct record_opts *opts __maybe_unused)
+{
+       return 0;
+}
+
+#define perf_event__process_auxtrace_info              0
+#define perf_event__process_auxtrace                   0
+#define perf_event__process_auxtrace_error             0
+
+static inline
+void perf_session__auxtrace_error_inc(struct perf_session *session
+                                     __maybe_unused,
+                                     union perf_event *event
+                                     __maybe_unused)
+{
+}
+
+static inline
+void events_stats__auxtrace_error_warn(const struct events_stats *stats
+                                      __maybe_unused)
+{
+}
+
+static inline
+int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
+                           const char *str __maybe_unused,
+                           int unset __maybe_unused)
+{
+       pr_err("AUX area tracing not supported\n");
+       return -EINVAL;
+}
+
+static inline
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+                                   struct record_opts *opts __maybe_unused,
+                                   const char *str)
+{
+       if (!str)
+               return 0;
+       pr_err("AUX area tracing not supported\n");
+       return -EINVAL;
+}
+
+static inline
+int auxtrace__process_event(struct perf_session *session __maybe_unused,
+                           union perf_event *event __maybe_unused,
+                           struct perf_sample *sample __maybe_unused,
+                           struct perf_tool *tool __maybe_unused)
+{
+       return 0;
+}
+
+static inline
+int auxtrace__flush_events(struct perf_session *session __maybe_unused,
+                          struct perf_tool *tool __maybe_unused)
+{
+       return 0;
+}
+
+static inline
+void auxtrace__free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused)
+{
+}
+
+static inline
+void auxtrace__free(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_index__write(int fd __maybe_unused,
+                         struct list_head *head __maybe_unused)
+{
+       return -EINVAL;
+}
+
+static inline
+int auxtrace_index__process(int fd __maybe_unused,
+                           u64 size __maybe_unused,
+                           struct perf_session *session __maybe_unused,
+                           bool needs_swap __maybe_unused)
+{
+       return -EINVAL;
+}
+
+static inline
+void auxtrace_index__free(struct list_head *head __maybe_unused)
+{
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+                       struct auxtrace_mmap_params *mp,
+                       void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+                               off_t auxtrace_offset,
+                               unsigned int auxtrace_pages,
+                               bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+                                  struct perf_evlist *evlist, int idx,
+                                  bool per_cpu);
+
+#endif
+
+#endif
index 61867df..1f6fc23 100644 (file)
@@ -43,6 +43,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
        if (al.map != NULL)
                al.map->dso->hit = 1;
 
+       thread__put(thread);
        return 0;
 }
 
@@ -59,8 +60,10 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
        dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
                    event->fork.ppid, event->fork.ptid);
 
-       if (thread)
+       if (thread) {
                machine__remove_thread(machine, thread);
+               thread__put(thread);
+       }
 
        return 0;
 }
@@ -159,15 +162,20 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id,
        return write_padded(fd, name, name_len + 1, len);
 }
 
-static int __dsos__write_buildid_table(struct list_head *head,
-                                      struct machine *machine,
-                                      pid_t pid, u16 misc, int fd)
+static int machine__write_buildid_table(struct machine *machine, int fd)
 {
+       int err = 0;
        char nm[PATH_MAX];
        struct dso *pos;
+       u16 kmisc = PERF_RECORD_MISC_KERNEL,
+           umisc = PERF_RECORD_MISC_USER;
 
-       dsos__for_each_with_build_id(pos, head) {
-               int err;
+       if (!machine__is_host(machine)) {
+               kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
+               umisc = PERF_RECORD_MISC_GUEST_USER;
+       }
+
+       dsos__for_each_with_build_id(pos, &machine->dsos.head) {
                const char *name;
                size_t name_len;
 
@@ -186,32 +194,12 @@ static int __dsos__write_buildid_table(struct list_head *head,
                        name_len = pos->long_name_len + 1;
                }
 
-               err = write_buildid(name, name_len, pos->build_id,
-                                   pid, misc, fd);
+               err = write_buildid(name, name_len, pos->build_id, machine->pid,
+                                   pos->kernel ? kmisc : umisc, fd);
                if (err)
-                       return err;
-       }
-
-       return 0;
-}
-
-static int machine__write_buildid_table(struct machine *machine, int fd)
-{
-       int err;
-       u16 kmisc = PERF_RECORD_MISC_KERNEL,
-           umisc = PERF_RECORD_MISC_USER;
-
-       if (!machine__is_host(machine)) {
-               kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
-               umisc = PERF_RECORD_MISC_GUEST_USER;
+                       break;
        }
 
-       err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine,
-                                         machine->pid, kmisc, fd);
-       if (err == 0)
-               err = __dsos__write_buildid_table(&machine->user_dsos.head,
-                                                 machine, machine->pid, umisc,
-                                                 fd);
        return err;
 }
 
@@ -244,13 +232,7 @@ static int __dsos__hit_all(struct list_head *head)
 
 static int machine__hit_all_dsos(struct machine *machine)
 {
-       int err;
-
-       err = __dsos__hit_all(&machine->kernel_dsos.head);
-       if (err)
-               return err;
-
-       return __dsos__hit_all(&machine->user_dsos.head);
+       return __dsos__hit_all(&machine->dsos.head);
 }
 
 int dsos__hit_all(struct perf_session *session)
@@ -490,9 +472,7 @@ static int __dsos__cache_build_ids(struct list_head *head,
 
 static int machine__cache_build_ids(struct machine *machine)
 {
-       int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine);
-       ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine);
-       return ret;
+       return __dsos__cache_build_ids(&machine->dsos.head, machine);
 }
 
 int perf_session__cache_build_ids(struct perf_session *session)
@@ -517,11 +497,7 @@ int perf_session__cache_build_ids(struct perf_session *session)
 
 static bool machine__read_build_ids(struct machine *machine, bool with_hits)
 {
-       bool ret;
-
-       ret  = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits);
-       ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits);
-       return ret;
+       return __dsos__read_build_ids(&machine->dsos.head, with_hits);
 }
 
 bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
index fbcca21..c861373 100644 (file)
@@ -30,7 +30,6 @@ extern const char *perf_config_dirname(const char *, const char *);
 
 /* pager.c */
 extern void setup_pager(void);
-extern const char *pager_program;
 extern int pager_in_use(void);
 extern int pager_use_color;
 
index 6033a0a..679c2c6 100644 (file)
@@ -72,6 +72,10 @@ extern struct callchain_param callchain_param;
 struct callchain_list {
        u64                     ip;
        struct map_symbol       ms;
+       struct /* for TUI */ {
+               bool            unfolded;
+               bool            has_children;
+       };
        char                   *srcline;
        struct list_head        list;
 };
index 88f7be3..32e12ec 100644 (file)
@@ -115,23 +115,19 @@ static int add_cgroup(struct perf_evlist *evlist, char *str)
                        goto found;
                n++;
        }
-       if (cgrp->refcnt == 0)
+       if (atomic_read(&cgrp->refcnt) == 0)
                free(cgrp);
 
        return -1;
 found:
-       cgrp->refcnt++;
+       atomic_inc(&cgrp->refcnt);
        counter->cgrp = cgrp;
        return 0;
 }
 
 void close_cgroup(struct cgroup_sel *cgrp)
 {
-       if (!cgrp)
-               return;
-
-       /* XXX: not reentrant */
-       if (--cgrp->refcnt == 0) {
+       if (cgrp && atomic_dec_and_test(&cgrp->refcnt)) {
                close(cgrp->fd);
                zfree(&cgrp->name);
                free(cgrp);
index 89acd6d..b4b8cb4 100644 (file)
@@ -1,12 +1,14 @@
 #ifndef __CGROUP_H__
 #define __CGROUP_H__
 
+#include <linux/atomic.h>
+
 struct option;
 
 struct cgroup_sel {
        char *name;
        int fd;
-       int refcnt;
+       atomic_t refcnt;
 };
 
 
index b2bb59d..21b7ff3 100644 (file)
@@ -2,24 +2,27 @@
 #include "util.h"
 #include <stdlib.h>
 #include <stdio.h>
+#include <linux/atomic.h>
 
 struct comm_str {
        char *str;
        struct rb_node rb_node;
-       int ref;
+       atomic_t refcnt;
 };
 
 /* Should perhaps be moved to struct machine */
 static struct rb_root comm_str_root;
 
-static void comm_str__get(struct comm_str *cs)
+static struct comm_str *comm_str__get(struct comm_str *cs)
 {
-       cs->ref++;
+       if (cs)
+               atomic_inc(&cs->refcnt);
+       return cs;
 }
 
 static void comm_str__put(struct comm_str *cs)
 {
-       if (!--cs->ref) {
+       if (cs && atomic_dec_and_test(&cs->refcnt)) {
                rb_erase(&cs->rb_node, &comm_str_root);
                zfree(&cs->str);
                free(cs);
@@ -40,6 +43,8 @@ static struct comm_str *comm_str__alloc(const char *str)
                return NULL;
        }
 
+       atomic_set(&cs->refcnt, 0);
+
        return cs;
 }
 
index dd17c9a..5bfc119 100644 (file)
@@ -14,6 +14,7 @@
 #include <babeltrace/ctf-writer/event.h>
 #include <babeltrace/ctf-writer/event-types.h>
 #include <babeltrace/ctf-writer/event-fields.h>
+#include <babeltrace/ctf-ir/utils.h>
 #include <babeltrace/ctf/events.h>
 #include <traceevent/event-parse.h>
 #include "asm/bug.h"
@@ -38,12 +39,21 @@ struct evsel_priv {
        struct bt_ctf_event_class *event_class;
 };
 
+#define MAX_CPUS       4096
+
+struct ctf_stream {
+       struct bt_ctf_stream *stream;
+       int cpu;
+       u32 count;
+};
+
 struct ctf_writer {
        /* writer primitives */
-       struct bt_ctf_writer            *writer;
-       struct bt_ctf_stream            *stream;
-       struct bt_ctf_stream_class      *stream_class;
-       struct bt_ctf_clock             *clock;
+       struct bt_ctf_writer             *writer;
+       struct ctf_stream               **stream;
+       int                               stream_cnt;
+       struct bt_ctf_stream_class       *stream_class;
+       struct bt_ctf_clock              *clock;
 
        /* data types */
        union {
@@ -65,6 +75,9 @@ struct convert {
 
        u64                     events_size;
        u64                     events_count;
+
+       /* Ordered events configured queue size. */
+       u64                     queue_size;
 };
 
 static int value_set(struct bt_ctf_field_type *type,
@@ -153,6 +166,43 @@ get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field)
                return cw->data.u32;
 }
 
+static unsigned long long adjust_signedness(unsigned long long value_int, int size)
+{
+       unsigned long long value_mask;
+
+       /*
+        * value_mask = (1 << (size * 8 - 1)) - 1.
+        * Directly set value_mask for code readers.
+        */
+       switch (size) {
+       case 1:
+               value_mask = 0x7fULL;
+               break;
+       case 2:
+               value_mask = 0x7fffULL;
+               break;
+       case 4:
+               value_mask = 0x7fffffffULL;
+               break;
+       case 8:
+               /*
+                * For 64 bit value, return it self. There is no need
+                * to fill high bit.
+                */
+               /* Fall through */
+       default:
+               /* BUG! */
+               return value_int;
+       }
+
+       /* If it is a positive value, don't adjust. */
+       if ((value_int & (~0ULL - value_mask)) == 0)
+               return value_int;
+
+       /* Fill upper part of value_int with 1 to make it a negative long long. */
+       return (value_int & value_mask) | ~value_mask;
+}
+
 static int add_tracepoint_field_value(struct ctf_writer *cw,
                                      struct bt_ctf_event_class *event_class,
                                      struct bt_ctf_event *event,
@@ -164,7 +214,6 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
        struct bt_ctf_field *field;
        const char *name = fmtf->name;
        void *data = sample->raw_data;
-       unsigned long long value_int;
        unsigned long flags = fmtf->flags;
        unsigned int n_items;
        unsigned int i;
@@ -172,6 +221,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
        unsigned int len;
        int ret;
 
+       name = fmtf->alias;
        offset = fmtf->offset;
        len = fmtf->size;
        if (flags & FIELD_IS_STRING)
@@ -208,11 +258,6 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
        type = get_tracepoint_field_type(cw, fmtf);
 
        for (i = 0; i < n_items; i++) {
-               if (!(flags & FIELD_IS_STRING))
-                       value_int = pevent_read_number(
-                                       fmtf->event->pevent,
-                                       data + offset + i * len, len);
-
                if (flags & FIELD_IS_ARRAY)
                        field = bt_ctf_field_array_get_field(array_field, i);
                else
@@ -226,12 +271,21 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
                if (flags & FIELD_IS_STRING)
                        ret = bt_ctf_field_string_set_value(field,
                                        data + offset + i * len);
-               else if (!(flags & FIELD_IS_SIGNED))
-                       ret = bt_ctf_field_unsigned_integer_set_value(
-                                       field, value_int);
-               else
-                       ret = bt_ctf_field_signed_integer_set_value(
-                                       field, value_int);
+               else {
+                       unsigned long long value_int;
+
+                       value_int = pevent_read_number(
+                                       fmtf->event->pevent,
+                                       data + offset + i * len, len);
+
+                       if (!(flags & FIELD_IS_SIGNED))
+                               ret = bt_ctf_field_unsigned_integer_set_value(
+                                               field, value_int);
+                       else
+                               ret = bt_ctf_field_signed_integer_set_value(
+                                               field, adjust_signedness(value_int, len));
+               }
+
                if (ret) {
                        pr_err("failed to set file value %s\n", name);
                        goto err_put_field;
@@ -346,12 +400,6 @@ static int add_generic_values(struct ctf_writer *cw,
                        return -1;
        }
 
-       if (type & PERF_SAMPLE_CPU) {
-               ret = value_set_u32(cw, event, "perf_cpu", sample->cpu);
-               if (ret)
-                       return -1;
-       }
-
        if (type & PERF_SAMPLE_PERIOD) {
                ret = value_set_u64(cw, event, "perf_period", sample->period);
                if (ret)
@@ -381,6 +429,129 @@ static int add_generic_values(struct ctf_writer *cw,
        return 0;
 }
 
+static int ctf_stream__flush(struct ctf_stream *cs)
+{
+       int err = 0;
+
+       if (cs) {
+               err = bt_ctf_stream_flush(cs->stream);
+               if (err)
+                       pr_err("CTF stream %d flush failed\n", cs->cpu);
+
+               pr("Flush stream for cpu %d (%u samples)\n",
+                  cs->cpu, cs->count);
+
+               cs->count = 0;
+       }
+
+       return err;
+}
+
+static struct ctf_stream *ctf_stream__create(struct ctf_writer *cw, int cpu)
+{
+       struct ctf_stream *cs;
+       struct bt_ctf_field *pkt_ctx   = NULL;
+       struct bt_ctf_field *cpu_field = NULL;
+       struct bt_ctf_stream *stream   = NULL;
+       int ret;
+
+       cs = zalloc(sizeof(*cs));
+       if (!cs) {
+               pr_err("Failed to allocate ctf stream\n");
+               return NULL;
+       }
+
+       stream = bt_ctf_writer_create_stream(cw->writer, cw->stream_class);
+       if (!stream) {
+               pr_err("Failed to create CTF stream\n");
+               goto out;
+       }
+
+       pkt_ctx = bt_ctf_stream_get_packet_context(stream);
+       if (!pkt_ctx) {
+               pr_err("Failed to obtain packet context\n");
+               goto out;
+       }
+
+       cpu_field = bt_ctf_field_structure_get_field(pkt_ctx, "cpu_id");
+       bt_ctf_field_put(pkt_ctx);
+       if (!cpu_field) {
+               pr_err("Failed to obtain cpu field\n");
+               goto out;
+       }
+
+       ret = bt_ctf_field_unsigned_integer_set_value(cpu_field, (u32) cpu);
+       if (ret) {
+               pr_err("Failed to update CPU number\n");
+               goto out;
+       }
+
+       bt_ctf_field_put(cpu_field);
+
+       cs->cpu    = cpu;
+       cs->stream = stream;
+       return cs;
+
+out:
+       if (cpu_field)
+               bt_ctf_field_put(cpu_field);
+       if (stream)
+               bt_ctf_stream_put(stream);
+
+       free(cs);
+       return NULL;
+}
+
+static void ctf_stream__delete(struct ctf_stream *cs)
+{
+       if (cs) {
+               bt_ctf_stream_put(cs->stream);
+               free(cs);
+       }
+}
+
+static struct ctf_stream *ctf_stream(struct ctf_writer *cw, int cpu)
+{
+       struct ctf_stream *cs = cw->stream[cpu];
+
+       if (!cs) {
+               cs = ctf_stream__create(cw, cpu);
+               cw->stream[cpu] = cs;
+       }
+
+       return cs;
+}
+
+static int get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample,
+                         struct perf_evsel *evsel)
+{
+       int cpu = 0;
+
+       if (evsel->attr.sample_type & PERF_SAMPLE_CPU)
+               cpu = sample->cpu;
+
+       if (cpu > cw->stream_cnt) {
+               pr_err("Event was recorded for CPU %d, limit is at %d.\n",
+                       cpu, cw->stream_cnt);
+               cpu = 0;
+       }
+
+       return cpu;
+}
+
+#define STREAM_FLUSH_COUNT 100000
+
+/*
+ * Currently we have no other way to determine the
+ * time for the stream flush other than keep track
+ * of the number of events and check it against
+ * threshold.
+ */
+static bool is_flush_needed(struct ctf_stream *cs)
+{
+       return cs->count >= STREAM_FLUSH_COUNT;
+}
+
 static int process_sample_event(struct perf_tool *tool,
                                union perf_event *_event __maybe_unused,
                                struct perf_sample *sample,
@@ -390,6 +561,7 @@ static int process_sample_event(struct perf_tool *tool,
        struct convert *c = container_of(tool, struct convert, tool);
        struct evsel_priv *priv = evsel->priv;
        struct ctf_writer *cw = &c->writer;
+       struct ctf_stream *cs;
        struct bt_ctf_event_class *event_class;
        struct bt_ctf_event *event;
        int ret;
@@ -424,9 +596,93 @@ static int process_sample_event(struct perf_tool *tool,
                        return -1;
        }
 
-       bt_ctf_stream_append_event(cw->stream, event);
+       cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
+       if (cs) {
+               if (is_flush_needed(cs))
+                       ctf_stream__flush(cs);
+
+               cs->count++;
+               bt_ctf_stream_append_event(cs->stream, event);
+       }
+
        bt_ctf_event_put(event);
-       return 0;
+       return cs ? 0 : -1;
+}
+
+/* If dup < 0, add a prefix. Else, add _dupl_X suffix. */
+static char *change_name(char *name, char *orig_name, int dup)
+{
+       char *new_name = NULL;
+       size_t len;
+
+       if (!name)
+               name = orig_name;
+
+       if (dup >= 10)
+               goto out;
+       /*
+        * Add '_' prefix to potential keywork.  According to
+        * Mathieu Desnoyers (https://lkml.org/lkml/2015/1/23/652),
+        * futher CTF spec updating may require us to use '$'.
+        */
+       if (dup < 0)
+               len = strlen(name) + sizeof("_");
+       else
+               len = strlen(orig_name) + sizeof("_dupl_X");
+
+       new_name = malloc(len);
+       if (!new_name)
+               goto out;
+
+       if (dup < 0)
+               snprintf(new_name, len, "_%s", name);
+       else
+               snprintf(new_name, len, "%s_dupl_%d", orig_name, dup);
+
+out:
+       if (name != orig_name)
+               free(name);
+       return new_name;
+}
+
+static int event_class_add_field(struct bt_ctf_event_class *event_class,
+               struct bt_ctf_field_type *type,
+               struct format_field *field)
+{
+       struct bt_ctf_field_type *t = NULL;
+       char *name;
+       int dup = 1;
+       int ret;
+
+       /* alias was already assigned */
+       if (field->alias != field->name)
+               return bt_ctf_event_class_add_field(event_class, type,
+                               (char *)field->alias);
+
+       name = field->name;
+
+       /* If 'name' is a keywork, add prefix. */
+       if (bt_ctf_validate_identifier(name))
+               name = change_name(name, field->name, -1);
+
+       if (!name) {
+               pr_err("Failed to fix invalid identifier.");
+               return -1;
+       }
+       while ((t = bt_ctf_event_class_get_field_by_name(event_class, name))) {
+               bt_ctf_field_type_put(t);
+               name = change_name(name, field->name, dup++);
+               if (!name) {
+                       pr_err("Failed to create dup name for '%s'\n", field->name);
+                       return -1;
+               }
+       }
+
+       ret = bt_ctf_event_class_add_field(event_class, type, name);
+       if (!ret)
+               field->alias = name;
+
+       return ret;
 }
 
 static int add_tracepoint_fields_types(struct ctf_writer *cw,
@@ -457,14 +713,14 @@ static int add_tracepoint_fields_types(struct ctf_writer *cw,
                if (flags & FIELD_IS_ARRAY)
                        type = bt_ctf_field_type_array_create(type, field->arraylen);
 
-               ret = bt_ctf_event_class_add_field(event_class, type,
-                               field->name);
+               ret = event_class_add_field(event_class, type, field);
 
                if (flags & FIELD_IS_ARRAY)
                        bt_ctf_field_type_put(type);
 
                if (ret) {
-                       pr_err("Failed to add field '%s\n", field->name);
+                       pr_err("Failed to add field '%s': %d\n",
+                                       field->name, ret);
                        return -1;
                }
        }
@@ -508,7 +764,7 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
        do {                                                            \
                pr2("  field '%s'\n", n);                               \
                if (bt_ctf_event_class_add_field(cl, t, n)) {           \
-                       pr_err("Failed to add field '%s;\n", n);        \
+                       pr_err("Failed to add field '%s';\n", n);       \
                        return -1;                                      \
                }                                                       \
        } while (0)
@@ -528,9 +784,6 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
        if (type & PERF_SAMPLE_STREAM_ID)
                ADD_FIELD(event_class, cw->data.u64, "perf_stream_id");
 
-       if (type & PERF_SAMPLE_CPU)
-               ADD_FIELD(event_class, cw->data.u32, "perf_cpu");
-
        if (type & PERF_SAMPLE_PERIOD)
                ADD_FIELD(event_class, cw->data.u64, "perf_period");
 
@@ -604,6 +857,39 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session)
        return 0;
 }
 
+static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
+{
+       struct ctf_stream **stream;
+       struct perf_header *ph = &session->header;
+       int ncpus;
+
+       /*
+        * Try to get the number of cpus used in the data file,
+        * if not present fallback to the MAX_CPUS.
+        */
+       ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS;
+
+       stream = zalloc(sizeof(*stream) * ncpus);
+       if (!stream) {
+               pr_err("Failed to allocate streams.\n");
+               return -ENOMEM;
+       }
+
+       cw->stream     = stream;
+       cw->stream_cnt = ncpus;
+       return 0;
+}
+
+static void free_streams(struct ctf_writer *cw)
+{
+       int cpu;
+
+       for (cpu = 0; cpu < cw->stream_cnt; cpu++)
+               ctf_stream__delete(cw->stream[cpu]);
+
+       free(cw->stream);
+}
+
 static int ctf_writer__setup_env(struct ctf_writer *cw,
                                 struct perf_session *session)
 {
@@ -713,7 +999,7 @@ static void ctf_writer__cleanup(struct ctf_writer *cw)
        ctf_writer__cleanup_data(cw);
 
        bt_ctf_clock_put(cw->clock);
-       bt_ctf_stream_put(cw->stream);
+       free_streams(cw);
        bt_ctf_stream_class_put(cw->stream_class);
        bt_ctf_writer_put(cw->writer);
 
@@ -725,8 +1011,9 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
 {
        struct bt_ctf_writer            *writer;
        struct bt_ctf_stream_class      *stream_class;
-       struct bt_ctf_stream            *stream;
        struct bt_ctf_clock             *clock;
+       struct bt_ctf_field_type        *pkt_ctx_type;
+       int                             ret;
 
        /* CTF writer */
        writer = bt_ctf_writer_create(path);
@@ -767,14 +1054,15 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
        if (ctf_writer__init_data(cw))
                goto err_cleanup;
 
-       /* CTF stream instance */
-       stream = bt_ctf_writer_create_stream(writer, stream_class);
-       if (!stream) {
-               pr("Failed to create CTF stream.\n");
+       /* Add cpu_id for packet context */
+       pkt_ctx_type = bt_ctf_stream_class_get_packet_context_type(stream_class);
+       if (!pkt_ctx_type)
                goto err_cleanup;
-       }
 
-       cw->stream = stream;
+       ret = bt_ctf_field_type_structure_add_field(pkt_ctx_type, cw->data.u32, "cpu_id");
+       bt_ctf_field_type_put(pkt_ctx_type);
+       if (ret)
+               goto err_cleanup;
 
        /* CTF clock writer setup */
        if (bt_ctf_writer_add_clock(writer, clock)) {
@@ -791,6 +1079,28 @@ err:
        return -1;
 }
 
+static int ctf_writer__flush_streams(struct ctf_writer *cw)
+{
+       int cpu, ret = 0;
+
+       for (cpu = 0; cpu < cw->stream_cnt && !ret; cpu++)
+               ret = ctf_stream__flush(cw->stream[cpu]);
+
+       return ret;
+}
+
+static int convert__config(const char *var, const char *value, void *cb)
+{
+       struct convert *c = cb;
+
+       if (!strcmp(var, "convert.queue-size")) {
+               c->queue_size = perf_config_u64(var, value);
+               return 0;
+       }
+
+       return perf_default_config(var, value, cb);
+}
+
 int bt_convert__perf2ctf(const char *input, const char *path, bool force)
 {
        struct perf_session *session;
@@ -817,6 +1127,8 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
        struct ctf_writer *cw = &c.writer;
        int err = -1;
 
+       perf_config(convert__config, &c);
+
        /* CTF writer */
        if (ctf_writer__init(cw, path))
                return -1;
@@ -826,6 +1138,11 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
        if (!session)
                goto free_writer;
 
+       if (c.queue_size) {
+               ordered_events__set_alloc_size(&session->ordered_events,
+                                              c.queue_size);
+       }
+
        /* CTF writer env/clock setup  */
        if (ctf_writer__setup_env(cw, session))
                goto free_session;
@@ -834,9 +1151,14 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
        if (setup_events(cw, session))
                goto free_session;
 
+       if (setup_streams(cw, session))
+               goto free_session;
+
        err = perf_session__process_events(session);
        if (!err)
-               err = bt_ctf_stream_flush(cw->stream);
+               err = ctf_writer__flush_streams(cw);
+       else
+               pr_err("Error during conversion.\n");
 
        fprintf(stderr,
                "[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
@@ -847,11 +1169,15 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
                (double) c.events_size / 1024.0 / 1024.0,
                c.events_count);
 
-       /* its all good */
-free_session:
        perf_session__delete(session);
+       ctf_writer__cleanup(cw);
+
+       return err;
 
+free_session:
+       perf_session__delete(session);
 free_writer:
        ctf_writer__cleanup(cw);
+       pr_err("Error during conversion setup.\n");
        return err;
 }
index bb39a3f..1c9689e 100644 (file)
@@ -122,6 +122,7 @@ int db_export__machine(struct db_export *dbe, struct machine *machine)
 int db_export__thread(struct db_export *dbe, struct thread *thread,
                      struct machine *machine, struct comm *comm)
 {
+       struct thread *main_thread;
        u64 main_thread_db_id = 0;
        int err;
 
@@ -131,8 +132,6 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
        thread->db_id = ++dbe->thread_last_db_id;
 
        if (thread->pid_ != -1) {
-               struct thread *main_thread;
-
                if (thread->pid_ == thread->tid) {
                        main_thread = thread;
                } else {
@@ -144,14 +143,16 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
                        err = db_export__thread(dbe, main_thread, machine,
                                                comm);
                        if (err)
-                               return err;
+                               goto out_put;
                        if (comm) {
                                err = db_export__comm_thread(dbe, comm, thread);
                                if (err)
-                                       return err;
+                                       goto out_put;
                        }
                }
                main_thread_db_id = main_thread->db_id;
+               if (main_thread != thread)
+                       thread__put(main_thread);
        }
 
        if (dbe->export_thread)
@@ -159,6 +160,10 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
                                          machine);
 
        return 0;
+
+out_put:
+       thread__put(main_thread);
+       return err;
 }
 
 int db_export__comm(struct db_export *dbe, struct comm *comm,
@@ -229,7 +234,7 @@ int db_export__symbol(struct db_export *dbe, struct symbol *sym,
 static struct thread *get_main_thread(struct machine *machine, struct thread *thread)
 {
        if (thread->pid_ == thread->tid)
-               return thread;
+               return thread__get(thread);
 
        if (thread->pid_ == -1)
                return NULL;
@@ -309,12 +314,12 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
 
        err = db_export__thread(dbe, thread, al->machine, comm);
        if (err)
-               return err;
+               goto out_put;
 
        if (comm) {
                err = db_export__comm(dbe, comm, main_thread);
                if (err)
-                       return err;
+                       goto out_put;
                es.comm_db_id = comm->db_id;
        }
 
@@ -322,7 +327,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
 
        err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
        if (err)
-               return err;
+               goto out_put;
 
        if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
            sample_addr_correlates_sym(&evsel->attr)) {
@@ -332,20 +337,22 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
                err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
                                     &es.addr_sym_db_id, &es.addr_offset);
                if (err)
-                       return err;
+                       goto out_put;
                if (dbe->crp) {
                        err = thread_stack__process(thread, comm, sample, al,
                                                    &addr_al, es.db_id,
                                                    dbe->crp);
                        if (err)
-                               return err;
+                               goto out_put;
                }
        }
 
        if (dbe->export_sample)
-               return dbe->export_sample(dbe, &es);
+               err = dbe->export_sample(dbe, &es);
 
-       return 0;
+out_put:
+       thread__put(main_thread);
+       return err;
 }
 
 static struct {
index fc0ddd5..7c0c083 100644 (file)
@@ -4,6 +4,7 @@
 #include "symbol.h"
 #include "dso.h"
 #include "machine.h"
+#include "auxtrace.h"
 #include "util.h"
 #include "debug.h"
 
@@ -165,12 +166,28 @@ bool is_supported_compression(const char *ext)
        return false;
 }
 
-bool is_kernel_module(const char *pathname)
+bool is_kernel_module(const char *pathname, int cpumode)
 {
        struct kmod_path m;
-
-       if (kmod_path__parse(&m, pathname))
-               return NULL;
+       int mode = cpumode & PERF_RECORD_MISC_CPUMODE_MASK;
+
+       WARN_ONCE(mode != cpumode,
+                 "Internal error: passing unmasked cpumode (%x) to is_kernel_module",
+                 cpumode);
+
+       switch (mode) {
+       case PERF_RECORD_MISC_USER:
+       case PERF_RECORD_MISC_HYPERVISOR:
+       case PERF_RECORD_MISC_GUEST_USER:
+               return false;
+       /* Treat PERF_RECORD_MISC_CPUMODE_UNKNOWN as kernel */
+       default:
+               if (kmod_path__parse(&m, pathname)) {
+                       pr_err("Failed to check whether %s is a kernel module or not. Assume it is.",
+                                       pathname);
+                       return true;
+               }
+       }
 
        return m.kmod;
 }
@@ -214,12 +231,33 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 {
        const char *name = strrchr(path, '/');
        const char *ext  = strrchr(path, '.');
+       bool is_simple_name = false;
 
        memset(m, 0x0, sizeof(*m));
        name = name ? name + 1 : path;
 
+       /*
+        * '.' is also a valid character for module name. For example:
+        * [aaa.bbb] is a valid module name. '[' should have higher
+        * priority than '.ko' suffix.
+        *
+        * The kernel names are from machine__mmap_name. Such
+        * name should belong to kernel itself, not kernel module.
+        */
+       if (name[0] == '[') {
+               is_simple_name = true;
+               if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
+                   (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
+                   (strncmp(name, "[vdso]", 6) == 0) ||
+                   (strncmp(name, "[vsyscall]", 10) == 0)) {
+                       m->kmod = false;
+
+               } else
+                       m->kmod = true;
+       }
+
        /* No extension, just return name. */
-       if (ext == NULL) {
+       if ((ext == NULL) || is_simple_name) {
                if (alloc_name) {
                        m->name = strdup(name);
                        return m->name ? 0 : -ENOMEM;
@@ -264,6 +302,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  */
 static LIST_HEAD(dso__data_open);
 static long dso__data_open_cnt;
+static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
 
 static void dso__list_add(struct dso *dso)
 {
@@ -433,18 +472,12 @@ static void check_data_close(void)
  */
 void dso__data_close(struct dso *dso)
 {
+       pthread_mutex_lock(&dso__data_open_lock);
        close_dso(dso);
+       pthread_mutex_unlock(&dso__data_open_lock);
 }
 
-/**
- * dso__data_fd - Get dso's data file descriptor
- * @dso: dso object
- * @machine: machine object
- *
- * External interface to find dso's file, open it and
- * returns file descriptor.
- */
-int dso__data_fd(struct dso *dso, struct machine *machine)
+static void try_to_open_dso(struct dso *dso, struct machine *machine)
 {
        enum dso_binary_type binary_type_data[] = {
                DSO_BINARY_TYPE__BUILD_ID_CACHE,
@@ -453,11 +486,8 @@ int dso__data_fd(struct dso *dso, struct machine *machine)
        };
        int i = 0;
 
-       if (dso->data.status == DSO_DATA_STATUS_ERROR)
-               return -1;
-
        if (dso->data.fd >= 0)
-               goto out;
+               return;
 
        if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) {
                dso->data.fd = open_dso(dso, machine);
@@ -477,10 +507,38 @@ out:
                dso->data.status = DSO_DATA_STATUS_OK;
        else
                dso->data.status = DSO_DATA_STATUS_ERROR;
+}
+
+/**
+ * dso__data_get_fd - Get dso's data file descriptor
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * External interface to find dso's file, open it and
+ * returns file descriptor.  It should be paired with
+ * dso__data_put_fd() if it returns non-negative value.
+ */
+int dso__data_get_fd(struct dso *dso, struct machine *machine)
+{
+       if (dso->data.status == DSO_DATA_STATUS_ERROR)
+               return -1;
+
+       if (pthread_mutex_lock(&dso__data_open_lock) < 0)
+               return -1;
+
+       try_to_open_dso(dso, machine);
+
+       if (dso->data.fd < 0)
+               pthread_mutex_unlock(&dso__data_open_lock);
 
        return dso->data.fd;
 }
 
+void dso__data_put_fd(struct dso *dso __maybe_unused)
+{
+       pthread_mutex_unlock(&dso__data_open_lock);
+}
+
 bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
 {
        u32 flag = 1 << by;
@@ -494,10 +552,12 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
 }
 
 static void
-dso_cache__free(struct rb_root *root)
+dso_cache__free(struct dso *dso)
 {
+       struct rb_root *root = &dso->data.cache;
        struct rb_node *next = rb_first(root);
 
+       pthread_mutex_lock(&dso->lock);
        while (next) {
                struct dso_cache *cache;
 
@@ -506,10 +566,12 @@ dso_cache__free(struct rb_root *root)
                rb_erase(&cache->rb_node, root);
                free(cache);
        }
+       pthread_mutex_unlock(&dso->lock);
 }
 
-static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset)
+static struct dso_cache *dso_cache__find(struct dso *dso, u64 offset)
 {
+       const struct rb_root *root = &dso->data.cache;
        struct rb_node * const *p = &root->rb_node;
        const struct rb_node *parent = NULL;
        struct dso_cache *cache;
@@ -528,17 +590,20 @@ static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset)
                else
                        return cache;
        }
+
        return NULL;
 }
 
-static void
-dso_cache__insert(struct rb_root *root, struct dso_cache *new)
+static struct dso_cache *
+dso_cache__insert(struct dso *dso, struct dso_cache *new)
 {
+       struct rb_root *root = &dso->data.cache;
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct dso_cache *cache;
        u64 offset = new->offset;
 
+       pthread_mutex_lock(&dso->lock);
        while (*p != NULL) {
                u64 end;
 
@@ -550,10 +615,17 @@ dso_cache__insert(struct rb_root *root, struct dso_cache *new)
                        p = &(*p)->rb_left;
                else if (offset >= end)
                        p = &(*p)->rb_right;
+               else
+                       goto out;
        }
 
        rb_link_node(&new->rb_node, parent, p);
        rb_insert_color(&new->rb_node, root);
+
+       cache = NULL;
+out:
+       pthread_mutex_unlock(&dso->lock);
+       return cache;
 }
 
 static ssize_t
@@ -568,19 +640,33 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
 }
 
 static ssize_t
-dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
+dso_cache__read(struct dso *dso, struct machine *machine,
+               u64 offset, u8 *data, ssize_t size)
 {
        struct dso_cache *cache;
+       struct dso_cache *old;
        ssize_t ret;
 
        do {
                u64 cache_offset;
 
-               ret = -ENOMEM;
-
                cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
                if (!cache)
+                       return -ENOMEM;
+
+               pthread_mutex_lock(&dso__data_open_lock);
+
+               /*
+                * dso->data.fd might be closed if other thread opened another
+                * file (dso) due to open file limit (RLIMIT_NOFILE).
+                */
+               try_to_open_dso(dso, machine);
+
+               if (dso->data.fd < 0) {
+                       ret = -errno;
+                       dso->data.status = DSO_DATA_STATUS_ERROR;
                        break;
+               }
 
                cache_offset = offset & DSO__DATA_CACHE_MASK;
 
@@ -590,11 +676,20 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 
                cache->offset = cache_offset;
                cache->size   = ret;
-               dso_cache__insert(&dso->data.cache, cache);
+       } while (0);
 
-               ret = dso_cache__memcpy(cache, offset, data, size);
+       pthread_mutex_unlock(&dso__data_open_lock);
 
-       } while (0);
+       if (ret > 0) {
+               old = dso_cache__insert(dso, cache);
+               if (old) {
+                       /* we lose the race */
+                       free(cache);
+                       cache = old;
+               }
+
+               ret = dso_cache__memcpy(cache, offset, data, size);
+       }
 
        if (ret <= 0)
                free(cache);
@@ -602,16 +697,16 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
        return ret;
 }
 
-static ssize_t dso_cache_read(struct dso *dso, u64 offset,
-                             u8 *data, ssize_t size)
+static ssize_t dso_cache_read(struct dso *dso, struct machine *machine,
+                             u64 offset, u8 *data, ssize_t size)
 {
        struct dso_cache *cache;
 
-       cache = dso_cache__find(&dso->data.cache, offset);
+       cache = dso_cache__find(dso, offset);
        if (cache)
                return dso_cache__memcpy(cache, offset, data, size);
        else
-               return dso_cache__read(dso, offset, data, size);
+               return dso_cache__read(dso, machine, offset, data, size);
 }
 
 /*
@@ -619,7 +714,8 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset,
  * in the rb_tree. Any read to already cached data is served
  * by cached data.
  */
-static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
+static ssize_t cached_read(struct dso *dso, struct machine *machine,
+                          u64 offset, u8 *data, ssize_t size)
 {
        ssize_t r = 0;
        u8 *p = data;
@@ -627,7 +723,7 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
        do {
                ssize_t ret;
 
-               ret = dso_cache_read(dso, offset, p, size);
+               ret = dso_cache_read(dso, machine, offset, p, size);
                if (ret < 0)
                        return ret;
 
@@ -647,21 +743,44 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
        return r;
 }
 
-static int data_file_size(struct dso *dso)
+static int data_file_size(struct dso *dso, struct machine *machine)
 {
+       int ret = 0;
        struct stat st;
        char sbuf[STRERR_BUFSIZE];
 
-       if (!dso->data.file_size) {
-               if (fstat(dso->data.fd, &st)) {
-                       pr_err("dso mmap failed, fstat: %s\n",
-                               strerror_r(errno, sbuf, sizeof(sbuf)));
-                       return -1;
-               }
-               dso->data.file_size = st.st_size;
+       if (dso->data.file_size)
+               return 0;
+
+       if (dso->data.status == DSO_DATA_STATUS_ERROR)
+               return -1;
+
+       pthread_mutex_lock(&dso__data_open_lock);
+
+       /*
+        * dso->data.fd might be closed if other thread opened another
+        * file (dso) due to open file limit (RLIMIT_NOFILE).
+        */
+       try_to_open_dso(dso, machine);
+
+       if (dso->data.fd < 0) {
+               ret = -errno;
+               dso->data.status = DSO_DATA_STATUS_ERROR;
+               goto out;
        }
 
-       return 0;
+       if (fstat(dso->data.fd, &st) < 0) {
+               ret = -errno;
+               pr_err("dso cache fstat failed: %s\n",
+                      strerror_r(errno, sbuf, sizeof(sbuf)));
+               dso->data.status = DSO_DATA_STATUS_ERROR;
+               goto out;
+       }
+       dso->data.file_size = st.st_size;
+
+out:
+       pthread_mutex_unlock(&dso__data_open_lock);
+       return ret;
 }
 
 /**
@@ -673,23 +792,17 @@ static int data_file_size(struct dso *dso)
  */
 off_t dso__data_size(struct dso *dso, struct machine *machine)
 {
-       int fd;
-
-       fd = dso__data_fd(dso, machine);
-       if (fd < 0)
-               return fd;
-
-       if (data_file_size(dso))
+       if (data_file_size(dso, machine))
                return -1;
 
        /* For now just estimate dso data size is close to file size */
        return dso->data.file_size;
 }
 
-static ssize_t data_read_offset(struct dso *dso, u64 offset,
-                               u8 *data, ssize_t size)
+static ssize_t data_read_offset(struct dso *dso, struct machine *machine,
+                               u64 offset, u8 *data, ssize_t size)
 {
-       if (data_file_size(dso))
+       if (data_file_size(dso, machine))
                return -1;
 
        /* Check the offset sanity. */
@@ -699,7 +812,7 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
        if (offset + size < offset)
                return -1;
 
-       return cached_read(dso, offset, data, size);
+       return cached_read(dso, machine, offset, data, size);
 }
 
 /**
@@ -716,10 +829,10 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
 ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
                              u64 offset, u8 *data, ssize_t size)
 {
-       if (dso__data_fd(dso, machine) < 0)
+       if (dso->data.status == DSO_DATA_STATUS_ERROR)
                return -1;
 
-       return data_read_offset(dso, offset, data, size);
+       return data_read_offset(dso, machine, offset, data, size);
 }
 
 /**
@@ -751,13 +864,13 @@ struct map *dso__new_map(const char *name)
        return map;
 }
 
-struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
-                   const char *short_name, int dso_type)
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+                                   const char *short_name, int dso_type)
 {
        /*
         * The kernel dso could be created by build_id processing.
         */
-       struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name);
+       struct dso *dso = machine__findnew_dso(machine, name);
 
        /*
         * We need to run this in all cases, since during the build_id
@@ -776,8 +889,8 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
  * Either one of the dso or name parameter must be non-NULL or the
  * function will not work.
  */
-static struct dso *dso__findlink_by_longname(struct rb_root *root,
-                                            struct dso *dso, const char *name)
+static struct dso *__dso__findlink_by_longname(struct rb_root *root,
+                                              struct dso *dso, const char *name)
 {
        struct rb_node **p = &root->rb_node;
        struct rb_node  *parent = NULL;
@@ -824,10 +937,10 @@ static struct dso *dso__findlink_by_longname(struct rb_root *root,
        return NULL;
 }
 
-static inline struct dso *
-dso__find_by_longname(const struct rb_root *root, const char *name)
+static inline struct dso *__dso__find_by_longname(struct rb_root *root,
+                                                 const char *name)
 {
-       return dso__findlink_by_longname((struct rb_root *)root, NULL, name);
+       return __dso__findlink_by_longname(root, NULL, name);
 }
 
 void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
@@ -935,6 +1048,8 @@ struct dso *dso__new(const char *name)
                RB_CLEAR_NODE(&dso->rb_node);
                INIT_LIST_HEAD(&dso->node);
                INIT_LIST_HEAD(&dso->data.open_entry);
+               pthread_mutex_init(&dso->lock, NULL);
+               atomic_set(&dso->refcnt, 1);
        }
 
        return dso;
@@ -961,12 +1076,27 @@ void dso__delete(struct dso *dso)
        }
 
        dso__data_close(dso);
-       dso_cache__free(&dso->data.cache);
+       auxtrace_cache__free(dso->auxtrace_cache);
+       dso_cache__free(dso);
        dso__free_a2l(dso);
        zfree(&dso->symsrc_filename);
+       pthread_mutex_destroy(&dso->lock);
        free(dso);
 }
 
+struct dso *dso__get(struct dso *dso)
+{
+       if (dso)
+               atomic_inc(&dso->refcnt);
+       return dso;
+}
+
+void dso__put(struct dso *dso)
+{
+       if (dso && atomic_dec_and_test(&dso->refcnt))
+               dso__delete(dso);
+}
+
 void dso__set_build_id(struct dso *dso, void *build_id)
 {
        memcpy(dso->build_id, build_id, sizeof(dso->build_id));
@@ -1033,14 +1163,41 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
        return have_build_id;
 }
 
-void dsos__add(struct dsos *dsos, struct dso *dso)
+void __dsos__add(struct dsos *dsos, struct dso *dso)
 {
        list_add_tail(&dso->node, &dsos->head);
-       dso__findlink_by_longname(&dsos->root, dso, NULL);
+       __dso__findlink_by_longname(&dsos->root, dso, NULL);
+       /*
+        * It is now in the linked list, grab a reference, then garbage collect
+        * this when needing memory, by looking at LRU dso instances in the
+        * list with atomic_read(&dso->refcnt) == 1, i.e. no references
+        * anywhere besides the one for the list, do, under a lock for the
+        * list: remove it from the list, then a dso__put(), that probably will
+        * be the last and will then call dso__delete(), end of life.
+        *
+        * That, or at the end of the 'struct machine' lifetime, when all
+        * 'struct dso' instances will be removed from the list, in
+        * dsos__exit(), if they have no other reference from some other data
+        * structure.
+        *
+        * E.g.: after processing a 'perf.data' file and storing references
+        * to objects instantiated while processing events, we will have
+        * references to the 'thread', 'map', 'dso' structs all from 'struct
+        * hist_entry' instances, but we may not need anything not referenced,
+        * so we might as well call machines__exit()/machines__delete() and
+        * garbage collect it.
+        */
+       dso__get(dso);
+}
+
+void dsos__add(struct dsos *dsos, struct dso *dso)
+{
+       pthread_rwlock_wrlock(&dsos->lock);
+       __dsos__add(dsos, dso);
+       pthread_rwlock_unlock(&dsos->lock);
 }
 
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
-                      bool cmp_short)
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
 {
        struct dso *pos;
 
@@ -1050,15 +1207,24 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
                                return pos;
                return NULL;
        }
-       return dso__find_by_longname(&dsos->root, name);
+       return __dso__find_by_longname(&dsos->root, name);
+}
+
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+       struct dso *dso;
+       pthread_rwlock_rdlock(&dsos->lock);
+       dso = __dsos__find(dsos, name, cmp_short);
+       pthread_rwlock_unlock(&dsos->lock);
+       return dso;
 }
 
-struct dso *dsos__addnew(struct dsos *dsos, const char *name)
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
 {
        struct dso *dso = dso__new(name);
 
        if (dso != NULL) {
-               dsos__add(dsos, dso);
+               __dsos__add(dsos, dso);
                dso__set_basename(dso);
        }
        return dso;
@@ -1066,9 +1232,18 @@ struct dso *dsos__addnew(struct dsos *dsos, const char *name)
 
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
 {
-       struct dso *dso = dsos__find(dsos, name, false);
+       struct dso *dso = __dsos__find(dsos, name, false);
 
-       return dso ? dso : dsos__addnew(dsos, name);
+       return dso ? dso : __dsos__addnew(dsos, name);
+}
+
+struct dso *dsos__findnew(struct dsos *dsos, const char *name)
+{
+       struct dso *dso;
+       pthread_rwlock_wrlock(&dsos->lock);
+       dso = dso__get(__dsos__findnew(dsos, name));
+       pthread_rwlock_unlock(&dsos->lock);
+       return dso;
 }
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
@@ -1130,12 +1305,15 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
 enum dso_type dso__type(struct dso *dso, struct machine *machine)
 {
        int fd;
+       enum dso_type type = DSO__TYPE_UNKNOWN;
 
-       fd = dso__data_fd(dso, machine);
-       if (fd < 0)
-               return DSO__TYPE_UNKNOWN;
+       fd = dso__data_get_fd(dso, machine);
+       if (fd >= 0) {
+               type = dso__type_fd(fd);
+               dso__data_put_fd(dso);
+       }
 
-       return dso__type_fd(fd);
+       return type;
 }
 
 int dso__strerror_load(struct dso *dso, char *buf, size_t buflen)
index e0901b4..2fe98bb 100644 (file)
@@ -1,9 +1,11 @@
 #ifndef __PERF_DSO
 #define __PERF_DSO
 
+#include <linux/atomic.h>
 #include <linux/types.h>
 #include <linux/rbtree.h>
 #include <stdbool.h>
+#include <pthread.h>
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include "map.h"
@@ -124,9 +126,13 @@ struct dso_cache {
 struct dsos {
        struct list_head head;
        struct rb_root   root;  /* rbtree root sorted by long name */
+       pthread_rwlock_t lock;
 };
 
+struct auxtrace_cache;
+
 struct dso {
+       pthread_mutex_t  lock;
        struct list_head node;
        struct rb_node   rb_node;       /* rbtree node sorted by long name */
        struct rb_root   symbols[MAP__NR_TYPES];
@@ -156,6 +162,7 @@ struct dso {
        u16              long_name_len;
        u16              short_name_len;
        void            *dwfl;                  /* DWARF debug info */
+       struct auxtrace_cache *auxtrace_cache;
 
        /* dso data file */
        struct {
@@ -173,7 +180,7 @@ struct dso {
                void     *priv;
                u64      db_id;
        };
-
+       atomic_t         refcnt;
        char             name[0];
 };
 
@@ -200,6 +207,17 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
 
 int dso__name_len(const struct dso *dso);
 
+struct dso *dso__get(struct dso *dso);
+void dso__put(struct dso *dso);
+
+static inline void __dso__zput(struct dso **dso)
+{
+       dso__put(*dso);
+       *dso = NULL;
+}
+
+#define dso__zput(dso) __dso__zput(&dso)
+
 bool dso__loaded(const struct dso *dso, enum map_type type);
 
 bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
@@ -216,7 +234,7 @@ char dso__symtab_origin(const struct dso *dso);
 int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
                                   char *root_dir, char *filename, size_t size);
 bool is_supported_compression(const char *ext);
-bool is_kernel_module(const char *pathname);
+bool is_kernel_module(const char *pathname, int cpumode);
 bool decompress_to_file(const char *ext, const char *filename, int output_fd);
 bool dso__needs_decompress(struct dso *dso);
 
@@ -236,7 +254,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 
 /*
  * The dso__data_* external interface provides following functions:
- *   dso__data_fd
+ *   dso__data_get_fd
+ *   dso__data_put_fd
  *   dso__data_close
  *   dso__data_size
  *   dso__data_read_offset
@@ -253,8 +272,11 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  * The current usage of the dso__data_* interface is as follows:
  *
  * Get DSO's fd:
- *   int fd = dso__data_fd(dso, machine);
- *   USE 'fd' SOMEHOW
+ *   int fd = dso__data_get_fd(dso, machine);
+ *   if (fd >= 0) {
+ *       USE 'fd' SOMEHOW
+ *       dso__data_put_fd(dso);
+ *   }
  *
  * Read DSO's data:
  *   n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE);
@@ -273,7 +295,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  *
  * TODO
 */
-int dso__data_fd(struct dso *dso, struct machine *machine);
+int dso__data_get_fd(struct dso *dso, struct machine *machine);
+void dso__data_put_fd(struct dso *dso __maybe_unused);
 void dso__data_close(struct dso *dso);
 
 off_t dso__data_size(struct dso *dso, struct machine *machine);
@@ -285,14 +308,16 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
 bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by);
 
 struct map *dso__new_map(const char *name);
-struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
-                               const char *short_name, int dso_type);
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+                                   const char *short_name, int dso_type);
 
+void __dsos__add(struct dsos *dsos, struct dso *dso);
 void dsos__add(struct dsos *dsos, struct dso *dso);
-struct dso *dsos__addnew(struct dsos *dsos, const char *name);
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
-                      bool cmp_short);
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
+struct dso *dsos__findnew(struct dsos *dsos, const char *name);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
index c34e024..57f3ef4 100644 (file)
@@ -139,11 +139,27 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
 bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
 {
        const char *name;
+
        name = dwarf_diename(dw_die);
        return name ? (strcmp(tname, name) == 0) : false;
 }
 
 /**
+ * die_match_name - Match diename and glob
+ * @dw_die: a DIE
+ * @glob: a string of target glob pattern
+ *
+ * Glob matching the name of @dw_die and @glob. Return false if matching fail.
+ */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob)
+{
+       const char *name;
+
+       name = dwarf_diename(dw_die);
+       return name ? strglobmatch(name, glob) : false;
+}
+
+/**
  * die_get_call_lineno - Get callsite line number of inline-function instance
  * @in_die: a DIE of an inlined function instance
  *
@@ -417,6 +433,43 @@ struct __addr_die_search_param {
        Dwarf_Die       *die_mem;
 };
 
+static int __die_search_func_tail_cb(Dwarf_Die *fn_die, void *data)
+{
+       struct __addr_die_search_param *ad = data;
+       Dwarf_Addr addr = 0;
+
+       if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+           !dwarf_highpc(fn_die, &addr) &&
+           addr == ad->addr) {
+               memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+               return DWARF_CB_ABORT;
+       }
+       return DWARF_CB_OK;
+}
+
+/**
+ * die_find_tailfunc - Search for a non-inlined function with tail call at
+ * given address
+ * @cu_die: a CU DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for a non-inlined function DIE with tail call at @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+                                   Dwarf_Die *die_mem)
+{
+       struct __addr_die_search_param ad;
+       ad.addr = addr;
+       ad.die_mem = die_mem;
+       /* dwarf_getscopes can't find subprogram. */
+       if (!dwarf_getfuncs(cu_die, __die_search_func_tail_cb, &ad, 0))
+               return NULL;
+       else
+               return die_mem;
+}
+
 /* die_find callback for non-inlined function search */
 static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
 {
@@ -832,19 +885,17 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
 /**
  * die_get_typename - Get the name of given variable DIE
  * @vr_die: a variable DIE
- * @buf: a buffer for result type name
- * @len: a max-length of @buf
+ * @buf: a strbuf for result type name
  *
- * Get the name of @vr_die and stores it to @buf. Return the actual length
- * of type name if succeeded. Return -E2BIG if @len is not enough long, and
- * Return -ENOENT if failed to find type name.
+ * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded.
+ * and Return -ENOENT if failed to find type name.
  * Note that the result will stores typedef name if possible, and stores
  * "*(function_type)" if the type is a function pointer.
  */
-int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
 {
        Dwarf_Die type;
-       int tag, ret, ret2;
+       int tag, ret;
        const char *tmp = "";
 
        if (__die_get_real_type(vr_die, &type) == NULL)
@@ -855,8 +906,8 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
                tmp = "*";
        else if (tag == DW_TAG_subroutine_type) {
                /* Function pointer */
-               ret = snprintf(buf, len, "(function_type)");
-               return (ret >= len) ? -E2BIG : ret;
+               strbuf_addf(buf, "(function_type)");
+               return 0;
        } else {
                if (!dwarf_diename(&type))
                        return -ENOENT;
@@ -867,39 +918,156 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
                else if (tag == DW_TAG_enumeration_type)
                        tmp = "enum ";
                /* Write a base name */
-               ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
-               return (ret >= len) ? -E2BIG : ret;
-       }
-       ret = die_get_typename(&type, buf, len);
-       if (ret > 0) {
-               ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
-               ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+               strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
+               return 0;
        }
+       ret = die_get_typename(&type, buf);
+       if (ret == 0)
+               strbuf_addf(buf, "%s", tmp);
+
        return ret;
 }
 
 /**
  * die_get_varname - Get the name and type of given variable DIE
  * @vr_die: a variable DIE
- * @buf: a buffer for type and variable name
- * @len: the max-length of @buf
+ * @buf: a strbuf for type and variable name
  *
  * Get the name and type of @vr_die and stores it in @buf as "type\tname".
  */
-int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
 {
-       int ret, ret2;
+       int ret;
 
-       ret = die_get_typename(vr_die, buf, len);
+       ret = die_get_typename(vr_die, buf);
        if (ret < 0) {
                pr_debug("Failed to get type, make it unknown.\n");
-               ret = snprintf(buf, len, "(unknown_type)");
+               strbuf_addf(buf, "(unknown_type)");
        }
-       if (ret > 0) {
-               ret2 = snprintf(buf + ret, len - ret, "\t%s",
-                               dwarf_diename(vr_die));
-               ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+
+       strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
+
+       return 0;
+}
+
+/**
+ * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for variable byte offset range
+ *
+ * Get the innermost scope range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+                               struct strbuf *buf)
+{
+       Dwarf_Die *scopes;
+       int count;
+       size_t offset = 0;
+       Dwarf_Addr base;
+       Dwarf_Addr start, end;
+       Dwarf_Addr entry;
+       int ret;
+       bool first = true;
+       const char *name;
+
+       ret = dwarf_entrypc(sp_die, &entry);
+       if (ret)
+               return ret;
+
+       name = dwarf_diename(sp_die);
+       if (!name)
+               return -ENOENT;
+
+       count = dwarf_getscopes_die(vr_die, &scopes);
+
+       /* (*SCOPES)[1] is the DIE for the scope containing that scope */
+       if (count <= 1) {
+               ret = -EINVAL;
+               goto out;
        }
+
+       while ((offset = dwarf_ranges(&scopes[1], offset, &base,
+                               &start, &end)) > 0) {
+               start -= entry;
+               end -= entry;
+
+               if (first) {
+                       strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+                               name, start, end);
+                       first = false;
+               } else {
+                       strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+                               start, end);
+               }
+       }
+
+       if (!first)
+               strbuf_addf(buf, "]>");
+
+out:
+       free(scopes);
        return ret;
 }
 
+/**
+ * die_get_var_range - Get byte offset range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name and byte offset range
+ *
+ * Get the byte offset range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
+{
+       int ret = 0;
+       Dwarf_Addr base;
+       Dwarf_Addr start, end;
+       Dwarf_Addr entry;
+       Dwarf_Op *op;
+       size_t nops;
+       size_t offset = 0;
+       Dwarf_Attribute attr;
+       bool first = true;
+       const char *name;
+
+       ret = dwarf_entrypc(sp_die, &entry);
+       if (ret)
+               return ret;
+
+       name = dwarf_diename(sp_die);
+       if (!name)
+               return -ENOENT;
+
+       if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+               return -EINVAL;
+
+       while ((offset = dwarf_getlocations(
+                               &attr, offset, &base,
+                               &start, &end, &op, &nops)) > 0) {
+               if (start == 0) {
+                       /* Single Location Descriptions */
+                       ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
+                       return ret;
+               }
+
+               /* Location Lists */
+               start -= entry;
+               end -= entry;
+               if (first) {
+                       strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+                               name, start, end);
+                       first = false;
+               } else {
+                       strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+                               start, end);
+               }
+       }
+
+       if (!first)
+               strbuf_addf(buf, "]>");
+
+       return ret;
+}
index af7dbcd..c42ec36 100644 (file)
@@ -47,6 +47,9 @@ extern bool die_is_func_instance(Dwarf_Die *dw_die);
 /* Compare diename and tname */
 extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
 
+/* Matching diename with glob pattern */
+extern bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+
 /* Get callsite line number of inline-function instance */
 extern int die_get_call_lineno(Dwarf_Die *in_die);
 
@@ -82,6 +85,10 @@ extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
 extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
                                    Dwarf_Die *die_mem);
 
+/* Search a non-inlined function with tail call at given address */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+                                   Dwarf_Die *die_mem);
+
 /* Search the top inlined function including given address */
 extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
                                          Dwarf_Die *die_mem);
@@ -114,8 +121,10 @@ extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
                                  Dwarf_Die *die_mem);
 
 /* Get the name of given variable DIE */
-extern int die_get_typename(Dwarf_Die *vr_die, char *buf, int len);
+extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
 
 /* Get the name and type of given variable DIE, stored as "type\tname" */
-extern int die_get_varname(Dwarf_Die *vr_die, char *buf, int len);
+extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+                       struct strbuf *buf);
 #endif
index 275b0ee..7405123 100644 (file)
@@ -5,5 +5,4 @@
  */
 #include "cache.h"
 
-const char *pager_program;
 int pager_use_color = 1;
index ff866c4..d7d986d 100644 (file)
@@ -23,12 +23,18 @@ static const char *perf_event__names[] = {
        [PERF_RECORD_FORK]                      = "FORK",
        [PERF_RECORD_READ]                      = "READ",
        [PERF_RECORD_SAMPLE]                    = "SAMPLE",
+       [PERF_RECORD_AUX]                       = "AUX",
+       [PERF_RECORD_ITRACE_START]              = "ITRACE_START",
+       [PERF_RECORD_LOST_SAMPLES]              = "LOST_SAMPLES",
        [PERF_RECORD_HEADER_ATTR]               = "ATTR",
        [PERF_RECORD_HEADER_EVENT_TYPE]         = "EVENT_TYPE",
        [PERF_RECORD_HEADER_TRACING_DATA]       = "TRACING_DATA",
        [PERF_RECORD_HEADER_BUILD_ID]           = "BUILD_ID",
        [PERF_RECORD_FINISHED_ROUND]            = "FINISHED_ROUND",
        [PERF_RECORD_ID_INDEX]                  = "ID_INDEX",
+       [PERF_RECORD_AUXTRACE_INFO]             = "AUXTRACE_INFO",
+       [PERF_RECORD_AUXTRACE]                  = "AUXTRACE",
+       [PERF_RECORD_AUXTRACE_ERROR]            = "AUXTRACE_ERROR",
 };
 
 const char *perf_event__name(unsigned int id)
@@ -212,10 +218,14 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                                       pid_t pid, pid_t tgid,
                                       perf_event__handler_t process,
                                       struct machine *machine,
-                                      bool mmap_data)
+                                      bool mmap_data,
+                                      unsigned int proc_map_timeout)
 {
        char filename[PATH_MAX];
        FILE *fp;
+       unsigned long long t;
+       bool truncation = false;
+       unsigned long long timeout = proc_map_timeout * 1000000ULL;
        int rc = 0;
 
        if (machine__is_default_guest(machine))
@@ -234,6 +244,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
        }
 
        event->header.type = PERF_RECORD_MMAP2;
+       t = rdclock();
 
        while (1) {
                char bf[BUFSIZ];
@@ -247,6 +258,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                if (fgets(bf, sizeof(bf), fp) == NULL)
                        break;
 
+               if ((rdclock() - t) > timeout) {
+                       pr_warning("Reading %s time out. "
+                                  "You may want to increase "
+                                  "the time limit by --proc-map-timeout\n",
+                                  filename);
+                       truncation = true;
+                       goto out;
+               }
+
                /* ensure null termination since stack will be reused. */
                strcpy(execname, "");
 
@@ -295,6 +315,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                        event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
                }
 
+out:
+               if (truncation)
+                       event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
                if (!strcmp(execname, ""))
                        strcpy(execname, anonstr);
 
@@ -313,6 +337,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                        rc = -1;
                        break;
                }
+
+               if (truncation)
+                       break;
        }
 
        fclose(fp);
@@ -324,8 +351,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
                                   struct machine *machine)
 {
        int rc = 0;
-       struct rb_node *nd;
+       struct map *pos;
        struct map_groups *kmaps = &machine->kmaps;
+       struct maps *maps = &kmaps->maps[MAP__FUNCTION];
        union perf_event *event = zalloc((sizeof(event->mmap) +
                                          machine->id_hdr_size));
        if (event == NULL) {
@@ -345,10 +373,8 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
        else
                event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 
-       for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
-            nd; nd = rb_next(nd)) {
+       for (pos = maps__first(maps); pos; pos = map__next(pos)) {
                size_t size;
-               struct map *pos = rb_entry(nd, struct map, rb_node);
 
                if (pos->dso->kernel)
                        continue;
@@ -381,7 +407,9 @@ static int __event__synthesize_thread(union perf_event *comm_event,
                                      pid_t pid, int full,
                                          perf_event__handler_t process,
                                      struct perf_tool *tool,
-                                     struct machine *machine, bool mmap_data)
+                                     struct machine *machine,
+                                     bool mmap_data,
+                                     unsigned int proc_map_timeout)
 {
        char filename[PATH_MAX];
        DIR *tasks;
@@ -398,7 +426,8 @@ static int __event__synthesize_thread(union perf_event *comm_event,
                        return -1;
 
                return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-                                                         process, machine, mmap_data);
+                                                         process, machine, mmap_data,
+                                                         proc_map_timeout);
        }
 
        if (machine__is_default_guest(machine))
@@ -439,7 +468,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
                if (_pid == pid) {
                        /* process the parent's maps too */
                        rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-                                               process, machine, mmap_data);
+                                               process, machine, mmap_data, proc_map_timeout);
                        if (rc)
                                break;
                }
@@ -453,7 +482,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
                                      struct thread_map *threads,
                                      perf_event__handler_t process,
                                      struct machine *machine,
-                                     bool mmap_data)
+                                     bool mmap_data,
+                                     unsigned int proc_map_timeout)
 {
        union perf_event *comm_event, *mmap_event, *fork_event;
        int err = -1, thread, j;
@@ -476,7 +506,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
                                               fork_event,
                                               threads->map[thread], 0,
                                               process, tool, machine,
-                                              mmap_data)) {
+                                              mmap_data, proc_map_timeout)) {
                        err = -1;
                        break;
                }
@@ -502,7 +532,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
                                                       fork_event,
                                                       comm_event->comm.pid, 0,
                                                       process, tool, machine,
-                                                      mmap_data)) {
+                                                      mmap_data, proc_map_timeout)) {
                                err = -1;
                                break;
                        }
@@ -519,7 +549,9 @@ out:
 
 int perf_event__synthesize_threads(struct perf_tool *tool,
                                   perf_event__handler_t process,
-                                  struct machine *machine, bool mmap_data)
+                                  struct machine *machine,
+                                  bool mmap_data,
+                                  unsigned int proc_map_timeout)
 {
        DIR *proc;
        char proc_path[PATH_MAX];
@@ -559,7 +591,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
                 * one thread couldn't be synthesized.
                 */
                __event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
-                                          1, process, tool, machine, mmap_data);
+                                          1, process, tool, machine, mmap_data,
+                                          proc_map_timeout);
        }
 
        err = 0;
@@ -692,6 +725,30 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
        return machine__process_lost_event(machine, event, sample);
 }
 
+int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+                           union perf_event *event,
+                           struct perf_sample *sample __maybe_unused,
+                           struct machine *machine)
+{
+       return machine__process_aux_event(machine, event);
+}
+
+int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+                                    union perf_event *event,
+                                    struct perf_sample *sample __maybe_unused,
+                                    struct machine *machine)
+{
+       return machine__process_itrace_start_event(machine, event);
+}
+
+int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+                                    union perf_event *event,
+                                    struct perf_sample *sample,
+                                    struct machine *machine)
+{
+       return machine__process_lost_samples_event(machine, event, sample);
+}
+
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
 {
        return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -755,6 +812,21 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
        return machine__process_exit_event(machine, event, sample);
 }
 
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
+{
+       return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s]\n",
+                      event->aux.aux_offset, event->aux.aux_size,
+                      event->aux.flags,
+                      event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
+                      event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "");
+}
+
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
+{
+       return fprintf(fp, " pid: %u tid: %u\n",
+                      event->itrace_start.pid, event->itrace_start.tid);
+}
+
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 {
        size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -774,6 +846,12 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
        case PERF_RECORD_MMAP2:
                ret += perf_event__fprintf_mmap2(event, fp);
                break;
+       case PERF_RECORD_AUX:
+               ret += perf_event__fprintf_aux(event, fp);
+               break;
+       case PERF_RECORD_ITRACE_START:
+               ret += perf_event__fprintf_itrace_start(event, fp);
+               break;
        default:
                ret += fprintf(fp, "\n");
        }
@@ -877,6 +955,10 @@ void thread__find_addr_location(struct thread *thread,
                al->sym = NULL;
 }
 
+/*
+ * Callers need to drop the reference to al->thread, obtained in
+ * machine__findnew_thread()
+ */
 int perf_event__preprocess_sample(const union perf_event *event,
                                  struct machine *machine,
                                  struct addr_location *al,
@@ -937,6 +1019,17 @@ int perf_event__preprocess_sample(const union perf_event *event,
        return 0;
 }
 
+/*
+ * The preprocess_sample method will return with reference counts for the
+ * in it, when done using (and perhaps getting ref counts if needing to
+ * keep a pointer to one of those entries) it must be paired with
+ * addr_location__put(), so that the refcounts can be decremented.
+ */
+void addr_location__put(struct addr_location *al)
+{
+       thread__zput(al->thread);
+}
+
 bool is_bts_event(struct perf_event_attr *attr)
 {
        return attr->type == PERF_TYPE_HARDWARE &&
index 09b9e8d..c53f363 100644 (file)
@@ -52,6 +52,11 @@ struct lost_event {
        u64 lost;
 };
 
+struct lost_samples_event {
+       struct perf_event_header header;
+       u64 lost;
+};
+
 /*
  * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
  */
@@ -157,6 +162,8 @@ enum {
        PERF_IP_FLAG_IN_TX              = 1ULL << 10,
 };
 
+#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+
 #define PERF_BRANCH_MASK               (\
        PERF_IP_FLAG_BRANCH             |\
        PERF_IP_FLAG_CALL               |\
@@ -215,9 +222,17 @@ enum perf_user_event_type { /* above any possible kernel type */
        PERF_RECORD_HEADER_BUILD_ID             = 67,
        PERF_RECORD_FINISHED_ROUND              = 68,
        PERF_RECORD_ID_INDEX                    = 69,
+       PERF_RECORD_AUXTRACE_INFO               = 70,
+       PERF_RECORD_AUXTRACE                    = 71,
+       PERF_RECORD_AUXTRACE_ERROR              = 72,
        PERF_RECORD_HEADER_MAX
 };
 
+enum auxtrace_error_type {
+       PERF_AUXTRACE_ERROR_ITRACE  = 1,
+       PERF_AUXTRACE_ERROR_MAX
+};
+
 /*
  * The kernel collects the number of events it couldn't send in a stretch and
  * when possible sends this number in a PERF_RECORD_LOST event. The number of
@@ -225,6 +240,12 @@ enum perf_user_event_type { /* above any possible kernel type */
  * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
  * the sum of all struct lost_event.lost fields reported.
  *
+ * The kernel discards mixed up samples and sends the number in a
+ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
+ * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
+ * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
+ * all struct lost_samples_event.lost fields reported.
+ *
  * The total_period is needed because by default auto-freq is used, so
  * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
  * the total number of low level events, it is necessary to to sum all struct
@@ -234,6 +255,7 @@ struct events_stats {
        u64 total_period;
        u64 total_non_filtered_period;
        u64 total_lost;
+       u64 total_lost_samples;
        u64 total_invalid_chains;
        u32 nr_events[PERF_RECORD_HEADER_MAX];
        u32 nr_non_filtered_samples;
@@ -242,6 +264,8 @@ struct events_stats {
        u32 nr_invalid_chains;
        u32 nr_unknown_id;
        u32 nr_unprocessable_samples;
+       u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
+       u32 nr_proc_map_timeout;
 };
 
 struct attr_event {
@@ -280,6 +304,50 @@ struct id_index_event {
        struct id_index_entry entries[0];
 };
 
+struct auxtrace_info_event {
+       struct perf_event_header header;
+       u32 type;
+       u32 reserved__; /* For alignment */
+       u64 priv[];
+};
+
+struct auxtrace_event {
+       struct perf_event_header header;
+       u64 size;
+       u64 offset;
+       u64 reference;
+       u32 idx;
+       u32 tid;
+       u32 cpu;
+       u32 reserved__; /* For alignment */
+};
+
+#define MAX_AUXTRACE_ERROR_MSG 64
+
+struct auxtrace_error_event {
+       struct perf_event_header header;
+       u32 type;
+       u32 code;
+       u32 cpu;
+       u32 pid;
+       u32 tid;
+       u32 reserved__; /* For alignment */
+       u64 ip;
+       char msg[MAX_AUXTRACE_ERROR_MSG];
+};
+
+struct aux_event {
+       struct perf_event_header header;
+       u64     aux_offset;
+       u64     aux_size;
+       u64     flags;
+};
+
+struct itrace_start_event {
+       struct perf_event_header header;
+       u32 pid, tid;
+};
+
 union perf_event {
        struct perf_event_header        header;
        struct mmap_event               mmap;
@@ -287,6 +355,7 @@ union perf_event {
        struct comm_event               comm;
        struct fork_event               fork;
        struct lost_event               lost;
+       struct lost_samples_event       lost_samples;
        struct read_event               read;
        struct throttle_event           throttle;
        struct sample_event             sample;
@@ -295,6 +364,11 @@ union perf_event {
        struct tracing_data_event       tracing_data;
        struct build_id_event           build_id;
        struct id_index_event           id_index;
+       struct auxtrace_info_event      auxtrace_info;
+       struct auxtrace_event           auxtrace;
+       struct auxtrace_error_event     auxtrace_error;
+       struct aux_event                aux;
+       struct itrace_start_event       itrace_start;
 };
 
 void perf_event__print_totals(void);
@@ -310,10 +384,12 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool,
 int perf_event__synthesize_thread_map(struct perf_tool *tool,
                                      struct thread_map *threads,
                                      perf_event__handler_t process,
-                                     struct machine *machine, bool mmap_data);
+                                     struct machine *machine, bool mmap_data,
+                                     unsigned int proc_map_timeout);
 int perf_event__synthesize_threads(struct perf_tool *tool,
                                   perf_event__handler_t process,
-                                  struct machine *machine, bool mmap_data);
+                                  struct machine *machine, bool mmap_data,
+                                  unsigned int proc_map_timeout);
 int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
                                       perf_event__handler_t process,
                                       struct machine *machine);
@@ -330,6 +406,18 @@ int perf_event__process_lost(struct perf_tool *tool,
                             union perf_event *event,
                             struct perf_sample *sample,
                             struct machine *machine);
+int perf_event__process_lost_samples(struct perf_tool *tool,
+                                    union perf_event *event,
+                                    struct perf_sample *sample,
+                                    struct machine *machine);
+int perf_event__process_aux(struct perf_tool *tool,
+                           union perf_event *event,
+                           struct perf_sample *sample,
+                           struct machine *machine);
+int perf_event__process_itrace_start(struct perf_tool *tool,
+                                    union perf_event *event,
+                                    struct perf_sample *sample,
+                                    struct machine *machine);
 int perf_event__process_mmap(struct perf_tool *tool,
                             union perf_event *event,
                             struct perf_sample *sample,
@@ -358,6 +446,8 @@ int perf_event__preprocess_sample(const union perf_event *event,
                                  struct addr_location *al,
                                  struct perf_sample *sample);
 
+void addr_location__put(struct addr_location *al);
+
 struct thread;
 
 bool is_bts_event(struct perf_event_attr *attr);
@@ -381,12 +471,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                                       pid_t pid, pid_t tgid,
                                       perf_event__handler_t process,
                                       struct machine *machine,
-                                      bool mmap_data);
+                                      bool mmap_data,
+                                      unsigned int proc_map_timeout);
 
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
 
 u64 kallsyms__get_function_start(const char *kallsyms_filename,
index 080be93..8366511 100644 (file)
@@ -297,6 +297,8 @@ void perf_evlist__disable(struct perf_evlist *evlist)
                                      PERF_EVENT_IOC_DISABLE, 0);
                }
        }
+
+       evlist->enabled = false;
 }
 
 void perf_evlist__enable(struct perf_evlist *evlist)
@@ -316,6 +318,13 @@ void perf_evlist__enable(struct perf_evlist *evlist)
                                      PERF_EVENT_IOC_ENABLE, 0);
                }
        }
+
+       evlist->enabled = true;
+}
+
+void perf_evlist__toggle_enable(struct perf_evlist *evlist)
+{
+       (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
 }
 
 int perf_evlist__disable_event(struct perf_evlist *evlist,
@@ -634,11 +643,18 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
        struct perf_mmap *md = &evlist->mmap[idx];
-       u64 head = perf_mmap__read_head(md);
+       u64 head;
        u64 old = md->prev;
        unsigned char *data = md->base + page_size;
        union perf_event *event = NULL;
 
+       /*
+        * Check if event was unmapped due to a POLLHUP/POLLERR.
+        */
+       if (!atomic_read(&md->refcnt))
+               return NULL;
+
+       head = perf_mmap__read_head(md);
        if (evlist->overwrite) {
                /*
                 * If we're further behind than half the buffer, there's a chance
@@ -695,19 +711,19 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 
 static bool perf_mmap__empty(struct perf_mmap *md)
 {
-       return perf_mmap__read_head(md) == md->prev;
+       return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
 }
 
 static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
 {
-       ++evlist->mmap[idx].refcnt;
+       atomic_inc(&evlist->mmap[idx].refcnt);
 }
 
 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
 {
-       BUG_ON(evlist->mmap[idx].refcnt == 0);
+       BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
 
-       if (--evlist->mmap[idx].refcnt == 0)
+       if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
                __perf_evlist__munmap(evlist, idx);
 }
 
@@ -721,17 +737,46 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
                perf_mmap__write_tail(md, old);
        }
 
-       if (md->refcnt == 1 && perf_mmap__empty(md))
+       if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
                perf_evlist__mmap_put(evlist, idx);
 }
 
+int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
+                              struct auxtrace_mmap_params *mp __maybe_unused,
+                              void *userpg __maybe_unused,
+                              int fd __maybe_unused)
+{
+       return 0;
+}
+
+void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__init(
+                       struct auxtrace_mmap_params *mp __maybe_unused,
+                       off_t auxtrace_offset __maybe_unused,
+                       unsigned int auxtrace_pages __maybe_unused,
+                       bool auxtrace_overwrite __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__set_idx(
+                       struct auxtrace_mmap_params *mp __maybe_unused,
+                       struct perf_evlist *evlist __maybe_unused,
+                       int idx __maybe_unused,
+                       bool per_cpu __maybe_unused)
+{
+}
+
 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
 {
        if (evlist->mmap[idx].base != NULL) {
                munmap(evlist->mmap[idx].base, evlist->mmap_len);
                evlist->mmap[idx].base = NULL;
-               evlist->mmap[idx].refcnt = 0;
+               atomic_set(&evlist->mmap[idx].refcnt, 0);
        }
+       auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
 }
 
 void perf_evlist__munmap(struct perf_evlist *evlist)
@@ -759,6 +804,7 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
 struct mmap_params {
        int prot;
        int mask;
+       struct auxtrace_mmap_params auxtrace_mp;
 };
 
 static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
@@ -777,7 +823,7 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
         * evlist layer can't just drop it when filtering events in
         * perf_evlist__filter_pollfd().
         */
-       evlist->mmap[idx].refcnt = 2;
+       atomic_set(&evlist->mmap[idx].refcnt, 2);
        evlist->mmap[idx].prev = 0;
        evlist->mmap[idx].mask = mp->mask;
        evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
@@ -789,6 +835,10 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
                return -1;
        }
 
+       if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
+                               &mp->auxtrace_mp, evlist->mmap[idx].base, fd))
+               return -1;
+
        return 0;
 }
 
@@ -853,6 +903,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
        for (cpu = 0; cpu < nr_cpus; cpu++) {
                int output = -1;
 
+               auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
+                                             true);
+
                for (thread = 0; thread < nr_threads; thread++) {
                        if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
                                                        thread, &output))
@@ -878,6 +931,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
        for (thread = 0; thread < nr_threads; thread++) {
                int output = -1;
 
+               auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
+                                             false);
+
                if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
                                                &output))
                        goto out_unmap;
@@ -960,10 +1016,8 @@ static long parse_pages_arg(const char *str, unsigned long min,
        return pages;
 }
 
-int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
-                                 int unset __maybe_unused)
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
 {
-       unsigned int *mmap_pages = opt->value;
        unsigned long max = UINT_MAX;
        long pages;
 
@@ -980,20 +1034,32 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
        return 0;
 }
 
+int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
+                                 int unset __maybe_unused)
+{
+       return __perf_evlist__parse_mmap_pages(opt->value, str);
+}
+
 /**
- * perf_evlist__mmap - Create mmaps to receive events.
+ * perf_evlist__mmap_ex - Create mmaps to receive events.
  * @evlist: list of events
  * @pages: map length in pages
  * @overwrite: overwrite older events?
+ * @auxtrace_pages - auxtrace map length in pages
+ * @auxtrace_overwrite - overwrite older auxtrace data?
  *
  * If @overwrite is %false the user needs to signal event consumption using
  * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
  * automatically.
  *
+ * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
+ * consumption using auxtrace_mmap__write_tail().
+ *
  * Return: %0 on success, negative error code otherwise.
  */
-int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
-                     bool overwrite)
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+                        bool overwrite, unsigned int auxtrace_pages,
+                        bool auxtrace_overwrite)
 {
        struct perf_evsel *evsel;
        const struct cpu_map *cpus = evlist->cpus;
@@ -1013,6 +1079,9 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
        pr_debug("mmap size %zuB\n", evlist->mmap_len);
        mp.mask = evlist->mmap_len - page_size - 1;
 
+       auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
+                                  auxtrace_pages, auxtrace_overwrite);
+
        evlist__for_each(evlist, evsel) {
                if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
                    evsel->sample_id == NULL &&
@@ -1026,6 +1095,12 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
        return perf_evlist__mmap_per_cpu(evlist, &mp);
 }
 
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
+                     bool overwrite)
+{
+       return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
+}
+
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
 {
        evlist->threads = thread_map__new_str(target->pid, target->tid,
index b5cce95..a8489b9 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __PERF_EVLIST_H
 #define __PERF_EVLIST_H 1
 
+#include <linux/atomic.h>
 #include <linux/list.h>
 #include <api/fd/array.h>
 #include <stdio.h>
@@ -8,6 +9,7 @@
 #include "event.h"
 #include "evsel.h"
 #include "util.h"
+#include "auxtrace.h"
 #include <unistd.h>
 
 struct pollfd;
@@ -26,8 +28,9 @@ struct record_opts;
 struct perf_mmap {
        void             *base;
        int              mask;
-       int              refcnt;
+       atomic_t         refcnt;
        u64              prev;
+       struct auxtrace_mmap auxtrace_mmap;
        char             event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
 };
 
@@ -37,6 +40,8 @@ struct perf_evlist {
        int              nr_entries;
        int              nr_groups;
        int              nr_mmaps;
+       bool             overwrite;
+       bool             enabled;
        size_t           mmap_len;
        int              id_pos;
        int              is_pos;
@@ -45,7 +50,6 @@ struct perf_evlist {
                int     cork_fd;
                pid_t   pid;
        } workload;
-       bool             overwrite;
        struct fdarray   pollfd;
        struct perf_mmap *mmap;
        struct thread_map *threads;
@@ -122,16 +126,21 @@ int perf_evlist__start_workload(struct perf_evlist *evlist);
 
 struct option;
 
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
 int perf_evlist__parse_mmap_pages(const struct option *opt,
                                  const char *str,
                                  int unset);
 
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+                        bool overwrite, unsigned int auxtrace_pages,
+                        bool auxtrace_overwrite);
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
                      bool overwrite);
 void perf_evlist__munmap(struct perf_evlist *evlist);
 
 void perf_evlist__disable(struct perf_evlist *evlist);
 void perf_evlist__enable(struct perf_evlist *evlist);
+void perf_evlist__toggle_enable(struct perf_evlist *evlist);
 
 int perf_evlist__disable_event(struct perf_evlist *evlist,
                               struct perf_evsel *evsel);
index 33e3fd8..33449de 100644 (file)
@@ -26,6 +26,7 @@
 #include "perf_regs.h"
 #include "debug.h"
 #include "trace-event.h"
+#include "stat.h"
 
 static struct {
        bool sample_id_all;
@@ -851,19 +852,6 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
        return 0;
 }
 
-void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
-{
-       memset(evsel->counts, 0, (sizeof(*evsel->counts) +
-                                (ncpus * sizeof(struct perf_counts_values))));
-}
-
-int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
-{
-       evsel->counts = zalloc((sizeof(*evsel->counts) +
-                               (ncpus * sizeof(struct perf_counts_values))));
-       return evsel->counts != NULL ? 0 : -ENOMEM;
-}
-
 static void perf_evsel__free_fd(struct perf_evsel *evsel)
 {
        xyarray__delete(evsel->fd);
@@ -891,11 +879,6 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
                }
 }
 
-void perf_evsel__free_counts(struct perf_evsel *evsel)
-{
-       zfree(&evsel->counts);
-}
-
 void perf_evsel__exit(struct perf_evsel *evsel)
 {
        assert(list_empty(&evsel->node));
@@ -1058,7 +1041,7 @@ static void __p_read_format(char *buf, size_t size, u64 value)
 
 #define BUF_SIZE               1024
 
-#define p_hex(val)             snprintf(buf, BUF_SIZE, "%"PRIx64, (uint64_t)(val))
+#define p_hex(val)             snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
 #define p_unsigned(val)                snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
 #define p_signed(val)          snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
 #define p_sample_type(val)     __p_sample_type(buf, BUF_SIZE, val)
@@ -1121,6 +1104,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
        PRINT_ATTRf(sample_stack_user, p_unsigned);
        PRINT_ATTRf(clockid, p_signed);
        PRINT_ATTRf(sample_regs_intr, p_hex);
+       PRINT_ATTRf(aux_watermark, p_unsigned);
 
        return ret;
 }
@@ -2148,7 +2132,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
        case EMFILE:
                return scnprintf(msg, size, "%s",
                         "Too many events are opened.\n"
-                        "Try again after reducing the number of events.");
+                        "Probably the maximum number of open file descriptors has been reached.\n"
+                        "Hint: Try again after reducing the number of events.\n"
+                        "Hint: Try increasing the limit with 'ulimit -n <limit>'");
        case ENODEV:
                if (target->cpu_list)
                        return scnprintf(msg, size, "%s",
index e486151..bb0579e 100644 (file)
@@ -73,7 +73,6 @@ struct perf_evsel {
        char                    *name;
        double                  scale;
        const char              *unit;
-       bool                    snapshot;
        struct event_format     *tp_format;
        union {
                void            *priv;
@@ -86,6 +85,7 @@ struct perf_evsel {
        unsigned int            sample_size;
        int                     id_pos;
        int                     is_pos;
+       bool                    snapshot;
        bool                    supported;
        bool                    needs_swap;
        bool                    no_aux_samples;
@@ -93,11 +93,11 @@ struct perf_evsel {
        bool                    system_wide;
        bool                    tracking;
        bool                    per_pkg;
-       unsigned long           *per_pkg_mask;
        /* parse modifier helper */
        int                     exclude_GH;
        int                     nr_members;
        int                     sample_read;
+       unsigned long           *per_pkg_mask;
        struct perf_evsel       *leader;
        char                    *group_name;
 };
@@ -170,9 +170,6 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel);
 int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
 
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
-int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
-void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
-void perf_evsel__free_counts(struct perf_evsel *evsel);
 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
index 918fd8a..21a77e7 100644 (file)
@@ -869,6 +869,20 @@ static int write_branch_stack(int fd __maybe_unused,
        return 0;
 }
 
+static int write_auxtrace(int fd, struct perf_header *h,
+                         struct perf_evlist *evlist __maybe_unused)
+{
+       struct perf_session *session;
+       int err;
+
+       session = container_of(h, struct perf_session, header);
+
+       err = auxtrace_index__write(fd, &session->auxtrace_index);
+       if (err < 0)
+               pr_err("Failed to write auxtrace index\n");
+       return err;
+}
+
 static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
                           FILE *fp)
 {
@@ -1151,6 +1165,12 @@ static void print_branch_stack(struct perf_header *ph __maybe_unused,
        fprintf(fp, "# contains samples with branch stack\n");
 }
 
+static void print_auxtrace(struct perf_header *ph __maybe_unused,
+                          int fd __maybe_unused, FILE *fp)
+{
+       fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
+}
+
 static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
                               FILE *fp)
 {
@@ -1218,9 +1238,8 @@ static int __event_process_build_id(struct build_id_event *bev,
                                    struct perf_session *session)
 {
        int err = -1;
-       struct dsos *dsos;
        struct machine *machine;
-       u16 misc;
+       u16 cpumode;
        struct dso *dso;
        enum dso_kernel_type dso_type;
 
@@ -1228,39 +1247,37 @@ static int __event_process_build_id(struct build_id_event *bev,
        if (!machine)
                goto out;
 
-       misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+       cpumode = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-       switch (misc) {
+       switch (cpumode) {
        case PERF_RECORD_MISC_KERNEL:
                dso_type = DSO_TYPE_KERNEL;
-               dsos = &machine->kernel_dsos;
                break;
        case PERF_RECORD_MISC_GUEST_KERNEL:
                dso_type = DSO_TYPE_GUEST_KERNEL;
-               dsos = &machine->kernel_dsos;
                break;
        case PERF_RECORD_MISC_USER:
        case PERF_RECORD_MISC_GUEST_USER:
                dso_type = DSO_TYPE_USER;
-               dsos = &machine->user_dsos;
                break;
        default:
                goto out;
        }
 
-       dso = __dsos__findnew(dsos, filename);
+       dso = machine__findnew_dso(machine, filename);
        if (dso != NULL) {
                char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
                dso__set_build_id(dso, &bev->build_id);
 
-               if (!is_kernel_module(filename))
+               if (!is_kernel_module(filename, cpumode))
                        dso->kernel = dso_type;
 
                build_id__sprintf(dso->build_id, sizeof(dso->build_id),
                                  sbuild_id);
                pr_debug("build id event received for %s: %s\n",
                         dso->long_name, sbuild_id);
+               dso__put(dso);
        }
 
        err = 0;
@@ -1821,6 +1838,22 @@ out_free:
        return ret;
 }
 
+static int process_auxtrace(struct perf_file_section *section,
+                           struct perf_header *ph, int fd,
+                           void *data __maybe_unused)
+{
+       struct perf_session *session;
+       int err;
+
+       session = container_of(ph, struct perf_session, header);
+
+       err = auxtrace_index__process(fd, section->size, session,
+                                     ph->needs_swap);
+       if (err < 0)
+               pr_err("Failed to process auxtrace index\n");
+       return err;
+}
+
 struct feature_ops {
        int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
        void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1861,6 +1894,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPA(HEADER_BRANCH_STACK,   branch_stack),
        FEAT_OPP(HEADER_PMU_MAPPINGS,   pmu_mappings),
        FEAT_OPP(HEADER_GROUP_DESC,     group_desc),
+       FEAT_OPP(HEADER_AUXTRACE,       auxtrace),
 };
 
 struct header_print_data {
index 3bb90ac..d4d5796 100644 (file)
@@ -30,6 +30,7 @@ enum {
        HEADER_BRANCH_STACK,
        HEADER_PMU_MAPPINGS,
        HEADER_GROUP_DESC,
+       HEADER_AUXTRACE,
        HEADER_LAST_FEATURE,
        HEADER_FEAT_BITS        = 256,
 };
index cc22b91..6f28d53 100644 (file)
@@ -313,8 +313,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
                                memset(&he->stat, 0, sizeof(he->stat));
                }
 
-               if (he->ms.map)
-                       he->ms.map->referenced = true;
+               map__get(he->ms.map);
 
                if (he->branch_info) {
                        /*
@@ -324,6 +323,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
                         */
                        he->branch_info = malloc(sizeof(*he->branch_info));
                        if (he->branch_info == NULL) {
+                               map__zput(he->ms.map);
                                free(he->stat_acc);
                                free(he);
                                return NULL;
@@ -332,17 +332,13 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
                        memcpy(he->branch_info, template->branch_info,
                               sizeof(*he->branch_info));
 
-                       if (he->branch_info->from.map)
-                               he->branch_info->from.map->referenced = true;
-                       if (he->branch_info->to.map)
-                               he->branch_info->to.map->referenced = true;
+                       map__get(he->branch_info->from.map);
+                       map__get(he->branch_info->to.map);
                }
 
                if (he->mem_info) {
-                       if (he->mem_info->iaddr.map)
-                               he->mem_info->iaddr.map->referenced = true;
-                       if (he->mem_info->daddr.map)
-                               he->mem_info->daddr.map->referenced = true;
+                       map__get(he->mem_info->iaddr.map);
+                       map__get(he->mem_info->daddr.map);
                }
 
                if (symbol_conf.use_callchain)
@@ -362,10 +358,10 @@ static u8 symbol__parent_filter(const struct symbol *parent)
        return 0;
 }
 
-static struct hist_entry *add_hist_entry(struct hists *hists,
-                                        struct hist_entry *entry,
-                                        struct addr_location *al,
-                                        bool sample_self)
+static struct hist_entry *hists__findnew_entry(struct hists *hists,
+                                              struct hist_entry *entry,
+                                              struct addr_location *al,
+                                              bool sample_self)
 {
        struct rb_node **p;
        struct rb_node *parent = NULL;
@@ -407,9 +403,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
                         * the history counter to increment.
                         */
                        if (he->ms.map != entry->ms.map) {
-                               he->ms.map = entry->ms.map;
-                               if (he->ms.map)
-                                       he->ms.map->referenced = true;
+                               map__put(he->ms.map);
+                               he->ms.map = map__get(entry->ms.map);
                        }
                        goto out;
                }
@@ -468,7 +463,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
                .transaction = transaction,
        };
 
-       return add_hist_entry(hists, &entry, al, sample_self);
+       return hists__findnew_entry(hists, &entry, al, sample_self);
 }
 
 static int
@@ -548,9 +543,9 @@ iter_finish_mem_entry(struct hist_entry_iter *iter,
 
 out:
        /*
-        * We don't need to free iter->priv (mem_info) here since
-        * the mem info was either already freed in add_hist_entry() or
-        * passed to a new hist entry by hist_entry__new().
+        * We don't need to free iter->priv (mem_info) here since the mem info
+        * was either already freed in hists__findnew_entry() or passed to a
+        * new hist entry by hist_entry__new().
         */
        iter->priv = NULL;
 
@@ -851,19 +846,15 @@ const struct hist_iter_ops hist_iter_cumulative = {
 };
 
 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
-                        struct perf_evsel *evsel, struct perf_sample *sample,
                         int max_stack_depth, void *arg)
 {
        int err, err2;
 
-       err = sample__resolve_callchain(sample, &iter->parent, evsel, al,
-                                       max_stack_depth);
+       err = sample__resolve_callchain(iter->sample, &iter->parent,
+                                       iter->evsel, al, max_stack_depth);
        if (err)
                return err;
 
-       iter->evsel = evsel;
-       iter->sample = sample;
-
        err = iter->ops->prepare_entry(iter, al);
        if (err)
                goto out;
@@ -937,8 +928,20 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 void hist_entry__delete(struct hist_entry *he)
 {
        thread__zput(he->thread);
-       zfree(&he->branch_info);
-       zfree(&he->mem_info);
+       map__zput(he->ms.map);
+
+       if (he->branch_info) {
+               map__zput(he->branch_info->from.map);
+               map__zput(he->branch_info->to.map);
+               zfree(&he->branch_info);
+       }
+
+       if (he->mem_info) {
+               map__zput(he->mem_info->iaddr.map);
+               map__zput(he->mem_info->daddr.map);
+               zfree(&he->mem_info);
+       }
+
        zfree(&he->stat_acc);
        free_srcline(he->srcline);
        free_callchain(he->callchain);
@@ -1163,7 +1166,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h
                return;
 
        /* force fold unfiltered entry for simplicity */
-       h->ms.unfolded = false;
+       h->unfolded = false;
        h->row_offset = 0;
        h->nr_rows = 0;
 
index 9f31b89..5ed8d9c 100644 (file)
@@ -111,7 +111,6 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
                                      u64 weight, u64 transaction,
                                      bool sample_self);
 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
-                        struct perf_evsel *evsel, struct perf_sample *sample,
                         int max_stack_depth, void *arg);
 
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
diff --git a/tools/perf/util/include/linux/poison.h b/tools/perf/util/include/linux/poison.h
deleted file mode 100644 (file)
index fef6dbc..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../../include/linux/poison.h"
index 2a030c5..f06d89f 100644 (file)
@@ -1,2 +1,16 @@
+#ifndef __TOOLS_LINUX_PERF_RBTREE_H
+#define __TOOLS_LINUX_PERF_RBTREE_H
 #include <stdbool.h>
 #include "../../../../include/linux/rbtree.h"
+
+/*
+ * Handy for checking that we are not deleting an entry that is
+ * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
+ * probably should be moved to lib/rbtree.c...
+ */
+static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
+{
+       rb_erase(n, root);
+       RB_CLEAR_NODE(n);
+}
+#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
index 527e032..4744673 100644 (file)
 #include "unwind.h"
 #include "linux/hash.h"
 
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
+
 static void dsos__init(struct dsos *dsos)
 {
        INIT_LIST_HEAD(&dsos->head);
        dsos->root = RB_ROOT;
+       pthread_rwlock_init(&dsos->lock, NULL);
 }
 
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
        map_groups__init(&machine->kmaps, machine);
        RB_CLEAR_NODE(&machine->rb_node);
-       dsos__init(&machine->user_dsos);
-       dsos__init(&machine->kernel_dsos);
+       dsos__init(&machine->dsos);
 
        machine->threads = RB_ROOT;
+       pthread_rwlock_init(&machine->threads_lock, NULL);
        INIT_LIST_HEAD(&machine->dead_threads);
        machine->last_match = NULL;
 
@@ -54,6 +57,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 
                snprintf(comm, sizeof(comm), "[guest/%d]", pid);
                thread__set_comm(thread, comm, 0);
+               thread__put(thread);
        }
 
        machine->current_tid = NULL;
@@ -78,37 +82,50 @@ out_delete:
        return NULL;
 }
 
-static void dsos__delete(struct dsos *dsos)
+static void dsos__purge(struct dsos *dsos)
 {
        struct dso *pos, *n;
 
+       pthread_rwlock_wrlock(&dsos->lock);
+
        list_for_each_entry_safe(pos, n, &dsos->head, node) {
                RB_CLEAR_NODE(&pos->rb_node);
-               list_del(&pos->node);
-               dso__delete(pos);
+               list_del_init(&pos->node);
+               dso__put(pos);
        }
+
+       pthread_rwlock_unlock(&dsos->lock);
+}
+
+static void dsos__exit(struct dsos *dsos)
+{
+       dsos__purge(dsos);
+       pthread_rwlock_destroy(&dsos->lock);
 }
 
 void machine__delete_threads(struct machine *machine)
 {
-       struct rb_node *nd = rb_first(&machine->threads);
+       struct rb_node *nd;
 
+       pthread_rwlock_wrlock(&machine->threads_lock);
+       nd = rb_first(&machine->threads);
        while (nd) {
                struct thread *t = rb_entry(nd, struct thread, rb_node);
 
                nd = rb_next(nd);
-               machine__remove_thread(machine, t);
+               __machine__remove_thread(machine, t, false);
        }
+       pthread_rwlock_unlock(&machine->threads_lock);
 }
 
 void machine__exit(struct machine *machine)
 {
        map_groups__exit(&machine->kmaps);
-       dsos__delete(&machine->user_dsos);
-       dsos__delete(&machine->kernel_dsos);
-       vdso__exit(machine);
+       dsos__exit(&machine->dsos);
+       machine__exit_vdso(machine);
        zfree(&machine->root_dir);
        zfree(&machine->current_tid);
+       pthread_rwlock_destroy(&machine->threads_lock);
 }
 
 void machine__delete(struct machine *machine)
@@ -303,7 +320,7 @@ static void machine__update_thread_pid(struct machine *machine,
        if (th->pid_ == th->tid)
                return;
 
-       leader = machine__findnew_thread(machine, th->pid_, th->pid_);
+       leader = __machine__findnew_thread(machine, th->pid_, th->pid_);
        if (!leader)
                goto out_err;
 
@@ -325,7 +342,7 @@ static void machine__update_thread_pid(struct machine *machine,
                if (!map_groups__empty(th->mg))
                        pr_err("Discarding thread maps for %d:%d\n",
                               th->pid_, th->tid);
-               map_groups__delete(th->mg);
+               map_groups__put(th->mg);
        }
 
        th->mg = map_groups__get(leader->mg);
@@ -336,9 +353,9 @@ out_err:
        pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
 }
 
-static struct thread *__machine__findnew_thread(struct machine *machine,
-                                               pid_t pid, pid_t tid,
-                                               bool create)
+static struct thread *____machine__findnew_thread(struct machine *machine,
+                                                 pid_t pid, pid_t tid,
+                                                 bool create)
 {
        struct rb_node **p = &machine->threads.rb_node;
        struct rb_node *parent = NULL;
@@ -356,7 +373,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
                        return th;
                }
 
-               thread__zput(machine->last_match);
+               machine->last_match = NULL;
        }
 
        while (*p != NULL) {
@@ -364,7 +381,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
                th = rb_entry(parent, struct thread, rb_node);
 
                if (th->tid == tid) {
-                       machine->last_match = thread__get(th);
+                       machine->last_match = th;
                        machine__update_thread_pid(machine, th, pid);
                        return th;
                }
@@ -392,7 +409,8 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
                 * leader and that would screwed the rb tree.
                 */
                if (thread__init_map_groups(th, machine)) {
-                       rb_erase(&th->rb_node, &machine->threads);
+                       rb_erase_init(&th->rb_node, &machine->threads);
+                       RB_CLEAR_NODE(&th->rb_node);
                        thread__delete(th);
                        return NULL;
                }
@@ -400,22 +418,36 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
                 * It is now in the rbtree, get a ref
                 */
                thread__get(th);
-               machine->last_match = thread__get(th);
+               machine->last_match = th;
        }
 
        return th;
 }
 
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
+{
+       return ____machine__findnew_thread(machine, pid, tid, true);
+}
+
 struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
                                       pid_t tid)
 {
-       return __machine__findnew_thread(machine, pid, tid, true);
+       struct thread *th;
+
+       pthread_rwlock_wrlock(&machine->threads_lock);
+       th = thread__get(__machine__findnew_thread(machine, pid, tid));
+       pthread_rwlock_unlock(&machine->threads_lock);
+       return th;
 }
 
 struct thread *machine__find_thread(struct machine *machine, pid_t pid,
                                    pid_t tid)
 {
-       return __machine__findnew_thread(machine, pid, tid, false);
+       struct thread *th;
+       pthread_rwlock_rdlock(&machine->threads_lock);
+       th =  thread__get(____machine__findnew_thread(machine, pid, tid, false));
+       pthread_rwlock_unlock(&machine->threads_lock);
+       return th;
 }
 
 struct comm *machine__thread_exec_comm(struct machine *machine,
@@ -434,6 +466,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
                                                        event->comm.pid,
                                                        event->comm.tid);
        bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
+       int err = 0;
 
        if (exec)
                machine->comm_exec = true;
@@ -444,10 +477,12 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
        if (thread == NULL ||
            __thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
                dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
-               return -1;
+               err = -1;
        }
 
-       return 0;
+       thread__put(thread);
+
+       return err;
 }
 
 int machine__process_lost_event(struct machine *machine __maybe_unused,
@@ -458,17 +493,27 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
        return 0;
 }
 
-static struct dso*
-machine__module_dso(struct machine *machine, struct kmod_path *m,
-                   const char *filename)
+int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
+                                       union perf_event *event, struct perf_sample *sample)
+{
+       dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n",
+                   sample->id, event->lost_samples.lost);
+       return 0;
+}
+
+static struct dso *machine__findnew_module_dso(struct machine *machine,
+                                              struct kmod_path *m,
+                                              const char *filename)
 {
        struct dso *dso;
 
-       dso = dsos__find(&machine->kernel_dsos, m->name, true);
+       pthread_rwlock_wrlock(&machine->dsos.lock);
+
+       dso = __dsos__find(&machine->dsos, m->name, true);
        if (!dso) {
-               dso = dsos__addnew(&machine->kernel_dsos, m->name);
+               dso = __dsos__addnew(&machine->dsos, m->name);
                if (dso == NULL)
-                       return NULL;
+                       goto out_unlock;
 
                if (machine__is_host(machine))
                        dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
@@ -483,11 +528,30 @@ machine__module_dso(struct machine *machine, struct kmod_path *m,
                dso__set_long_name(dso, strdup(filename), true);
        }
 
+       dso__get(dso);
+out_unlock:
+       pthread_rwlock_unlock(&machine->dsos.lock);
        return dso;
 }
 
-struct map *machine__new_module(struct machine *machine, u64 start,
-                               const char *filename)
+int machine__process_aux_event(struct machine *machine __maybe_unused,
+                              union perf_event *event)
+{
+       if (dump_trace)
+               perf_event__fprintf_aux(event, stdout);
+       return 0;
+}
+
+int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
+                                       union perf_event *event)
+{
+       if (dump_trace)
+               perf_event__fprintf_itrace_start(event, stdout);
+       return 0;
+}
+
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+                                       const char *filename)
 {
        struct map *map = NULL;
        struct dso *dso;
@@ -501,7 +565,7 @@ struct map *machine__new_module(struct machine *machine, u64 start,
        if (map)
                goto out;
 
-       dso = machine__module_dso(machine, &m, filename);
+       dso = machine__findnew_module_dso(machine, &m, filename);
        if (dso == NULL)
                goto out;
 
@@ -519,13 +583,11 @@ out:
 size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
 {
        struct rb_node *nd;
-       size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) +
-                    __dsos__fprintf(&machines->host.user_dsos.head, fp);
+       size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
 
        for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
                struct machine *pos = rb_entry(nd, struct machine, rb_node);
-               ret += __dsos__fprintf(&pos->kernel_dsos.head, fp);
-               ret += __dsos__fprintf(&pos->user_dsos.head, fp);
+               ret += __dsos__fprintf(&pos->dsos.head, fp);
        }
 
        return ret;
@@ -534,8 +596,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
 size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
                                     bool (skip)(struct dso *dso, int parm), int parm)
 {
-       return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) +
-              __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm);
+       return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
 }
 
 size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
@@ -575,12 +636,16 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
        size_t ret = 0;
        struct rb_node *nd;
 
+       pthread_rwlock_rdlock(&machine->threads_lock);
+
        for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
                struct thread *pos = rb_entry(nd, struct thread, rb_node);
 
                ret += thread__fprintf(pos, fp);
        }
 
+       pthread_rwlock_unlock(&machine->threads_lock);
+
        return ret;
 }
 
@@ -594,9 +659,8 @@ static struct dso *machine__get_kernel(struct machine *machine)
                if (!vmlinux_name)
                        vmlinux_name = "[kernel.kallsyms]";
 
-               kernel = dso__kernel_findnew(machine, vmlinux_name,
-                                            "[kernel]",
-                                            DSO_TYPE_KERNEL);
+               kernel = machine__findnew_kernel(machine, vmlinux_name,
+                                                "[kernel]", DSO_TYPE_KERNEL);
        } else {
                char bf[PATH_MAX];
 
@@ -606,9 +670,9 @@ static struct dso *machine__get_kernel(struct machine *machine)
                        vmlinux_name = machine__mmap_name(machine, bf,
                                                          sizeof(bf));
 
-               kernel = dso__kernel_findnew(machine, vmlinux_name,
-                                            "[guest.kernel]",
-                                            DSO_TYPE_GUEST_KERNEL);
+               kernel = machine__findnew_kernel(machine, vmlinux_name,
+                                                "[guest.kernel]",
+                                                DSO_TYPE_GUEST_KERNEL);
        }
 
        if (kernel != NULL && (!kernel->has_build_id))
@@ -713,7 +777,6 @@ void machine__destroy_kernel_maps(struct machine *machine)
                                kmap->ref_reloc_sym = NULL;
                }
 
-               map__delete(machine->vmlinux_maps[type]);
                machine->vmlinux_maps[type] = NULL;
        }
 }
@@ -970,7 +1033,7 @@ static int machine__create_module(void *arg, const char *name, u64 start)
        struct machine *machine = arg;
        struct map *map;
 
-       map = machine__new_module(machine, start, name);
+       map = machine__findnew_module_map(machine, start, name);
        if (map == NULL)
                return -1;
 
@@ -1062,7 +1125,7 @@ static bool machine__uses_kcore(struct machine *machine)
 {
        struct dso *dso;
 
-       list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
+       list_for_each_entry(dso, &machine->dsos.head, node) {
                if (dso__is_kcore(dso))
                        return true;
        }
@@ -1093,8 +1156,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
                                strlen(kmmap_prefix) - 1) == 0;
        if (event->mmap.filename[0] == '/' ||
            (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
-               map = machine__new_module(machine, event->mmap.start,
-                                         event->mmap.filename);
+               map = machine__findnew_module_map(machine, event->mmap.start,
+                                                 event->mmap.filename);
                if (map == NULL)
                        goto out_problem;
 
@@ -1109,23 +1172,48 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
                struct dso *kernel = NULL;
                struct dso *dso;
 
-               list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
-                       if (is_kernel_module(dso->long_name))
+               pthread_rwlock_rdlock(&machine->dsos.lock);
+
+               list_for_each_entry(dso, &machine->dsos.head, node) {
+
+                       /*
+                        * The cpumode passed to is_kernel_module is not the
+                        * cpumode of *this* event. If we insist on passing
+                        * correct cpumode to is_kernel_module, we should
+                        * record the cpumode when we adding this dso to the
+                        * linked list.
+                        *
+                        * However we don't really need passing correct
+                        * cpumode.  We know the correct cpumode must be kernel
+                        * mode (if not, we should not link it onto kernel_dsos
+                        * list).
+                        *
+                        * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
+                        * is_kernel_module() treats it as a kernel cpumode.
+                        */
+
+                       if (!dso->kernel ||
+                           is_kernel_module(dso->long_name,
+                                            PERF_RECORD_MISC_CPUMODE_UNKNOWN))
                                continue;
 
+
                        kernel = dso;
                        break;
                }
 
+               pthread_rwlock_unlock(&machine->dsos.lock);
+
                if (kernel == NULL)
-                       kernel = __dsos__findnew(&machine->kernel_dsos,
-                                                kmmap_prefix);
+                       kernel = machine__findnew_dso(machine, kmmap_prefix);
                if (kernel == NULL)
                        goto out_problem;
 
                kernel->kernel = kernel_type;
-               if (__machine__create_kernel_maps(machine, kernel) < 0)
+               if (__machine__create_kernel_maps(machine, kernel) < 0) {
+                       dso__put(kernel);
                        goto out_problem;
+               }
 
                if (strstr(kernel->long_name, "vmlinux"))
                        dso__set_short_name(kernel, "[kernel.vmlinux]", false);
@@ -1197,11 +1285,15 @@ int machine__process_mmap2_event(struct machine *machine,
                        event->mmap2.filename, type, thread);
 
        if (map == NULL)
-               goto out_problem;
+               goto out_problem_map;
 
        thread__insert_map(thread, map);
+       thread__put(thread);
+       map__put(map);
        return 0;
 
+out_problem_map:
+       thread__put(thread);
 out_problem:
        dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
        return 0;
@@ -1244,31 +1336,46 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
                        type, thread);
 
        if (map == NULL)
-               goto out_problem;
+               goto out_problem_map;
 
        thread__insert_map(thread, map);
+       thread__put(thread);
+       map__put(map);
        return 0;
 
+out_problem_map:
+       thread__put(thread);
 out_problem:
        dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
        return 0;
 }
 
-void machine__remove_thread(struct machine *machine, struct thread *th)
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
 {
        if (machine->last_match == th)
-               thread__zput(machine->last_match);
+               machine->last_match = NULL;
 
-       rb_erase(&th->rb_node, &machine->threads);
+       BUG_ON(atomic_read(&th->refcnt) == 0);
+       if (lock)
+               pthread_rwlock_wrlock(&machine->threads_lock);
+       rb_erase_init(&th->rb_node, &machine->threads);
+       RB_CLEAR_NODE(&th->rb_node);
        /*
         * Move it first to the dead_threads list, then drop the reference,
         * if this is the last reference, then the thread__delete destructor
         * will be called and we will remove it from the dead_threads list.
         */
        list_add_tail(&th->node, &machine->dead_threads);
+       if (lock)
+               pthread_rwlock_unlock(&machine->threads_lock);
        thread__put(th);
 }
 
+void machine__remove_thread(struct machine *machine, struct thread *th)
+{
+       return __machine__remove_thread(machine, th, true);
+}
+
 int machine__process_fork_event(struct machine *machine, union perf_event *event,
                                struct perf_sample *sample)
 {
@@ -1278,10 +1385,13 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
        struct thread *parent = machine__findnew_thread(machine,
                                                        event->fork.ppid,
                                                        event->fork.ptid);
+       int err = 0;
 
        /* if a thread currently exists for the thread id remove it */
-       if (thread != NULL)
+       if (thread != NULL) {
                machine__remove_thread(machine, thread);
+               thread__put(thread);
+       }
 
        thread = machine__findnew_thread(machine, event->fork.pid,
                                         event->fork.tid);
@@ -1291,10 +1401,12 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
        if (thread == NULL || parent == NULL ||
            thread__fork(thread, parent, sample->time) < 0) {
                dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
-               return -1;
+               err = -1;
        }
+       thread__put(thread);
+       thread__put(parent);
 
-       return 0;
+       return err;
 }
 
 int machine__process_exit_event(struct machine *machine, union perf_event *event,
@@ -1307,8 +1419,10 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
        if (dump_trace)
                perf_event__fprintf_task(event, stdout);
 
-       if (thread != NULL)
+       if (thread != NULL) {
                thread__exited(thread);
+               thread__put(thread);
+       }
 
        return 0;
 }
@@ -1331,6 +1445,13 @@ int machine__process_event(struct machine *machine, union perf_event *event,
                ret = machine__process_exit_event(machine, event, sample); break;
        case PERF_RECORD_LOST:
                ret = machine__process_lost_event(machine, event, sample); break;
+       case PERF_RECORD_AUX:
+               ret = machine__process_aux_event(machine, event); break;
+       case PERF_RECORD_ITRACE_START:
+               ret = machine__process_itrace_start_event(machine, event);
+       case PERF_RECORD_LOST_SAMPLES:
+               ret = machine__process_lost_samples_event(machine, event, sample); break;
+               break;
        default:
                ret = -1;
                break;
@@ -1769,14 +1890,36 @@ int machine__for_each_thread(struct machine *machine,
        return rc;
 }
 
+int machines__for_each_thread(struct machines *machines,
+                             int (*fn)(struct thread *thread, void *p),
+                             void *priv)
+{
+       struct rb_node *nd;
+       int rc = 0;
+
+       rc = machine__for_each_thread(&machines->host, fn, priv);
+       if (rc != 0)
+               return rc;
+
+       for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+               struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+               rc = machine__for_each_thread(machine, fn, priv);
+               if (rc != 0)
+                       return rc;
+       }
+       return rc;
+}
+
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
                                  struct target *target, struct thread_map *threads,
-                                 perf_event__handler_t process, bool data_mmap)
+                                 perf_event__handler_t process, bool data_mmap,
+                                 unsigned int proc_map_timeout)
 {
        if (target__has_task(target))
-               return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
+               return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
        else if (target__has_cpu(target))
-               return perf_event__synthesize_threads(tool, process, machine, data_mmap);
+               return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout);
        /* command specified */
        return 0;
 }
@@ -1820,6 +1963,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
                return -ENOMEM;
 
        thread->cpu = cpu;
+       thread__put(thread);
 
        return 0;
 }
@@ -1845,3 +1989,8 @@ int machine__get_kernel_start(struct machine *machine)
        }
        return err;
 }
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
+{
+       return dsos__findnew(&machine->dsos, filename);
+}
index 6d64ced..887798e 100644 (file)
@@ -30,11 +30,11 @@ struct machine {
        bool              comm_exec;
        char              *root_dir;
        struct rb_root    threads;
+       pthread_rwlock_t  threads_lock;
        struct list_head  dead_threads;
        struct thread     *last_match;
        struct vdso_info  *vdso_info;
-       struct dsos       user_dsos;
-       struct dsos       kernel_dsos;
+       struct dsos       dsos;
        struct map_groups kmaps;
        struct map        *vmlinux_maps[MAP__NR_TYPES];
        u64               kernel_start;
@@ -81,6 +81,12 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
                                struct perf_sample *sample);
 int machine__process_lost_event(struct machine *machine, union perf_event *event,
                                struct perf_sample *sample);
+int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
+                                       struct perf_sample *sample);
+int machine__process_aux_event(struct machine *machine,
+                              union perf_event *event);
+int machine__process_itrace_start_event(struct machine *machine,
+                                       union perf_event *event);
 int machine__process_mmap_event(struct machine *machine, union perf_event *event,
                                struct perf_sample *sample);
 int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
@@ -147,8 +153,10 @@ static inline bool machine__is_host(struct machine *machine)
        return machine ? machine->pid == HOST_KERNEL_ID : false;
 }
 
-struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
-                                      pid_t tid);
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
 
 size_t machine__fprintf(struct machine *machine, FILE *fp);
 
@@ -181,8 +189,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
                                                 filter);
 }
 
-struct map *machine__new_module(struct machine *machine, u64 start,
-                               const char *filename);
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+                                       const char *filename);
 
 int machine__load_kallsyms(struct machine *machine, const char *filename,
                           enum map_type type, symbol_filter_t filter);
@@ -208,16 +216,22 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
 int machine__for_each_thread(struct machine *machine,
                             int (*fn)(struct thread *thread, void *p),
                             void *priv);
+int machines__for_each_thread(struct machines *machines,
+                             int (*fn)(struct thread *thread, void *p),
+                             void *priv);
 
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
                                  struct target *target, struct thread_map *threads,
-                                 perf_event__handler_t process, bool data_mmap);
+                                 perf_event__handler_t process, bool data_mmap,
+                                 unsigned int proc_map_timeout);
 static inline
 int machine__synthesize_threads(struct machine *machine, struct target *target,
-                               struct thread_map *threads, bool data_mmap)
+                               struct thread_map *threads, bool data_mmap,
+                               unsigned int proc_map_timeout)
 {
        return __machine__synthesize_threads(machine, NULL, target, threads,
-                                            perf_event__process, data_mmap);
+                                            perf_event__process, data_mmap,
+                                            proc_map_timeout);
 }
 
 pid_t machine__get_current_tid(struct machine *machine, int cpu);
index a14f08f..b5a5e9c 100644 (file)
@@ -16,6 +16,8 @@
 #include "machine.h"
 #include <linux/string.h>
 
+static void __maps__insert(struct maps *maps, struct map *map);
+
 const char *map_type__name[MAP__NR_TYPES] = {
        [MAP__FUNCTION] = "Functions",
        [MAP__VARIABLE] = "Variables",
@@ -130,13 +132,13 @@ void map__init(struct map *map, enum map_type type,
        map->end      = end;
        map->pgoff    = pgoff;
        map->reloc    = 0;
-       map->dso      = dso;
+       map->dso      = dso__get(dso);
        map->map_ip   = map__map_ip;
        map->unmap_ip = map__unmap_ip;
        RB_CLEAR_NODE(&map->rb_node);
        map->groups   = NULL;
-       map->referenced = false;
        map->erange_warned = false;
+       atomic_set(&map->refcnt, 1);
 }
 
 struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -175,9 +177,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 
                if (vdso) {
                        pgoff = 0;
-                       dso = vdso__dso_findnew(machine, thread);
+                       dso = machine__findnew_vdso(machine, thread);
                } else
-                       dso = __dsos__findnew(&machine->user_dsos, filename);
+                       dso = machine__findnew_dso(machine, filename);
 
                if (dso == NULL)
                        goto out_delete;
@@ -195,6 +197,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
                        if (type != MAP__FUNCTION)
                                dso__set_loaded(dso, map->type);
                }
+               dso__put(dso);
        }
        return map;
 out_delete:
@@ -221,11 +224,24 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
        return map;
 }
 
+static void map__exit(struct map *map)
+{
+       BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
+       dso__zput(map->dso);
+}
+
 void map__delete(struct map *map)
 {
+       map__exit(map);
        free(map);
 }
 
+void map__put(struct map *map)
+{
+       if (map && atomic_dec_and_test(&map->refcnt))
+               map__delete(map);
+}
+
 void map__fixup_start(struct map *map)
 {
        struct rb_root *symbols = &map->dso->symbols[map->type];
@@ -292,6 +308,11 @@ int map__load(struct map *map, symbol_filter_t filter)
        return 0;
 }
 
+int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+       return strcmp(namea, nameb);
+}
+
 struct symbol *map__find_symbol(struct map *map, u64 addr,
                                symbol_filter_t filter)
 {
@@ -413,48 +434,49 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
        return ip + map->reloc;
 }
 
+static void maps__init(struct maps *maps)
+{
+       maps->entries = RB_ROOT;
+       pthread_rwlock_init(&maps->lock, NULL);
+}
+
 void map_groups__init(struct map_groups *mg, struct machine *machine)
 {
        int i;
        for (i = 0; i < MAP__NR_TYPES; ++i) {
-               mg->maps[i] = RB_ROOT;
-               INIT_LIST_HEAD(&mg->removed_maps[i]);
+               maps__init(&mg->maps[i]);
        }
        mg->machine = machine;
-       mg->refcnt = 1;
+       atomic_set(&mg->refcnt, 1);
 }
 
-static void maps__delete(struct rb_root *maps)
+static void __maps__purge(struct maps *maps)
 {
-       struct rb_node *next = rb_first(maps);
+       struct rb_root *root = &maps->entries;
+       struct rb_node *next = rb_first(root);
 
        while (next) {
                struct map *pos = rb_entry(next, struct map, rb_node);
 
                next = rb_next(&pos->rb_node);
-               rb_erase(&pos->rb_node, maps);
-               map__delete(pos);
+               rb_erase_init(&pos->rb_node, root);
+               map__put(pos);
        }
 }
 
-static void maps__delete_removed(struct list_head *maps)
+static void maps__exit(struct maps *maps)
 {
-       struct map *pos, *n;
-
-       list_for_each_entry_safe(pos, n, maps, node) {
-               list_del(&pos->node);
-               map__delete(pos);
-       }
+       pthread_rwlock_wrlock(&maps->lock);
+       __maps__purge(maps);
+       pthread_rwlock_unlock(&maps->lock);
 }
 
 void map_groups__exit(struct map_groups *mg)
 {
        int i;
 
-       for (i = 0; i < MAP__NR_TYPES; ++i) {
-               maps__delete(&mg->maps[i]);
-               maps__delete_removed(&mg->removed_maps[i]);
-       }
+       for (i = 0; i < MAP__NR_TYPES; ++i)
+               maps__exit(&mg->maps[i]);
 }
 
 bool map_groups__empty(struct map_groups *mg)
@@ -464,8 +486,6 @@ bool map_groups__empty(struct map_groups *mg)
        for (i = 0; i < MAP__NR_TYPES; ++i) {
                if (maps__first(&mg->maps[i]))
                        return false;
-               if (!list_empty(&mg->removed_maps[i]))
-                       return false;
        }
 
        return true;
@@ -489,32 +509,10 @@ void map_groups__delete(struct map_groups *mg)
 
 void map_groups__put(struct map_groups *mg)
 {
-       if (--mg->refcnt == 0)
+       if (mg && atomic_dec_and_test(&mg->refcnt))
                map_groups__delete(mg);
 }
 
-void map_groups__flush(struct map_groups *mg)
-{
-       int type;
-
-       for (type = 0; type < MAP__NR_TYPES; type++) {
-               struct rb_root *root = &mg->maps[type];
-               struct rb_node *next = rb_first(root);
-
-               while (next) {
-                       struct map *pos = rb_entry(next, struct map, rb_node);
-                       next = rb_next(&pos->rb_node);
-                       rb_erase(&pos->rb_node, root);
-                       /*
-                        * We may have references to this map, for
-                        * instance in some hist_entry instances, so
-                        * just move them to a separate list.
-                        */
-                       list_add_tail(&pos->node, &mg->removed_maps[pos->type]);
-               }
-       }
-}
-
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
                                       enum map_type type, u64 addr,
                                       struct map **mapp,
@@ -538,20 +536,28 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
                                               struct map **mapp,
                                               symbol_filter_t filter)
 {
+       struct maps *maps = &mg->maps[type];
+       struct symbol *sym;
        struct rb_node *nd;
 
-       for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
+       pthread_rwlock_rdlock(&maps->lock);
+
+       for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
                struct map *pos = rb_entry(nd, struct map, rb_node);
-               struct symbol *sym = map__find_symbol_by_name(pos, name, filter);
+
+               sym = map__find_symbol_by_name(pos, name, filter);
 
                if (sym == NULL)
                        continue;
                if (mapp != NULL)
                        *mapp = pos;
-               return sym;
+               goto out;
        }
 
-       return NULL;
+       sym = NULL;
+out:
+       pthread_rwlock_unlock(&maps->lock);
+       return sym;
 }
 
 int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
@@ -571,73 +577,54 @@ int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
        return ams->sym ? 0 : -1;
 }
 
-size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
-                                 FILE *fp)
+static size_t maps__fprintf(struct maps *maps, FILE *fp)
 {
-       size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
+       size_t printed = 0;
        struct rb_node *nd;
 
-       for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
+       pthread_rwlock_rdlock(&maps->lock);
+
+       for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
                struct map *pos = rb_entry(nd, struct map, rb_node);
                printed += fprintf(fp, "Map:");
                printed += map__fprintf(pos, fp);
                if (verbose > 2) {
-                       printed += dso__fprintf(pos->dso, type, fp);
+                       printed += dso__fprintf(pos->dso, pos->type, fp);
                        printed += fprintf(fp, "--\n");
                }
        }
 
-       return printed;
-}
+       pthread_rwlock_unlock(&maps->lock);
 
-static size_t map_groups__fprintf_maps(struct map_groups *mg, FILE *fp)
-{
-       size_t printed = 0, i;
-       for (i = 0; i < MAP__NR_TYPES; ++i)
-               printed += __map_groups__fprintf_maps(mg, i, fp);
        return printed;
 }
 
-static size_t __map_groups__fprintf_removed_maps(struct map_groups *mg,
-                                                enum map_type type, FILE *fp)
+size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
+                                 FILE *fp)
 {
-       struct map *pos;
-       size_t printed = 0;
-
-       list_for_each_entry(pos, &mg->removed_maps[type], node) {
-               printed += fprintf(fp, "Map:");
-               printed += map__fprintf(pos, fp);
-               if (verbose > 1) {
-                       printed += dso__fprintf(pos->dso, type, fp);
-                       printed += fprintf(fp, "--\n");
-               }
-       }
-       return printed;
+       size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
+       return printed += maps__fprintf(&mg->maps[type], fp);
 }
 
-static size_t map_groups__fprintf_removed_maps(struct map_groups *mg,
-                                              FILE *fp)
+size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
 {
        size_t printed = 0, i;
        for (i = 0; i < MAP__NR_TYPES; ++i)
-               printed += __map_groups__fprintf_removed_maps(mg, i, fp);
+               printed += __map_groups__fprintf_maps(mg, i, fp);
        return printed;
 }
 
-size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
+static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 {
-       size_t printed = map_groups__fprintf_maps(mg, fp);
-       printed += fprintf(fp, "Removed maps:\n");
-       return printed + map_groups__fprintf_removed_maps(mg, fp);
-}
-
-int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
-                                  FILE *fp)
-{
-       struct rb_root *root = &mg->maps[map->type];
-       struct rb_node *next = rb_first(root);
+       struct rb_root *root;
+       struct rb_node *next;
        int err = 0;
 
+       pthread_rwlock_wrlock(&maps->lock);
+
+       root = &maps->entries;
+       next = rb_first(root);
+
        while (next) {
                struct map *pos = rb_entry(next, struct map, rb_node);
                next = rb_next(&pos->rb_node);
@@ -651,7 +638,7 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
                        map__fprintf(pos, fp);
                }
 
-               rb_erase(&pos->rb_node, root);
+               rb_erase_init(&pos->rb_node, root);
                /*
                 * Now check if we need to create new maps for areas not
                 * overlapped by the new map:
@@ -661,11 +648,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 
                        if (before == NULL) {
                                err = -ENOMEM;
-                               goto move_map;
+                               goto put_map;
                        }
 
                        before->end = map->start;
-                       map_groups__insert(mg, before);
+                       __maps__insert(maps, before);
                        if (verbose >= 2)
                                map__fprintf(before, fp);
                }
@@ -675,28 +662,31 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 
                        if (after == NULL) {
                                err = -ENOMEM;
-                               goto move_map;
+                               goto put_map;
                        }
 
                        after->start = map->end;
-                       map_groups__insert(mg, after);
+                       __maps__insert(maps, after);
                        if (verbose >= 2)
                                map__fprintf(after, fp);
                }
-move_map:
-               /*
-                * If we have references, just move them to a separate list.
-                */
-               if (pos->referenced)
-                       list_add_tail(&pos->node, &mg->removed_maps[map->type]);
-               else
-                       map__delete(pos);
+put_map:
+               map__put(pos);
 
                if (err)
-                       return err;
+                       goto out;
        }
 
-       return 0;
+       err = 0;
+out:
+       pthread_rwlock_unlock(&maps->lock);
+       return err;
+}
+
+int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
+                                  FILE *fp)
+{
+       return maps__fixup_overlappings(&mg->maps[map->type], map, fp);
 }
 
 /*
@@ -705,20 +695,28 @@ move_map:
 int map_groups__clone(struct map_groups *mg,
                      struct map_groups *parent, enum map_type type)
 {
-       struct rb_node *nd;
-       for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) {
-               struct map *map = rb_entry(nd, struct map, rb_node);
+       int err = -ENOMEM;
+       struct map *map;
+       struct maps *maps = &parent->maps[type];
+
+       pthread_rwlock_rdlock(&maps->lock);
+
+       for (map = maps__first(maps); map; map = map__next(map)) {
                struct map *new = map__clone(map);
                if (new == NULL)
-                       return -ENOMEM;
+                       goto out_unlock;
                map_groups__insert(mg, new);
        }
-       return 0;
+
+       err = 0;
+out_unlock:
+       pthread_rwlock_unlock(&maps->lock);
+       return err;
 }
 
-void maps__insert(struct rb_root *maps, struct map *map)
+static void __maps__insert(struct maps *maps, struct map *map)
 {
-       struct rb_node **p = &maps->rb_node;
+       struct rb_node **p = &maps->entries.rb_node;
        struct rb_node *parent = NULL;
        const u64 ip = map->start;
        struct map *m;
@@ -733,20 +731,38 @@ void maps__insert(struct rb_root *maps, struct map *map)
        }
 
        rb_link_node(&map->rb_node, parent, p);
-       rb_insert_color(&map->rb_node, maps);
+       rb_insert_color(&map->rb_node, &maps->entries);
+       map__get(map);
 }
 
-void maps__remove(struct rb_root *maps, struct map *map)
+void maps__insert(struct maps *maps, struct map *map)
 {
-       rb_erase(&map->rb_node, maps);
+       pthread_rwlock_wrlock(&maps->lock);
+       __maps__insert(maps, map);
+       pthread_rwlock_unlock(&maps->lock);
 }
 
-struct map *maps__find(struct rb_root *maps, u64 ip)
+static void __maps__remove(struct maps *maps, struct map *map)
 {
-       struct rb_node **p = &maps->rb_node;
-       struct rb_node *parent = NULL;
+       rb_erase_init(&map->rb_node, &maps->entries);
+       map__put(map);
+}
+
+void maps__remove(struct maps *maps, struct map *map)
+{
+       pthread_rwlock_wrlock(&maps->lock);
+       __maps__remove(maps, map);
+       pthread_rwlock_unlock(&maps->lock);
+}
+
+struct map *maps__find(struct maps *maps, u64 ip)
+{
+       struct rb_node **p, *parent = NULL;
        struct map *m;
 
+       pthread_rwlock_rdlock(&maps->lock);
+
+       p = &maps->entries.rb_node;
        while (*p != NULL) {
                parent = *p;
                m = rb_entry(parent, struct map, rb_node);
@@ -755,22 +771,25 @@ struct map *maps__find(struct rb_root *maps, u64 ip)
                else if (ip >= m->end)
                        p = &(*p)->rb_right;
                else
-                       return m;
+                       goto out;
        }
 
-       return NULL;
+       m = NULL;
+out:
+       pthread_rwlock_unlock(&maps->lock);
+       return m;
 }
 
-struct map *maps__first(struct rb_root *maps)
+struct map *maps__first(struct maps *maps)
 {
-       struct rb_node *first = rb_first(maps);
+       struct rb_node *first = rb_first(&maps->entries);
 
        if (first)
                return rb_entry(first, struct map, rb_node);
        return NULL;
 }
 
-struct map *maps__next(struct map *map)
+struct map *map__next(struct map *map)
 {
        struct rb_node *next = rb_next(&map->rb_node);
 
index ec19c59..d73e687 100644 (file)
@@ -1,9 +1,11 @@
 #ifndef __PERF_MAP_H
 #define __PERF_MAP_H
 
+#include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <pthread.h>
 #include <stdio.h>
 #include <stdbool.h>
 #include <linux/types.h>
@@ -32,7 +34,6 @@ struct map {
        u64                     start;
        u64                     end;
        u8 /* enum map_type */  type;
-       bool                    referenced;
        bool                    erange_warned;
        u32                     priv;
        u32                     prot;
@@ -50,6 +51,7 @@ struct map {
 
        struct dso              *dso;
        struct map_groups       *groups;
+       atomic_t                refcnt;
 };
 
 struct kmap {
@@ -57,11 +59,15 @@ struct kmap {
        struct map_groups       *kmaps;
 };
 
+struct maps {
+       struct rb_root   entries;
+       pthread_rwlock_t lock;
+};
+
 struct map_groups {
-       struct rb_root   maps[MAP__NR_TYPES];
-       struct list_head removed_maps[MAP__NR_TYPES];
+       struct maps      maps[MAP__NR_TYPES];
        struct machine   *machine;
-       int              refcnt;
+       atomic_t         refcnt;
 };
 
 struct map_groups *map_groups__new(struct machine *machine);
@@ -70,7 +76,8 @@ bool map_groups__empty(struct map_groups *mg);
 
 static inline struct map_groups *map_groups__get(struct map_groups *mg)
 {
-       ++mg->refcnt;
+       if (mg)
+               atomic_inc(&mg->refcnt);
        return mg;
 }
 
@@ -124,7 +131,7 @@ struct thread;
  */
 #define __map__for_each_symbol_by_name(map, sym_name, pos, filter)     \
        for (pos = map__find_symbol_by_name(map, sym_name, filter);     \
-            pos && strcmp(pos->name, sym_name) == 0;           \
+            pos && arch__compare_symbol_names(pos->name, sym_name) == 0;       \
             pos = symbol__next_by_name(pos))
 
 #define map__for_each_symbol_by_name(map, sym_name, pos)               \
@@ -132,6 +139,7 @@ struct thread;
 
 typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
 
+int arch__compare_symbol_names(const char *namea, const char *nameb);
 void map__init(struct map *map, enum map_type type,
               u64 start, u64 end, u64 pgoff, struct dso *dso);
 struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -141,6 +149,24 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
 void map__delete(struct map *map);
 struct map *map__clone(struct map *map);
+
+static inline struct map *map__get(struct map *map)
+{
+       if (map)
+               atomic_inc(&map->refcnt);
+       return map;
+}
+
+void map__put(struct map *map);
+
+static inline void __map__zput(struct map **map)
+{
+       map__put(*map);
+       *map = NULL;
+}
+
+#define map__zput(map) __map__zput(&map)
+
 int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *map, FILE *fp);
 size_t map__fprintf_dsoname(struct map *map, FILE *fp);
@@ -159,11 +185,11 @@ void map__reloc_vmlinux(struct map *map);
 
 size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
                                  FILE *fp);
-void maps__insert(struct rb_root *maps, struct map *map);
-void maps__remove(struct rb_root *maps, struct map *map);
-struct map *maps__find(struct rb_root *maps, u64 addr);
-struct map *maps__first(struct rb_root *maps);
-struct map *maps__next(struct map *map);
+void maps__insert(struct maps *maps, struct map *map);
+void maps__remove(struct maps *maps, struct map *map);
+struct map *maps__find(struct maps *maps, u64 addr);
+struct map *maps__first(struct maps *maps);
+struct map *map__next(struct map *map);
 void map_groups__init(struct map_groups *mg, struct machine *machine);
 void map_groups__exit(struct map_groups *mg);
 int map_groups__clone(struct map_groups *mg,
@@ -198,7 +224,7 @@ static inline struct map *map_groups__first(struct map_groups *mg,
 
 static inline struct map *map_groups__next(struct map *map)
 {
-       return maps__next(map);
+       return map__next(map);
 }
 
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
@@ -230,6 +256,4 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 struct map *map_groups__find_by_name(struct map_groups *mg,
                                     enum map_type type, const char *name);
 
-void map_groups__flush(struct map_groups *mg);
-
 #endif /* __PERF_MAP_H */
index 31ee02d..53ef006 100644 (file)
@@ -50,11 +50,6 @@ void setup_pager(void)
 
        if (!isatty(1))
                return;
-       if (!pager) {
-               if (!pager_program)
-                       perf_config(perf_default_config, NULL);
-               pager = pager_program;
-       }
        if (!pager)
                pager = getenv("PAGER");
        if (!(pager || access("/usr/bin/pager", X_OK)))
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
new file mode 100644 (file)
index 0000000..a3b1e13
--- /dev/null
@@ -0,0 +1,94 @@
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/parse-options.h"
+#include "util/parse-branch-options.h"
+
+#define BRANCH_OPT(n, m) \
+       { .name = n, .mode = (m) }
+
+#define BRANCH_END { .name = NULL }
+
+struct branch_mode {
+       const char *name;
+       int mode;
+};
+
+static const struct branch_mode branch_modes[] = {
+       BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
+       BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
+       BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
+       BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
+       BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
+       BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
+       BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+       BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
+       BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
+       BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+       BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
+       BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
+       BRANCH_END
+};
+
+int
+parse_branch_stack(const struct option *opt, const char *str, int unset)
+{
+#define ONLY_PLM \
+       (PERF_SAMPLE_BRANCH_USER        |\
+        PERF_SAMPLE_BRANCH_KERNEL      |\
+        PERF_SAMPLE_BRANCH_HV)
+
+       uint64_t *mode = (uint64_t *)opt->value;
+       const struct branch_mode *br;
+       char *s, *os = NULL, *p;
+       int ret = -1;
+
+       if (unset)
+               return 0;
+
+       /*
+        * cannot set it twice, -b + --branch-filter for instance
+        */
+       if (*mode)
+               return -1;
+
+       /* str may be NULL in case no arg is passed to -b */
+       if (str) {
+               /* because str is read-only */
+               s = os = strdup(str);
+               if (!s)
+                       return -1;
+
+               for (;;) {
+                       p = strchr(s, ',');
+                       if (p)
+                               *p = '\0';
+
+                       for (br = branch_modes; br->name; br++) {
+                               if (!strcasecmp(s, br->name))
+                                       break;
+                       }
+                       if (!br->name) {
+                               ui__warning("unknown branch filter %s,"
+                                           " check man page\n", s);
+                               goto error;
+                       }
+
+                       *mode |= br->mode;
+
+                       if (!p)
+                               break;
+
+                       s = p + 1;
+               }
+       }
+       ret = 0;
+
+       /* default to any branch */
+       if ((*mode & ~ONLY_PLM) == 0) {
+               *mode = PERF_SAMPLE_BRANCH_ANY;
+       }
+error:
+       free(os);
+       return ret;
+}
diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h
new file mode 100644 (file)
index 0000000..b9d9470
--- /dev/null
@@ -0,0 +1,5 @@
+#ifndef _PERF_PARSE_BRANCH_OPTIONS_H
+#define _PERF_PARSE_BRANCH_OPTIONS_H 1
+struct option;
+int parse_branch_stack(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_BRANCH_OPTIONS_H */
index be06553..2a4d1ec 100644 (file)
@@ -17,6 +17,7 @@
 #include "parse-events-flex.h"
 #include "pmu.h"
 #include "thread_map.h"
+#include "asm/bug.h"
 
 #define MAX_NAME_LEN 100
 
@@ -538,16 +539,40 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
        return add_event(list, idx, &attr, NULL);
 }
 
+static int check_type_val(struct parse_events_term *term,
+                         struct parse_events_error *err,
+                         int type)
+{
+       if (type == term->type_val)
+               return 0;
+
+       if (err) {
+               err->idx = term->err_val;
+               if (type == PARSE_EVENTS__TERM_TYPE_NUM)
+                       err->str = strdup("expected numeric value");
+               else
+                       err->str = strdup("expected string value");
+       }
+       return -EINVAL;
+}
+
 static int config_term(struct perf_event_attr *attr,
-                      struct parse_events_term *term)
+                      struct parse_events_term *term,
+                      struct parse_events_error *err)
 {
-#define CHECK_TYPE_VAL(type)                                   \
-do {                                                           \
-       if (PARSE_EVENTS__TERM_TYPE_ ## type != term->type_val) \
-               return -EINVAL;                                 \
+#define CHECK_TYPE_VAL(type)                                              \
+do {                                                                      \
+       if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \
+               return -EINVAL;                                            \
 } while (0)
 
        switch (term->type_term) {
+       case PARSE_EVENTS__TERM_TYPE_USER:
+               /*
+                * Always succeed for sysfs terms, as we dont know
+                * at this point what type they need to have.
+                */
+               return 0;
        case PARSE_EVENTS__TERM_TYPE_CONFIG:
                CHECK_TYPE_VAL(NUM);
                attr->config = term->val.num;
@@ -582,18 +607,20 @@ do {                                                              \
 }
 
 static int config_attr(struct perf_event_attr *attr,
-                      struct list_head *head, int fail)
+                      struct list_head *head,
+                      struct parse_events_error *err)
 {
        struct parse_events_term *term;
 
        list_for_each_entry(term, head, list)
-               if (config_term(attr, term) && fail)
+               if (config_term(attr, term, err))
                        return -EINVAL;
 
        return 0;
 }
 
-int parse_events_add_numeric(struct list_head *list, int *idx,
+int parse_events_add_numeric(struct parse_events_evlist *data,
+                            struct list_head *list,
                             u32 type, u64 config,
                             struct list_head *head_config)
 {
@@ -604,10 +631,10 @@ int parse_events_add_numeric(struct list_head *list, int *idx,
        attr.config = config;
 
        if (head_config &&
-           config_attr(&attr, head_config, 1))
+           config_attr(&attr, head_config, data->error))
                return -EINVAL;
 
-       return add_event(list, idx, &attr, NULL);
+       return add_event(list, &data->idx, &attr, NULL);
 }
 
 static int parse_events__is_name_term(struct parse_events_term *term)
@@ -626,8 +653,9 @@ static char *pmu_event_name(struct list_head *head_terms)
        return NULL;
 }
 
-int parse_events_add_pmu(struct list_head *list, int *idx,
-                        char *name, struct list_head *head_config)
+int parse_events_add_pmu(struct parse_events_evlist *data,
+                        struct list_head *list, char *name,
+                        struct list_head *head_config)
 {
        struct perf_event_attr attr;
        struct perf_pmu_info info;
@@ -647,7 +675,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx,
 
        if (!head_config) {
                attr.type = pmu->type;
-               evsel = __add_event(list, idx, &attr, NULL, pmu->cpus);
+               evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus);
                return evsel ? 0 : -ENOMEM;
        }
 
@@ -658,13 +686,14 @@ int parse_events_add_pmu(struct list_head *list, int *idx,
         * Configure hardcoded terms first, no need to check
         * return value when called with fail == 0 ;)
         */
-       config_attr(&attr, head_config, 0);
+       if (config_attr(&attr, head_config, data->error))
+               return -EINVAL;
 
-       if (perf_pmu__config(pmu, &attr, head_config))
+       if (perf_pmu__config(pmu, &attr, head_config, data->error))
                return -EINVAL;
 
-       evsel = __add_event(list, idx, &attr, pmu_event_name(head_config),
-                           pmu->cpus);
+       evsel = __add_event(list, &data->idx, &attr,
+                           pmu_event_name(head_config), pmu->cpus);
        if (evsel) {
                evsel->unit = info.unit;
                evsel->scale = info.scale;
@@ -1019,11 +1048,13 @@ int parse_events_terms(struct list_head *terms, const char *str)
        return ret;
 }
 
-int parse_events(struct perf_evlist *evlist, const char *str)
+int parse_events(struct perf_evlist *evlist, const char *str,
+                struct parse_events_error *err)
 {
        struct parse_events_evlist data = {
-               .list = LIST_HEAD_INIT(data.list),
-               .idx  = evlist->nr_entries,
+               .list  = LIST_HEAD_INIT(data.list),
+               .idx   = evlist->nr_entries,
+               .error = err,
        };
        int ret;
 
@@ -1044,16 +1075,87 @@ int parse_events(struct perf_evlist *evlist, const char *str)
        return ret;
 }
 
+#define MAX_WIDTH 1000
+static int get_term_width(void)
+{
+       struct winsize ws;
+
+       get_term_dimensions(&ws);
+       return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
+}
+
+static void parse_events_print_error(struct parse_events_error *err,
+                                    const char *event)
+{
+       const char *str = "invalid or unsupported event: ";
+       char _buf[MAX_WIDTH];
+       char *buf = (char *) event;
+       int idx = 0;
+
+       if (err->str) {
+               /* -2 for extra '' in the final fprintf */
+               int width       = get_term_width() - 2;
+               int len_event   = strlen(event);
+               int len_str, max_len, cut = 0;
+
+               /*
+                * Maximum error index indent, we will cut
+                * the event string if it's bigger.
+                */
+               int max_err_idx = 10;
+
+               /*
+                * Let's be specific with the message when
+                * we have the precise error.
+                */
+               str     = "event syntax error: ";
+               len_str = strlen(str);
+               max_len = width - len_str;
+
+               buf = _buf;
+
+               /* We're cutting from the beggining. */
+               if (err->idx > max_err_idx)
+                       cut = err->idx - max_err_idx;
+
+               strncpy(buf, event + cut, max_len);
+
+               /* Mark cut parts with '..' on both sides. */
+               if (cut)
+                       buf[0] = buf[1] = '.';
+
+               if ((len_event - cut) > max_len) {
+                       buf[max_len - 1] = buf[max_len - 2] = '.';
+                       buf[max_len] = 0;
+               }
+
+               idx = len_str + err->idx - cut;
+       }
+
+       fprintf(stderr, "%s'%s'\n", str, buf);
+       if (idx) {
+               fprintf(stderr, "%*s\\___ %s\n", idx + 1, "", err->str);
+               if (err->help)
+                       fprintf(stderr, "\n%s\n", err->help);
+               free(err->str);
+               free(err->help);
+       }
+
+       fprintf(stderr, "Run 'perf list' for a list of valid events\n");
+}
+
+#undef MAX_WIDTH
+
 int parse_events_option(const struct option *opt, const char *str,
                        int unset __maybe_unused)
 {
        struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
-       int ret = parse_events(evlist, str);
+       struct parse_events_error err = { .idx = 0, };
+       int ret = parse_events(evlist, str, &err);
+
+       if (ret)
+               parse_events_print_error(&err, str);
 
-       if (ret) {
-               fprintf(stderr, "invalid or unsupported event: '%s'\n", str);
-               fprintf(stderr, "Run 'perf list' for a list of valid events\n");
-       }
        return ret;
 }
 
@@ -1460,7 +1562,7 @@ int parse_events__is_hardcoded_term(struct parse_events_term *term)
 
 static int new_term(struct parse_events_term **_term, int type_val,
                    int type_term, char *config,
-                   char *str, u64 num)
+                   char *str, u64 num, int err_term, int err_val)
 {
        struct parse_events_term *term;
 
@@ -1472,6 +1574,8 @@ static int new_term(struct parse_events_term **_term, int type_val,
        term->type_val  = type_val;
        term->type_term = type_term;
        term->config = config;
+       term->err_term = err_term;
+       term->err_val  = err_val;
 
        switch (type_val) {
        case PARSE_EVENTS__TERM_TYPE_NUM:
@@ -1490,17 +1594,29 @@ static int new_term(struct parse_events_term **_term, int type_val,
 }
 
 int parse_events_term__num(struct parse_events_term **term,
-                          int type_term, char *config, u64 num)
+                          int type_term, char *config, u64 num,
+                          void *loc_term_, void *loc_val_)
 {
+       YYLTYPE *loc_term = loc_term_;
+       YYLTYPE *loc_val = loc_val_;
+
        return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
-                       config, NULL, num);
+                       config, NULL, num,
+                       loc_term ? loc_term->first_column : 0,
+                       loc_val ? loc_val->first_column : 0);
 }
 
 int parse_events_term__str(struct parse_events_term **term,
-                          int type_term, char *config, char *str)
+                          int type_term, char *config, char *str,
+                          void *loc_term_, void *loc_val_)
 {
+       YYLTYPE *loc_term = loc_term_;
+       YYLTYPE *loc_val = loc_val_;
+
        return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
-                       config, str, 0);
+                       config, str, 0,
+                       loc_term ? loc_term->first_column : 0,
+                       loc_val ? loc_val->first_column : 0);
 }
 
 int parse_events_term__sym_hw(struct parse_events_term **term,
@@ -1514,18 +1630,20 @@ int parse_events_term__sym_hw(struct parse_events_term **term,
        if (config)
                return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
                                PARSE_EVENTS__TERM_TYPE_USER, config,
-                               (char *) sym->symbol, 0);
+                               (char *) sym->symbol, 0, 0, 0);
        else
                return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
                                PARSE_EVENTS__TERM_TYPE_USER,
-                               (char *) "event", (char *) sym->symbol, 0);
+                               (char *) "event", (char *) sym->symbol,
+                               0, 0, 0);
 }
 
 int parse_events_term__clone(struct parse_events_term **new,
                             struct parse_events_term *term)
 {
        return new_term(new, term->type_val, term->type_term, term->config,
-                       term->val.str, term->val.num);
+                       term->val.str, term->val.num,
+                       term->err_term, term->err_val);
 }
 
 void parse_events__free_terms(struct list_head *terms)
@@ -1535,3 +1653,15 @@ void parse_events__free_terms(struct list_head *terms)
        list_for_each_entry_safe(term, h, terms, list)
                free(term);
 }
+
+void parse_events_evlist_error(struct parse_events_evlist *data,
+                              int idx, const char *str)
+{
+       struct parse_events_error *err = data->error;
+
+       if (!err)
+               return;
+       err->idx = idx;
+       err->str = strdup(str);
+       WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
+}
index 52a2dda..131f29b 100644 (file)
@@ -12,6 +12,7 @@
 struct list_head;
 struct perf_evsel;
 struct perf_evlist;
+struct parse_events_error;
 
 struct option;
 
@@ -29,7 +30,8 @@ const char *event_type(int type);
 
 extern int parse_events_option(const struct option *opt, const char *str,
                               int unset);
-extern int parse_events(struct perf_evlist *evlist, const char *str);
+extern int parse_events(struct perf_evlist *evlist, const char *str,
+                       struct parse_events_error *error);
 extern int parse_events_terms(struct list_head *terms, const char *str);
 extern int parse_filter(const struct option *opt, const char *str, int unset);
 
@@ -72,12 +74,23 @@ struct parse_events_term {
        int type_term;
        struct list_head list;
        bool used;
+
+       /* error string indexes for within parsed string */
+       int err_term;
+       int err_val;
+};
+
+struct parse_events_error {
+       int   idx;      /* index in the parsed string */
+       char *str;      /* string to display at the index */
+       char *help;     /* optional help string */
 };
 
 struct parse_events_evlist {
-       struct list_head list;
-       int idx;
-       int nr_groups;
+       struct list_head           list;
+       int                        idx;
+       int                        nr_groups;
+       struct parse_events_error *error;
 };
 
 struct parse_events_terms {
@@ -85,10 +98,12 @@ struct parse_events_terms {
 };
 
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
-int parse_events_term__num(struct parse_events_term **_term,
-                          int type_term, char *config, u64 num);
-int parse_events_term__str(struct parse_events_term **_term,
-                          int type_term, char *config, char *str);
+int parse_events_term__num(struct parse_events_term **term,
+                          int type_term, char *config, u64 num,
+                          void *loc_term, void *loc_val);
+int parse_events_term__str(struct parse_events_term **term,
+                          int type_term, char *config, char *str,
+                          void *loc_term, void *loc_val);
 int parse_events_term__sym_hw(struct parse_events_term **term,
                              char *config, unsigned idx);
 int parse_events_term__clone(struct parse_events_term **new,
@@ -99,21 +114,24 @@ int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, char *name);
 int parse_events_add_tracepoint(struct list_head *list, int *idx,
                                char *sys, char *event);
-int parse_events_add_numeric(struct list_head *list, int *idx,
+int parse_events_add_numeric(struct parse_events_evlist *data,
+                            struct list_head *list,
                             u32 type, u64 config,
                             struct list_head *head_config);
 int parse_events_add_cache(struct list_head *list, int *idx,
                           char *type, char *op_result1, char *op_result2);
 int parse_events_add_breakpoint(struct list_head *list, int *idx,
                                void *ptr, char *type, u64 len);
-int parse_events_add_pmu(struct list_head *list, int *idx,
-                        char *pmu , struct list_head *head_config);
+int parse_events_add_pmu(struct parse_events_evlist *data,
+                        struct list_head *list, char *name,
+                        struct list_head *head_config);
 enum perf_pmu_event_symbol_type
 perf_pmu__parse_check(const char *name);
 void parse_events__set_leader(char *name, struct list_head *list);
 void parse_events_update_lists(struct list_head *list_event,
                               struct list_head *list_all);
-void parse_events_error(void *data, void *scanner, char const *msg);
+void parse_events_evlist_error(struct parse_events_evlist *data,
+                              int idx, const char *str);
 
 void print_events(const char *event_glob, bool name_only);
 
index 8895cf3..09e738f 100644 (file)
@@ -3,6 +3,8 @@
 %option bison-bridge
 %option prefix="parse_events_"
 %option stack
+%option bison-locations
+%option yylineno
 
 %{
 #include <errno.h>
@@ -51,6 +53,18 @@ static int str(yyscan_t scanner, int token)
        return token;
 }
 
+#define REWIND(__alloc)                                \
+do {                                                           \
+       YYSTYPE *__yylval = parse_events_get_lval(yyscanner);   \
+       char *text = parse_events_get_text(yyscanner);          \
+                                                               \
+       if (__alloc)                                            \
+               __yylval->str = strdup(text);                   \
+                                                               \
+       yycolumn -= strlen(text);                               \
+       yyless(0);                                              \
+} while (0)
+
 static int pmu_str_check(yyscan_t scanner)
 {
        YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -85,6 +99,13 @@ static int term(yyscan_t scanner, int type)
        return PE_TERM;
 }
 
+#define YY_USER_ACTION                                 \
+do {                                                   \
+       yylloc->last_column  = yylloc->first_column;    \
+       yylloc->first_column = yycolumn;                \
+       yycolumn += yyleng;                             \
+} while (0);
+
 %}
 
 %x mem
@@ -119,6 +140,12 @@ modifier_bp        [rwx]{1,3}
 
                if (start_token) {
                        parse_events_set_extra(NULL, yyscanner);
+                       /*
+                        * The flex parser does not init locations variable
+                        * via the scan_string interface, so we need do the
+                        * init in here.
+                        */
+                       yycolumn = 0;
                        return start_token;
                }
          }
@@ -127,24 +154,30 @@ modifier_bp       [rwx]{1,3}
 <event>{
 
 {group}                {
-                       BEGIN(INITIAL); yyless(0);
+                       BEGIN(INITIAL);
+                       REWIND(0);
                }
 
 {event_pmu}    |
 {event}                {
-                       str(yyscanner, PE_EVENT_NAME);
-                       BEGIN(INITIAL); yyless(0);
+                       BEGIN(INITIAL);
+                       REWIND(1);
                        return PE_EVENT_NAME;
                }
 
 .              |
 <<EOF>>                {
-                       BEGIN(INITIAL); yyless(0);
+                       BEGIN(INITIAL);
+                       REWIND(0);
                }
 
 }
 
 <config>{
+       /*
+        * Please update formats_error_string any time
+        * new static term is added.
+        */
 config                 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
 config1                        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
 config2                        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
index 72def07..591905a 100644 (file)
@@ -2,6 +2,7 @@
 %parse-param {void *_data}
 %parse-param {void *scanner}
 %lex-param {void* scanner}
+%locations
 
 %{
 
@@ -14,8 +15,6 @@
 #include "parse-events.h"
 #include "parse-events-bison.h"
 
-extern int parse_events_lex (YYSTYPE* lvalp, void* scanner);
-
 #define ABORT_ON(val) \
 do { \
        if (val) \
@@ -208,7 +207,7 @@ PE_NAME '/' event_config '/'
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, $3));
+       ABORT_ON(parse_events_add_pmu(data, list, $1, $3));
        parse_events__free_terms($3);
        $$ = list;
 }
@@ -219,7 +218,7 @@ PE_NAME '/' '/'
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, NULL));
+       ABORT_ON(parse_events_add_pmu(data, list, $1, NULL));
        $$ = list;
 }
 |
@@ -232,11 +231,11 @@ PE_KERNEL_PMU_EVENT sep_dc
 
        ALLOC_LIST(head);
        ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, 1));
+                                       $1, 1, &@1, NULL));
        list_add_tail(&term->list, head);
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head));
+       ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
        parse_events__free_terms(head);
        $$ = list;
 }
@@ -252,7 +251,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
 
        ALLOC_LIST(head);
        ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       &pmu_name, 1));
+                                       &pmu_name, 1, &@1, NULL));
        list_add_tail(&term->list, head);
 
        ALLOC_LIST(list);
@@ -275,8 +274,7 @@ value_sym '/' event_config '/'
        int config = $1 & 255;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_numeric(list, &data->idx,
-                                         type, config, $3));
+       ABORT_ON(parse_events_add_numeric(data, list, type, config, $3));
        parse_events__free_terms($3);
        $$ = list;
 }
@@ -289,8 +287,7 @@ value_sym sep_slash_dc
        int config = $1 & 255;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_numeric(list, &data->idx,
-                                         type, config, NULL));
+       ABORT_ON(parse_events_add_numeric(data, list, type, config, NULL));
        $$ = list;
 }
 
@@ -389,7 +386,15 @@ PE_NAME ':' PE_NAME
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_tracepoint(list, &data->idx, $1, $3));
+       if (parse_events_add_tracepoint(list, &data->idx, $1, $3)) {
+               struct parse_events_error *error = data->error;
+
+               if (error) {
+                       error->idx = @1.first_column;
+                       error->str = strdup("unknown tracepoint");
+               }
+               return -1;
+       }
        $$ = list;
 }
 
@@ -400,7 +405,7 @@ PE_VALUE ':' PE_VALUE
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_numeric(list, &data->idx, (u32)$1, $3, NULL));
+       ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL));
        $$ = list;
 }
 
@@ -411,8 +416,7 @@ PE_RAW
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_numeric(list, &data->idx,
-                                         PERF_TYPE_RAW, $1, NULL));
+       ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL));
        $$ = list;
 }
 
@@ -450,7 +454,7 @@ PE_NAME '=' PE_NAME
        struct parse_events_term *term;
 
        ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $3));
+                                       $1, $3, &@1, &@3));
        $$ = term;
 }
 |
@@ -459,7 +463,7 @@ PE_NAME '=' PE_VALUE
        struct parse_events_term *term;
 
        ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $3));
+                                       $1, $3, &@1, &@3));
        $$ = term;
 }
 |
@@ -477,7 +481,7 @@ PE_NAME
        struct parse_events_term *term;
 
        ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, 1));
+                                       $1, 1, &@1, NULL));
        $$ = term;
 }
 |
@@ -494,7 +498,7 @@ PE_TERM '=' PE_NAME
 {
        struct parse_events_term *term;
 
-       ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3));
+       ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3));
        $$ = term;
 }
 |
@@ -502,7 +506,7 @@ PE_TERM '=' PE_VALUE
 {
        struct parse_events_term *term;
 
-       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3));
+       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, &@1, &@3));
        $$ = term;
 }
 |
@@ -510,7 +514,7 @@ PE_TERM
 {
        struct parse_events_term *term;
 
-       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1));
+       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
        $$ = term;
 }
 
@@ -520,7 +524,9 @@ sep_slash_dc: '/' | ':' |
 
 %%
 
-void parse_events_error(void *data __maybe_unused, void *scanner __maybe_unused,
+void parse_events_error(YYLTYPE *loc, void *data,
+                       void *scanner __maybe_unused,
                        char const *msg __maybe_unused)
 {
+       parse_events_evlist_error(data, loc->last_column, "parser error");
 }
index 59561fd..367d8b8 100644 (file)
@@ -123,6 +123,10 @@ struct option {
 #define OPT_LONG(s, l, v, h)        { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
 #define OPT_U64(s, l, v, h)         { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
 #define OPT_STRING(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) }
+#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
+       { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), \
+         .value = check_vtype(v, const char **), (a), .help = (h), \
+         .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
 #define OPT_STRING_NOEMPTY(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
 #define OPT_DATE(s, l, v, h) \
        { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
index 4841167..0fcc624 100644 (file)
@@ -112,7 +112,11 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
        if (sret < 0)
                goto error;
 
-       scale[sret] = '\0';
+       if (scale[sret - 1] == '\n')
+               scale[sret - 1] = '\0';
+       else
+               scale[sret] = '\0';
+
        /*
         * save current locale
         */
@@ -154,7 +158,10 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n
 
        close(fd);
 
-       alias->unit[sret] = '\0';
+       if (alias->unit[sret - 1] == '\n')
+               alias->unit[sret - 1] = '\0';
+       else
+               alias->unit[sret] = '\0';
 
        return 0;
 error:
@@ -442,6 +449,10 @@ static struct perf_pmu *pmu_lookup(const char *name)
        LIST_HEAD(aliases);
        __u32 type;
 
+       /* No support for intel_bts or intel_pt so disallow them */
+       if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt"))
+               return NULL;
+
        /*
         * The pmu data we store & need consists of the pmu
         * type value and format definitions. Load both right
@@ -579,6 +590,38 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
        return -1;
 }
 
+static char *formats_error_string(struct list_head *formats)
+{
+       struct perf_pmu_format *format;
+       char *err, *str;
+       static const char *static_terms = "config,config1,config2,name,period,branch_type\n";
+       unsigned i = 0;
+
+       if (!asprintf(&str, "valid terms:"))
+               return NULL;
+
+       /* sysfs exported terms */
+       list_for_each_entry(format, formats, list) {
+               char c = i++ ? ',' : ' ';
+
+               err = str;
+               if (!asprintf(&str, "%s%c%s", err, c, format->name))
+                       goto fail;
+               free(err);
+       }
+
+       /* static terms */
+       err = str;
+       if (!asprintf(&str, "%s,%s", err, static_terms))
+               goto fail;
+
+       free(err);
+       return str;
+fail:
+       free(err);
+       return NULL;
+}
+
 /*
  * Setup one of config[12] attr members based on the
  * user input data - term parameter.
@@ -587,7 +630,7 @@ static int pmu_config_term(struct list_head *formats,
                           struct perf_event_attr *attr,
                           struct parse_events_term *term,
                           struct list_head *head_terms,
-                          bool zero)
+                          bool zero, struct parse_events_error *err)
 {
        struct perf_pmu_format *format;
        __u64 *vp;
@@ -611,6 +654,11 @@ static int pmu_config_term(struct list_head *formats,
        if (!format) {
                if (verbose)
                        printf("Invalid event/parameter '%s'\n", term->config);
+               if (err) {
+                       err->idx  = term->err_term;
+                       err->str  = strdup("unknown term");
+                       err->help = formats_error_string(formats);
+               }
                return -EINVAL;
        }
 
@@ -636,9 +684,14 @@ static int pmu_config_term(struct list_head *formats,
                val = term->val.num;
        else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
                if (strcmp(term->val.str, "?")) {
-                       if (verbose)
+                       if (verbose) {
                                pr_info("Invalid sysfs entry %s=%s\n",
                                                term->config, term->val.str);
+                       }
+                       if (err) {
+                               err->idx = term->err_val;
+                               err->str = strdup("expected numeric value");
+                       }
                        return -EINVAL;
                }
 
@@ -654,12 +707,13 @@ static int pmu_config_term(struct list_head *formats,
 int perf_pmu__config_terms(struct list_head *formats,
                           struct perf_event_attr *attr,
                           struct list_head *head_terms,
-                          bool zero)
+                          bool zero, struct parse_events_error *err)
 {
        struct parse_events_term *term;
 
        list_for_each_entry(term, head_terms, list) {
-               if (pmu_config_term(formats, attr, term, head_terms, zero))
+               if (pmu_config_term(formats, attr, term, head_terms,
+                                   zero, err))
                        return -EINVAL;
        }
 
@@ -672,12 +726,14 @@ int perf_pmu__config_terms(struct list_head *formats,
  * 2) pmu format definitions - specified by pmu parameter
  */
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
-                    struct list_head *head_terms)
+                    struct list_head *head_terms,
+                    struct parse_events_error *err)
 {
        bool zero = !!pmu->default_config;
 
        attr->type = pmu->type;
-       return perf_pmu__config_terms(&pmu->format, attr, head_terms, zero);
+       return perf_pmu__config_terms(&pmu->format, attr, head_terms,
+                                     zero, err);
 }
 
 static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
index 6b1249f..7b9c8cf 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/bitmap.h>
 #include <linux/perf_event.h>
 #include <stdbool.h>
+#include "parse-events.h"
 
 enum {
        PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -47,11 +48,12 @@ struct perf_pmu_alias {
 
 struct perf_pmu *perf_pmu__find(const char *name);
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
-                    struct list_head *head_terms);
+                    struct list_head *head_terms,
+                    struct parse_events_error *error);
 int perf_pmu__config_terms(struct list_head *formats,
                           struct perf_event_attr *attr,
                           struct list_head *head_terms,
-                          bool zero);
+                          bool zero, struct parse_events_error *error);
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
                          struct perf_pmu_info *info);
 struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
index d8bb616..076527b 100644 (file)
@@ -51,6 +51,7 @@
 #define PERFPROBE_GROUP "probe"
 
 bool probe_event_dry_run;      /* Dry run flag */
+struct probe_conf probe_conf;
 
 #define semantic_error(msg ...) pr_err("Semantic error :" msg)
 
@@ -161,18 +162,18 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc)
 
 static struct map *kernel_get_module_map(const char *module)
 {
-       struct rb_node *nd;
        struct map_groups *grp = &host_machine->kmaps;
+       struct maps *maps = &grp->maps[MAP__FUNCTION];
+       struct map *pos;
 
        /* A file path -- this is an offline module */
        if (module && strchr(module, '/'))
-               return machine__new_module(host_machine, 0, module);
+               return machine__findnew_module_map(host_machine, 0, module);
 
        if (!module)
                module = "kernel";
 
-       for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
-               struct map *pos = rb_entry(nd, struct map, rb_node);
+       for (pos = maps__first(maps); pos; pos = map__next(pos)) {
                if (strncmp(pos->dso->short_name + 1, module,
                            pos->dso->short_name_len - 2) == 0) {
                        return pos;
@@ -194,52 +195,11 @@ static void put_target_map(struct map *map, bool user)
 {
        if (map && user) {
                /* Only the user map needs to be released */
-               dso__delete(map->dso);
-               map__delete(map);
+               map__put(map);
        }
 }
 
 
-static struct dso *kernel_get_module_dso(const char *module)
-{
-       struct dso *dso;
-       struct map *map;
-       const char *vmlinux_name;
-
-       if (module) {
-               list_for_each_entry(dso, &host_machine->kernel_dsos.head,
-                                   node) {
-                       if (strncmp(dso->short_name + 1, module,
-                                   dso->short_name_len - 2) == 0)
-                               goto found;
-               }
-               pr_debug("Failed to find module %s.\n", module);
-               return NULL;
-       }
-
-       map = host_machine->vmlinux_maps[MAP__FUNCTION];
-       dso = map->dso;
-
-       vmlinux_name = symbol_conf.vmlinux_name;
-       if (vmlinux_name) {
-               if (dso__load_vmlinux(dso, map, vmlinux_name, false, NULL) <= 0)
-                       return NULL;
-       } else {
-               if (dso__load_vmlinux_path(dso, map, NULL) <= 0) {
-                       pr_debug("Failed to load kernel map.\n");
-                       return NULL;
-               }
-       }
-found:
-       return dso;
-}
-
-const char *kernel_get_module_path(const char *module)
-{
-       struct dso *dso = kernel_get_module_dso(module);
-       return (dso) ? dso->long_name : NULL;
-}
-
 static int convert_exec_to_group(const char *exec, char **result)
 {
        char *ptr1, *ptr2, *exec_copy;
@@ -286,7 +246,55 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
                clear_probe_trace_event(tevs + i);
 }
 
+static bool kprobe_blacklist__listed(unsigned long address);
+static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
+{
+       /* Get the address of _etext for checking non-probable text symbol */
+       if (kernel_get_symbol_address_by_name("_etext", false) < address)
+               pr_warning("%s is out of .text, skip it.\n", symbol);
+       else if (kprobe_blacklist__listed(address))
+               pr_warning("%s is blacklisted function, skip it.\n", symbol);
+       else
+               return false;
+
+       return true;
+}
+
 #ifdef HAVE_DWARF_SUPPORT
+
+static int kernel_get_module_dso(const char *module, struct dso **pdso)
+{
+       struct dso *dso;
+       struct map *map;
+       const char *vmlinux_name;
+       int ret = 0;
+
+       if (module) {
+               list_for_each_entry(dso, &host_machine->dsos.head, node) {
+                       if (!dso->kernel)
+                               continue;
+                       if (strncmp(dso->short_name + 1, module,
+                                   dso->short_name_len - 2) == 0)
+                               goto found;
+               }
+               pr_debug("Failed to find module %s.\n", module);
+               return -ENOENT;
+       }
+
+       map = host_machine->vmlinux_maps[MAP__FUNCTION];
+       dso = map->dso;
+
+       vmlinux_name = symbol_conf.vmlinux_name;
+       dso->load_errno = 0;
+       if (vmlinux_name)
+               ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL);
+       else
+               ret = dso__load_vmlinux_path(dso, map, NULL);
+found:
+       *pdso = dso;
+       return ret;
+}
+
 /*
  * Some binaries like glibc have special symbols which are on the symbol
  * table, but not in the debuginfo. If we can find the address of the
@@ -344,15 +352,14 @@ out:
 
 static int get_alternative_probe_event(struct debuginfo *dinfo,
                                       struct perf_probe_event *pev,
-                                      struct perf_probe_point *tmp,
-                                      const char *target)
+                                      struct perf_probe_point *tmp)
 {
        int ret;
 
        memcpy(tmp, &pev->point, sizeof(*tmp));
        memset(&pev->point, 0, sizeof(pev->point));
        ret = find_alternative_probe_point(dinfo, tmp, &pev->point,
-                                          target, pev->uprobes);
+                                          pev->target, pev->uprobes);
        if (ret < 0)
                memcpy(&pev->point, tmp, sizeof(*tmp));
 
@@ -390,16 +397,25 @@ static int get_alternative_line_range(struct debuginfo *dinfo,
 static struct debuginfo *open_debuginfo(const char *module, bool silent)
 {
        const char *path = module;
-       struct debuginfo *ret;
+       char reason[STRERR_BUFSIZE];
+       struct debuginfo *ret = NULL;
+       struct dso *dso = NULL;
+       int err;
 
        if (!module || !strchr(module, '/')) {
-               path = kernel_get_module_path(module);
-               if (!path) {
+               err = kernel_get_module_dso(module, &dso);
+               if (err < 0) {
+                       if (!dso || dso->load_errno == 0) {
+                               if (!strerror_r(-err, reason, STRERR_BUFSIZE))
+                                       strcpy(reason, "(unknown)");
+                       } else
+                               dso__strerror_load(dso, reason, STRERR_BUFSIZE);
                        if (!silent)
-                               pr_err("Failed to find path of %s module.\n",
-                                      module ?: "kernel");
+                               pr_err("Failed to find the path for %s: %s\n",
+                                       module ?: "kernel", reason);
                        return NULL;
                }
+               path = dso->long_name;
        }
        ret = debuginfo__new(path);
        if (!ret && !silent) {
@@ -413,6 +429,41 @@ static struct debuginfo *open_debuginfo(const char *module, bool silent)
        return ret;
 }
 
+/* For caching the last debuginfo */
+static struct debuginfo *debuginfo_cache;
+static char *debuginfo_cache_path;
+
+static struct debuginfo *debuginfo_cache__open(const char *module, bool silent)
+{
+       if ((debuginfo_cache_path && !strcmp(debuginfo_cache_path, module)) ||
+           (!debuginfo_cache_path && !module && debuginfo_cache))
+               goto out;
+
+       /* Copy module path */
+       free(debuginfo_cache_path);
+       if (module) {
+               debuginfo_cache_path = strdup(module);
+               if (!debuginfo_cache_path) {
+                       debuginfo__delete(debuginfo_cache);
+                       debuginfo_cache = NULL;
+                       goto out;
+               }
+       }
+
+       debuginfo_cache = open_debuginfo(module, silent);
+       if (!debuginfo_cache)
+               zfree(&debuginfo_cache_path);
+out:
+       return debuginfo_cache;
+}
+
+static void debuginfo_cache__exit(void)
+{
+       debuginfo__delete(debuginfo_cache);
+       debuginfo_cache = NULL;
+       zfree(&debuginfo_cache_path);
+}
+
 
 static int get_text_start_address(const char *exec, unsigned long *address)
 {
@@ -474,12 +525,11 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
        pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
                 tp->module ? : "kernel");
 
-       dinfo = open_debuginfo(tp->module, verbose == 0);
-       if (dinfo) {
+       dinfo = debuginfo_cache__open(tp->module, verbose == 0);
+       if (dinfo)
                ret = debuginfo__find_probe_point(dinfo,
                                                 (unsigned long)addr, pp);
-               debuginfo__delete(dinfo);
-       } else
+       else
                ret = -ENOENT;
 
        if (ret > 0) {
@@ -558,7 +608,7 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
 {
        struct ref_reloc_sym *reloc_sym;
        char *tmp;
-       int i;
+       int i, skipped = 0;
 
        if (uprobe)
                return add_exec_to_probe_trace_events(tevs, ntevs, module);
@@ -574,31 +624,40 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
        }
 
        for (i = 0; i < ntevs; i++) {
-               if (tevs[i].point.address && !tevs[i].point.retprobe) {
+               if (!tevs[i].point.address || tevs[i].point.retprobe)
+                       continue;
+               /* If we found a wrong one, mark it by NULL symbol */
+               if (kprobe_warn_out_range(tevs[i].point.symbol,
+                                         tevs[i].point.address)) {
+                       tmp = NULL;
+                       skipped++;
+               } else {
                        tmp = strdup(reloc_sym->name);
                        if (!tmp)
                                return -ENOMEM;
-                       free(tevs[i].point.symbol);
-                       tevs[i].point.symbol = tmp;
-                       tevs[i].point.offset = tevs[i].point.address -
-                                              reloc_sym->unrelocated_addr;
                }
+               /* If we have no realname, use symbol for it */
+               if (!tevs[i].point.realname)
+                       tevs[i].point.realname = tevs[i].point.symbol;
+               else
+                       free(tevs[i].point.symbol);
+               tevs[i].point.symbol = tmp;
+               tevs[i].point.offset = tevs[i].point.address -
+                                      reloc_sym->unrelocated_addr;
        }
-       return 0;
+       return skipped;
 }
 
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
-                                         struct probe_trace_event **tevs,
-                                         int max_tevs, const char *target)
+                                         struct probe_trace_event **tevs)
 {
        bool need_dwarf = perf_probe_event_need_dwarf(pev);
        struct perf_probe_point tmp;
        struct debuginfo *dinfo;
        int ntevs, ret = 0;
 
-       dinfo = open_debuginfo(target, !need_dwarf);
-
+       dinfo = open_debuginfo(pev->target, !need_dwarf);
        if (!dinfo) {
                if (need_dwarf)
                        return -ENOENT;
@@ -608,13 +667,12 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 
        pr_debug("Try to find probe point from debuginfo.\n");
        /* Searching trace events corresponding to a probe event */
-       ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs);
+       ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
 
        if (ntevs == 0) {  /* Not found, retry with an alternative */
-               ret = get_alternative_probe_event(dinfo, pev, &tmp, target);
+               ret = get_alternative_probe_event(dinfo, pev, &tmp);
                if (!ret) {
-                       ntevs = debuginfo__find_trace_events(dinfo, pev,
-                                                            tevs, max_tevs);
+                       ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
                        /*
                         * Write back to the original probe_event for
                         * setting appropriate (user given) event name
@@ -629,12 +687,15 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
        if (ntevs > 0) {        /* Succeeded to find trace events */
                pr_debug("Found %d probe_trace_events.\n", ntevs);
                ret = post_process_probe_trace_events(*tevs, ntevs,
-                                                       target, pev->uprobes);
-               if (ret < 0) {
+                                               pev->target, pev->uprobes);
+               if (ret < 0 || ret == ntevs) {
                        clear_probe_trace_events(*tevs, ntevs);
                        zfree(tevs);
                }
-               return ret < 0 ? ret : ntevs;
+               if (ret != ntevs)
+                       return ret < 0 ? ret : ntevs;
+               ntevs = 0;
+               /* Fall through */
        }
 
        if (ntevs == 0) {       /* No error but failed to find probe point. */
@@ -809,8 +870,7 @@ int show_line_range(struct line_range *lr, const char *module, bool user)
 
 static int show_available_vars_at(struct debuginfo *dinfo,
                                  struct perf_probe_event *pev,
-                                 int max_vls, struct strfilter *_filter,
-                                 bool externs, const char *target)
+                                 struct strfilter *_filter)
 {
        char *buf;
        int ret, i, nvars;
@@ -824,13 +884,12 @@ static int show_available_vars_at(struct debuginfo *dinfo,
                return -EINVAL;
        pr_debug("Searching variables at %s\n", buf);
 
-       ret = debuginfo__find_available_vars_at(dinfo, pev, &vls,
-                                               max_vls, externs);
+       ret = debuginfo__find_available_vars_at(dinfo, pev, &vls);
        if (!ret) {  /* Not found, retry with an alternative */
-               ret = get_alternative_probe_event(dinfo, pev, &tmp, target);
+               ret = get_alternative_probe_event(dinfo, pev, &tmp);
                if (!ret) {
                        ret = debuginfo__find_available_vars_at(dinfo, pev,
-                                               &vls, max_vls, externs);
+                                                               &vls);
                        /* Release the old probe_point */
                        clear_perf_probe_point(&tmp);
                }
@@ -877,8 +936,7 @@ end:
 
 /* Show available variables on given probe point */
 int show_available_vars(struct perf_probe_event *pevs, int npevs,
-                       int max_vls, const char *module,
-                       struct strfilter *_filter, bool externs)
+                       struct strfilter *_filter)
 {
        int i, ret = 0;
        struct debuginfo *dinfo;
@@ -887,7 +945,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
        if (ret < 0)
                return ret;
 
-       dinfo = open_debuginfo(module, false);
+       dinfo = open_debuginfo(pevs->target, false);
        if (!dinfo) {
                ret = -ENOENT;
                goto out;
@@ -896,8 +954,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
        setup_pager();
 
        for (i = 0; i < npevs && ret >= 0; i++)
-               ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter,
-                                            externs, module);
+               ret = show_available_vars_at(dinfo, &pevs[i], _filter);
 
        debuginfo__delete(dinfo);
 out:
@@ -907,6 +964,10 @@ out:
 
 #else  /* !HAVE_DWARF_SUPPORT */
 
+static void debuginfo_cache__exit(void)
+{
+}
+
 static int
 find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
                                 struct perf_probe_point *pp __maybe_unused,
@@ -916,9 +977,7 @@ find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
 }
 
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
-                               struct probe_trace_event **tevs __maybe_unused,
-                               int max_tevs __maybe_unused,
-                               const char *target __maybe_unused)
+                               struct probe_trace_event **tevs __maybe_unused)
 {
        if (perf_probe_event_need_dwarf(pev)) {
                pr_warning("Debuginfo-analysis is not supported.\n");
@@ -937,10 +996,8 @@ int show_line_range(struct line_range *lr __maybe_unused,
 }
 
 int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
-                       int npevs __maybe_unused, int max_vls __maybe_unused,
-                       const char *module __maybe_unused,
-                       struct strfilter *filter __maybe_unused,
-                       bool externs __maybe_unused)
+                       int npevs __maybe_unused,
+                       struct strfilter *filter __maybe_unused)
 {
        pr_warning("Debuginfo-analysis is not supported.\n");
        return -ENOSYS;
@@ -980,6 +1037,18 @@ static int parse_line_num(char **ptr, int *val, const char *what)
        return 0;
 }
 
+/* Check the name is good for event, group or function */
+static bool is_c_func_name(const char *name)
+{
+       if (!isalpha(*name) && *name != '_')
+               return false;
+       while (*++name != '\0') {
+               if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+                       return false;
+       }
+       return true;
+}
+
 /*
  * Stuff 'lr' according to the line range described by 'arg'.
  * The line range syntax is described by:
@@ -1048,10 +1117,15 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
                        goto err;
                }
                lr->function = name;
-       } else if (strchr(name, '.'))
+       } else if (strchr(name, '/') || strchr(name, '.'))
                lr->file = name;
-       else
+       else if (is_c_func_name(name))/* We reuse it for checking funcname */
                lr->function = name;
+       else {  /* Invalid name */
+               semantic_error("'%s' is not a valid function name.\n", name);
+               err = -EINVAL;
+               goto err;
+       }
 
        return 0;
 err:
@@ -1059,24 +1133,13 @@ err:
        return err;
 }
 
-/* Check the name is good for event/group */
-static bool check_event_name(const char *name)
-{
-       if (!isalpha(*name) && *name != '_')
-               return false;
-       while (*++name != '\0') {
-               if (!isalpha(*name) && !isdigit(*name) && *name != '_')
-                       return false;
-       }
-       return true;
-}
-
 /* Parse probepoint definition. */
 static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 {
        struct perf_probe_point *pp = &pev->point;
        char *ptr, *tmp;
        char c, nc = 0;
+       bool file_spec = false;
        /*
         * <Syntax>
         * perf probe [EVENT=]SRC[:LN|;PTN]
@@ -1084,6 +1147,8 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
         *
         * TODO:Group name support
         */
+       if (!arg)
+               return -EINVAL;
 
        ptr = strpbrk(arg, ";=@+%");
        if (ptr && *ptr == '=') {       /* Event name */
@@ -1093,7 +1158,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
                        semantic_error("Group name is not supported yet.\n");
                        return -ENOTSUP;
                }
-               if (!check_event_name(arg)) {
+               if (!is_c_func_name(arg)) {
                        semantic_error("%s is bad for event name -it must "
                                       "follow C symbol-naming rule.\n", arg);
                        return -EINVAL;
@@ -1105,6 +1170,23 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
                arg = tmp;
        }
 
+       /*
+        * Check arg is function or file name and copy it.
+        *
+        * We consider arg to be a file spec if and only if it satisfies
+        * all of the below criteria::
+        * - it does not include any of "+@%",
+        * - it includes one of ":;", and
+        * - it has a period '.' in the name.
+        *
+        * Otherwise, we consider arg to be a function specification.
+        */
+       if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) {
+               /* This is a file spec if it includes a '.' before ; or : */
+               if (memchr(arg, '.', ptr - arg))
+                       file_spec = true;
+       }
+
        ptr = strpbrk(arg, ";:+@%");
        if (ptr) {
                nc = *ptr;
@@ -1115,10 +1197,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
        if (tmp == NULL)
                return -ENOMEM;
 
-       /* Check arg is function or file and copy it */
-       if (strchr(tmp, '.'))   /* File */
+       if (file_spec)
                pp->file = tmp;
-       else                    /* Function */
+       else
                pp->function = tmp;
 
        /* Parse other options */
@@ -1760,8 +1841,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
 
 out:
        if (map && !is_kprobe) {
-               dso__delete(map->dso);
-               map__delete(map);
+               map__put(map);
        }
 
        return ret;
@@ -1875,6 +1955,7 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)
        free(tev->event);
        free(tev->group);
        free(tev->point.symbol);
+       free(tev->point.realname);
        free(tev->point.module);
        for (i = 0; i < tev->nargs; i++) {
                free(tev->args[i].name);
@@ -1952,7 +2033,7 @@ static int open_probe_events(const char *trace_file, bool readwrite)
        if (ret >= 0) {
                pr_debug("Opening %s write=%d\n", buf, readwrite);
                if (readwrite && !probe_event_dry_run)
-                       ret = open(buf, O_RDWR, O_APPEND);
+                       ret = open(buf, O_RDWR | O_APPEND, 0);
                else
                        ret = open(buf, O_RDONLY, 0);
 
@@ -2093,9 +2174,31 @@ kprobe_blacklist__find_by_address(struct list_head *blacklist,
        return NULL;
 }
 
-/* Show an event */
-static int show_perf_probe_event(struct perf_probe_event *pev,
-                                const char *module)
+static LIST_HEAD(kprobe_blacklist);
+
+static void kprobe_blacklist__init(void)
+{
+       if (!list_empty(&kprobe_blacklist))
+               return;
+
+       if (kprobe_blacklist__load(&kprobe_blacklist) < 0)
+               pr_debug("No kprobe blacklist support, ignored\n");
+}
+
+static void kprobe_blacklist__release(void)
+{
+       kprobe_blacklist__delete(&kprobe_blacklist);
+}
+
+static bool kprobe_blacklist__listed(unsigned long address)
+{
+       return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
+}
+
+static int perf_probe_event__sprintf(const char *group, const char *event,
+                                    struct perf_probe_event *pev,
+                                    const char *module,
+                                    struct strbuf *result)
 {
        int i, ret;
        char buf[128];
@@ -2106,30 +2209,67 @@ static int show_perf_probe_event(struct perf_probe_event *pev,
        if (!place)
                return -EINVAL;
 
-       ret = e_snprintf(buf, 128, "%s:%s", pev->group, pev->event);
+       ret = e_snprintf(buf, 128, "%s:%s", group, event);
        if (ret < 0)
-               return ret;
+               goto out;
 
-       pr_info("  %-20s (on %s", buf, place);
+       strbuf_addf(result, "  %-20s (on %s", buf, place);
        if (module)
-               pr_info(" in %s", module);
+               strbuf_addf(result, " in %s", module);
 
        if (pev->nargs > 0) {
-               pr_info(" with");
+               strbuf_addstr(result, " with");
                for (i = 0; i < pev->nargs; i++) {
                        ret = synthesize_perf_probe_arg(&pev->args[i],
                                                        buf, 128);
                        if (ret < 0)
-                               break;
-                       pr_info(" %s", buf);
+                               goto out;
+                       strbuf_addf(result, " %s", buf);
                }
        }
-       pr_info(")\n");
+       strbuf_addch(result, ')');
+out:
        free(place);
        return ret;
 }
 
-static int __show_perf_probe_events(int fd, bool is_kprobe)
+/* Show an event */
+static int show_perf_probe_event(const char *group, const char *event,
+                                struct perf_probe_event *pev,
+                                const char *module, bool use_stdout)
+{
+       struct strbuf buf = STRBUF_INIT;
+       int ret;
+
+       ret = perf_probe_event__sprintf(group, event, pev, module, &buf);
+       if (ret >= 0) {
+               if (use_stdout)
+                       printf("%s\n", buf.buf);
+               else
+                       pr_info("%s\n", buf.buf);
+       }
+       strbuf_release(&buf);
+
+       return ret;
+}
+
+static bool filter_probe_trace_event(struct probe_trace_event *tev,
+                                    struct strfilter *filter)
+{
+       char tmp[128];
+
+       /* At first, check the event name itself */
+       if (strfilter__compare(filter, tev->event))
+               return true;
+
+       /* Next, check the combination of name and group */
+       if (e_snprintf(tmp, 128, "%s:%s", tev->group, tev->event) < 0)
+               return false;
+       return strfilter__compare(filter, tmp);
+}
+
+static int __show_perf_probe_events(int fd, bool is_kprobe,
+                                   struct strfilter *filter)
 {
        int ret = 0;
        struct probe_trace_event tev;
@@ -2147,24 +2287,31 @@ static int __show_perf_probe_events(int fd, bool is_kprobe)
        strlist__for_each(ent, rawlist) {
                ret = parse_probe_trace_command(ent->s, &tev);
                if (ret >= 0) {
+                       if (!filter_probe_trace_event(&tev, filter))
+                               goto next;
                        ret = convert_to_perf_probe_event(&tev, &pev,
                                                                is_kprobe);
-                       if (ret >= 0)
-                               ret = show_perf_probe_event(&pev,
-                                                           tev.point.module);
+                       if (ret < 0)
+                               goto next;
+                       ret = show_perf_probe_event(pev.group, pev.event,
+                                                   &pev, tev.point.module,
+                                                   true);
                }
+next:
                clear_perf_probe_event(&pev);
                clear_probe_trace_event(&tev);
                if (ret < 0)
                        break;
        }
        strlist__delete(rawlist);
+       /* Cleanup cached debuginfo if needed */
+       debuginfo_cache__exit();
 
        return ret;
 }
 
 /* List up current perf-probe events */
-int show_perf_probe_events(void)
+int show_perf_probe_events(struct strfilter *filter)
 {
        int kp_fd, up_fd, ret;
 
@@ -2176,7 +2323,7 @@ int show_perf_probe_events(void)
 
        kp_fd = open_kprobe_events(false);
        if (kp_fd >= 0) {
-               ret = __show_perf_probe_events(kp_fd, true);
+               ret = __show_perf_probe_events(kp_fd, true, filter);
                close(kp_fd);
                if (ret < 0)
                        goto out;
@@ -2190,7 +2337,7 @@ int show_perf_probe_events(void)
        }
 
        if (up_fd >= 0) {
-               ret = __show_perf_probe_events(up_fd, false);
+               ret = __show_perf_probe_events(up_fd, false, filter);
                close(up_fd);
        }
 out:
@@ -2264,6 +2411,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
                              struct strlist *namelist, bool allow_suffix)
 {
        int i, ret;
+       char *p;
+
+       if (*base == '.')
+               base++;
 
        /* Try no suffix */
        ret = e_snprintf(buf, len, "%s", base);
@@ -2271,6 +2422,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
                pr_debug("snprintf() failed: %d\n", ret);
                return ret;
        }
+       /* Cut off the postfixes (e.g. .const, .isra)*/
+       p = strchr(buf, '.');
+       if (p && p != buf)
+               *p = '\0';
        if (!strlist__has_entry(namelist, buf))
                return 0;
 
@@ -2326,10 +2481,9 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
        int i, fd, ret;
        struct probe_trace_event *tev = NULL;
        char buf[64];
-       const char *event, *group;
+       const char *event = NULL, *group = NULL;
        struct strlist *namelist;
-       LIST_HEAD(blacklist);
-       struct kprobe_blacklist_node *node;
+       bool safename;
 
        if (pev->uprobes)
                fd = open_uprobe_events(true);
@@ -2345,34 +2499,26 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
        namelist = get_probe_trace_event_names(fd, false);
        if (!namelist) {
                pr_debug("Failed to get current event list.\n");
-               return -EIO;
-       }
-       /* Get kprobe blacklist if exists */
-       if (!pev->uprobes) {
-               ret = kprobe_blacklist__load(&blacklist);
-               if (ret < 0)
-                       pr_debug("No kprobe blacklist support, ignored\n");
+               ret = -ENOMEM;
+               goto close_out;
        }
 
+       safename = (pev->point.function && !strisglob(pev->point.function));
        ret = 0;
        pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
        for (i = 0; i < ntevs; i++) {
                tev = &tevs[i];
-               /* Ensure that the address is NOT blacklisted */
-               node = kprobe_blacklist__find_by_address(&blacklist,
-                                                        tev->point.address);
-               if (node) {
-                       pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol);
+               /* Skip if the symbol is out of .text or blacklisted */
+               if (!tev->point.symbol)
                        continue;
-               }
 
                if (pev->event)
                        event = pev->event;
                else
-                       if (pev->point.function)
+                       if (safename)
                                event = pev->point.function;
                        else
-                               event = tev->point.symbol;
+                               event = tev->point.realname;
                if (pev->group)
                        group = pev->group;
                else
@@ -2397,15 +2543,12 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
                /* Add added event name to namelist */
                strlist__add(namelist, event);
 
-               /* Trick here - save current event/group */
-               event = pev->event;
-               group = pev->group;
-               pev->event = tev->event;
-               pev->group = tev->group;
-               show_perf_probe_event(pev, tev->point.module);
-               /* Trick here - restore current event/group */
-               pev->event = (char *)event;
-               pev->group = (char *)group;
+               /* We use tev's name for showing new events */
+               show_perf_probe_event(tev->group, tev->event, pev,
+                                     tev->point.module, false);
+               /* Save the last valid name */
+               event = tev->event;
+               group = tev->group;
 
                /*
                 * Probes after the first probe which comes from same
@@ -2419,26 +2562,34 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
                warn_uprobe_event_compat(tev);
 
        /* Note that it is possible to skip all events because of blacklist */
-       if (ret >= 0 && tev->event) {
+       if (ret >= 0 && event) {
                /* Show how to use the event. */
                pr_info("\nYou can now use it in all perf tools, such as:\n\n");
-               pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
-                        tev->event);
+               pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
        }
 
-       kprobe_blacklist__delete(&blacklist);
        strlist__delete(namelist);
+close_out:
        close(fd);
        return ret;
 }
 
-static int find_probe_functions(struct map *map, char *name)
+static int find_probe_functions(struct map *map, char *name,
+                               struct symbol **syms)
 {
        int found = 0;
        struct symbol *sym;
+       struct rb_node *tmp;
+
+       if (map__load(map, NULL) < 0)
+               return 0;
 
-       map__for_each_symbol_by_name(map, name, sym) {
-               found++;
+       map__for_each_symbol(map, sym, tmp) {
+               if (strglobmatch(sym->name, name)) {
+                       found++;
+                       if (syms && found < probe_conf.max_probes)
+                               syms[found - 1] = sym;
+               }
        }
 
        return found;
@@ -2447,42 +2598,52 @@ static int find_probe_functions(struct map *map, char *name)
 #define strdup_or_goto(str, label)     \
        ({ char *__p = strdup(str); if (!__p) goto label; __p; })
 
+void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
+                               struct probe_trace_event *tev __maybe_unused,
+                               struct map *map __maybe_unused) { }
+
 /*
  * Find probe function addresses from map.
  * Return an error or the number of found probe_trace_event
  */
 static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
-                                           struct probe_trace_event **tevs,
-                                           int max_tevs, const char *target)
+                                           struct probe_trace_event **tevs)
 {
        struct map *map = NULL;
        struct ref_reloc_sym *reloc_sym = NULL;
        struct symbol *sym;
+       struct symbol **syms = NULL;
        struct probe_trace_event *tev;
        struct perf_probe_point *pp = &pev->point;
        struct probe_trace_point *tp;
        int num_matched_functions;
-       int ret, i;
+       int ret, i, j, skipped = 0;
 
-       map = get_target_map(target, pev->uprobes);
+       map = get_target_map(pev->target, pev->uprobes);
        if (!map) {
                ret = -EINVAL;
                goto out;
        }
 
+       syms = malloc(sizeof(struct symbol *) * probe_conf.max_probes);
+       if (!syms) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
        /*
         * Load matched symbols: Since the different local symbols may have
         * same name but different addresses, this lists all the symbols.
         */
-       num_matched_functions = find_probe_functions(map, pp->function);
+       num_matched_functions = find_probe_functions(map, pp->function, syms);
        if (num_matched_functions == 0) {
                pr_err("Failed to find symbol %s in %s\n", pp->function,
-                       target ? : "kernel");
+                       pev->target ? : "kernel");
                ret = -ENOENT;
                goto out;
-       } else if (num_matched_functions > max_tevs) {
+       } else if (num_matched_functions > probe_conf.max_probes) {
                pr_err("Too many functions matched in %s\n",
-                       target ? : "kernel");
+                       pev->target ? : "kernel");
                ret = -E2BIG;
                goto out;
        }
@@ -2505,7 +2666,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 
        ret = 0;
 
-       map__for_each_symbol_by_name(map, pp->function, sym) {
+       for (j = 0; j < num_matched_functions; j++) {
+               sym = syms[j];
+
                tev = (*tevs) + ret;
                tp = &tev->point;
                if (ret == num_matched_functions) {
@@ -2522,16 +2685,24 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
                }
                /* Add one probe point */
                tp->address = map->unmap_ip(map, sym->start) + pp->offset;
-               if (reloc_sym) {
+               /* If we found a wrong one, mark it by NULL symbol */
+               if (!pev->uprobes &&
+                   kprobe_warn_out_range(sym->name, tp->address)) {
+                       tp->symbol = NULL;      /* Skip it */
+                       skipped++;
+               } else if (reloc_sym) {
                        tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out);
                        tp->offset = tp->address - reloc_sym->addr;
                } else {
                        tp->symbol = strdup_or_goto(sym->name, nomem_out);
                        tp->offset = pp->offset;
                }
+               tp->realname = strdup_or_goto(sym->name, nomem_out);
+
                tp->retprobe = pp->retprobe;
-               if (target)
-                       tev->point.module = strdup_or_goto(target, nomem_out);
+               if (pev->target)
+                       tev->point.module = strdup_or_goto(pev->target,
+                                                          nomem_out);
                tev->uprobes = pev->uprobes;
                tev->nargs = pev->nargs;
                if (tev->nargs) {
@@ -2553,10 +2724,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
                                        strdup_or_goto(pev->args[i].type,
                                                        nomem_out);
                }
+               arch__fix_tev_from_maps(pev, tev, map);
+       }
+       if (ret == skipped) {
+               ret = -ENOENT;
+               goto err_out;
        }
 
 out:
        put_target_map(map, pev->uprobes);
+       free(syms);
        return ret;
 
 nomem_out:
@@ -2567,27 +2744,34 @@ err_out:
        goto out;
 }
 
+bool __weak arch__prefers_symtab(void) { return false; }
+
 static int convert_to_probe_trace_events(struct perf_probe_event *pev,
-                                         struct probe_trace_event **tevs,
-                                         int max_tevs, const char *target)
+                                        struct probe_trace_event **tevs)
 {
        int ret;
 
        if (pev->uprobes && !pev->group) {
                /* Replace group name if not given */
-               ret = convert_exec_to_group(target, &pev->group);
+               ret = convert_exec_to_group(pev->target, &pev->group);
                if (ret != 0) {
                        pr_warning("Failed to make a group name.\n");
                        return ret;
                }
        }
 
+       if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
+               ret = find_probe_trace_events_from_map(pev, tevs);
+               if (ret > 0)
+                       return ret; /* Found in symbol table */
+       }
+
        /* Convert perf_probe_event with debuginfo */
-       ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target);
+       ret = try_to_find_probe_trace_events(pev, tevs);
        if (ret != 0)
                return ret;     /* Found in debuginfo or got an error */
 
-       return find_probe_trace_events_from_map(pev, tevs, max_tevs, target);
+       return find_probe_trace_events_from_map(pev, tevs);
 }
 
 struct __event_package {
@@ -2596,8 +2780,7 @@ struct __event_package {
        int                             ntevs;
 };
 
-int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-                         int max_tevs, bool force_add)
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
 {
        int i, j, ret;
        struct __event_package *pkgs;
@@ -2617,20 +2800,24 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
        /* Loop 1: convert all events */
        for (i = 0; i < npevs; i++) {
                pkgs[i].pev = &pevs[i];
+               /* Init kprobe blacklist if needed */
+               if (!pkgs[i].pev->uprobes)
+                       kprobe_blacklist__init();
                /* Convert with or without debuginfo */
                ret  = convert_to_probe_trace_events(pkgs[i].pev,
-                                                    &pkgs[i].tevs,
-                                                    max_tevs,
-                                                    pkgs[i].pev->target);
+                                                    &pkgs[i].tevs);
                if (ret < 0)
                        goto end;
                pkgs[i].ntevs = ret;
        }
+       /* This just release blacklist only if allocated */
+       kprobe_blacklist__release();
 
        /* Loop 2: add all events */
        for (i = 0; i < npevs; i++) {
                ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs,
-                                               pkgs[i].ntevs, force_add);
+                                              pkgs[i].ntevs,
+                                              probe_conf.force_add);
                if (ret < 0)
                        break;
        }
@@ -2682,40 +2869,39 @@ error:
        return ret;
 }
 
-static int del_trace_probe_event(int fd, const char *buf,
-                                                 struct strlist *namelist)
+static int del_trace_probe_events(int fd, struct strfilter *filter,
+                                 struct strlist *namelist)
 {
-       struct str_node *ent, *n;
-       int ret = -1;
+       struct str_node *ent;
+       const char *p;
+       int ret = -ENOENT;
 
-       if (strpbrk(buf, "*?")) { /* Glob-exp */
-               strlist__for_each_safe(ent, n, namelist)
-                       if (strglobmatch(ent->s, buf)) {
-                               ret = __del_trace_probe_event(fd, ent);
-                               if (ret < 0)
-                                       break;
-                               strlist__remove(namelist, ent);
-                       }
-       } else {
-               ent = strlist__find(namelist, buf);
-               if (ent) {
+       if (!namelist)
+               return -ENOENT;
+
+       strlist__for_each(ent, namelist) {
+               p = strchr(ent->s, ':');
+               if ((p && strfilter__compare(filter, p + 1)) ||
+                   strfilter__compare(filter, ent->s)) {
                        ret = __del_trace_probe_event(fd, ent);
-                       if (ret >= 0)
-                               strlist__remove(namelist, ent);
+                       if (ret < 0)
+                               break;
                }
        }
 
        return ret;
 }
 
-int del_perf_probe_events(struct strlist *dellist)
+int del_perf_probe_events(struct strfilter *filter)
 {
-       int ret = -1, ufd = -1, kfd = -1;
-       char buf[128];
-       const char *group, *event;
-       char *p, *str;
-       struct str_node *ent;
+       int ret, ret2, ufd = -1, kfd = -1;
        struct strlist *namelist = NULL, *unamelist = NULL;
+       char *str = strfilter__string(filter);
+
+       if (!str)
+               return -EINVAL;
+
+       pr_debug("Delete filter: \'%s\'\n", str);
 
        /* Get current event names */
        kfd = open_kprobe_events(true);
@@ -2728,49 +2914,23 @@ int del_perf_probe_events(struct strlist *dellist)
 
        if (kfd < 0 && ufd < 0) {
                print_both_open_warning(kfd, ufd);
+               ret = kfd;
                goto error;
        }
 
-       if (namelist == NULL && unamelist == NULL)
+       ret = del_trace_probe_events(kfd, filter, namelist);
+       if (ret < 0 && ret != -ENOENT)
                goto error;
 
-       strlist__for_each(ent, dellist) {
-               str = strdup(ent->s);
-               if (str == NULL) {
-                       ret = -ENOMEM;
-                       goto error;
-               }
-               pr_debug("Parsing: %s\n", str);
-               p = strchr(str, ':');
-               if (p) {
-                       group = str;
-                       *p = '\0';
-                       event = p + 1;
-               } else {
-                       group = "*";
-                       event = str;
-               }
-
-               ret = e_snprintf(buf, 128, "%s:%s", group, event);
-               if (ret < 0) {
-                       pr_err("Failed to copy event.");
-                       free(str);
-                       goto error;
-               }
-
-               pr_debug("Group: %s, Event: %s\n", group, event);
-
-               if (namelist)
-                       ret = del_trace_probe_event(kfd, buf, namelist);
-
-               if (unamelist && ret != 0)
-                       ret = del_trace_probe_event(ufd, buf, unamelist);
-
-               if (ret != 0)
-                       pr_info("Info: Event \"%s\" does not exist.\n", buf);
-
-               free(str);
+       ret2 = del_trace_probe_events(ufd, filter, unamelist);
+       if (ret2 < 0 && ret2 != -ENOENT) {
+               ret = ret2;
+               goto error;
        }
+       if (ret == -ENOENT && ret2 == -ENOENT)
+               pr_debug("\"%s\" does not hit any event.\n", str);
+               /* Note that this is silently ignored */
+       ret = 0;
 
 error:
        if (kfd >= 0) {
@@ -2782,6 +2942,7 @@ error:
                strlist__delete(unamelist);
                close(ufd);
        }
+       free(str);
 
        return ret;
 }
@@ -2835,8 +2996,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
        dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
 end:
        if (user) {
-               dso__delete(map->dso);
-               map__delete(map);
+               map__put(map);
        }
        exit_symbol_maps();
 
index d6b7834..31db6ee 100644 (file)
@@ -6,10 +6,20 @@
 #include "strlist.h"
 #include "strfilter.h"
 
+/* Probe related configurations */
+struct probe_conf {
+       bool    show_ext_vars;
+       bool    show_location_range;
+       bool    force_add;
+       bool    no_inlines;
+       int     max_probes;
+};
+extern struct probe_conf probe_conf;
 extern bool probe_event_dry_run;
 
 /* kprobe-tracer and uprobe-tracer tracing point */
 struct probe_trace_point {
+       char            *realname;      /* function real name (if needed) */
        char            *symbol;        /* Base symbol */
        char            *module;        /* Module name */
        unsigned long   offset;         /* Offset from symbol */
@@ -121,20 +131,18 @@ extern void line_range__clear(struct line_range *lr);
 /* Initialize line range */
 extern int line_range__init(struct line_range *lr);
 
-/* Internal use: Return kernel/module path */
-extern const char *kernel_get_module_path(const char *module);
-
-extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-                                int max_probe_points, bool force_add);
-extern int del_perf_probe_events(struct strlist *dellist);
-extern int show_perf_probe_events(void);
+extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+extern int del_perf_probe_events(struct strfilter *filter);
+extern int show_perf_probe_events(struct strfilter *filter);
 extern int show_line_range(struct line_range *lr, const char *module,
                           bool user);
 extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
-                              int max_probe_points, const char *module,
-                              struct strfilter *filter, bool externs);
+                              struct strfilter *filter);
 extern int show_available_funcs(const char *module, struct strfilter *filter,
                                bool user);
+bool arch__prefers_symtab(void);
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+                            struct probe_trace_event *tev, struct map *map);
 
 /* Maximum index number of event-name postfix */
 #define MAX_EVENT_INDEX        1024
index b5bf9d5..2da65a7 100644 (file)
@@ -130,7 +130,7 @@ struct debuginfo *debuginfo__new(const char *path)
                        continue;
                dinfo = __debuginfo__new(buf);
        }
-       dso__delete(dso);
+       dso__put(dso);
 
 out:
        /* if failed to open all distro debuginfo, open given binary */
@@ -177,7 +177,7 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
        Dwarf_Word offs = 0;
        bool ref = false;
        const char *regs;
-       int ret;
+       int ret, ret2 = 0;
 
        if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
                goto static_var;
@@ -187,9 +187,19 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
                return -EINVAL; /* Broken DIE ? */
        if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) {
                ret = dwarf_entrypc(sp_die, &tmp);
-               if (ret || addr != tmp ||
-                   dwarf_tag(vr_die) != DW_TAG_formal_parameter ||
-                   dwarf_highpc(sp_die, &tmp))
+               if (ret)
+                       return -ENOENT;
+
+               if (probe_conf.show_location_range &&
+                       (dwarf_tag(vr_die) == DW_TAG_variable)) {
+                       ret2 = -ERANGE;
+               } else if (addr != tmp ||
+                       dwarf_tag(vr_die) != DW_TAG_formal_parameter) {
+                       return -ENOENT;
+               }
+
+               ret = dwarf_highpc(sp_die, &tmp);
+               if (ret)
                        return -ENOENT;
                /*
                 * This is fuzzed by fentry mcount. We try to find the
@@ -210,7 +220,7 @@ found:
        if (op->atom == DW_OP_addr) {
 static_var:
                if (!tvar)
-                       return 0;
+                       return ret2;
                /* Static variables on memory (not stack), make @varname */
                ret = strlen(dwarf_diename(vr_die));
                tvar->value = zalloc(ret + 2);
@@ -220,7 +230,7 @@ static_var:
                tvar->ref = alloc_trace_arg_ref((long)offs);
                if (tvar->ref == NULL)
                        return -ENOMEM;
-               return 0;
+               return ret2;
        }
 
        /* If this is based on frame buffer, set the offset */
@@ -250,14 +260,14 @@ static_var:
        }
 
        if (!tvar)
-               return 0;
+               return ret2;
 
        regs = get_arch_regstr(regn);
        if (!regs) {
                /* This should be a bug in DWARF or this tool */
                pr_warning("Mapping for the register number %u "
                           "missing on this architecture.\n", regn);
-               return -ERANGE;
+               return -ENOTSUP;
        }
 
        tvar->value = strdup(regs);
@@ -269,7 +279,7 @@ static_var:
                if (tvar->ref == NULL)
                        return -ENOMEM;
        }
-       return 0;
+       return ret2;
 }
 
 #define BYTES_TO_BITS(nb)      ((nb) * BITS_PER_LONG / sizeof(long))
@@ -517,10 +527,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
 
        ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
                                        &pf->sp_die, pf->tvar);
-       if (ret == -ENOENT || ret == -EINVAL)
-               pr_err("Failed to find the location of %s at this address.\n"
-                      " Perhaps, it has been optimized out.\n", pf->pvar->var);
-       else if (ret == -ENOTSUP)
+       if (ret == -ENOENT || ret == -EINVAL) {
+               pr_err("Failed to find the location of the '%s' variable at this address.\n"
+                      " Perhaps it has been optimized out.\n"
+                      " Use -V with the --range option to show '%s' location range.\n",
+                      pf->pvar->var, pf->pvar->var);
+       } else if (ret == -ENOTSUP)
                pr_err("Sorry, we don't support this variable location yet.\n");
        else if (ret == 0 && pf->pvar->field) {
                ret = convert_variable_fields(vr_die, pf->pvar->var,
@@ -578,10 +590,12 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
        /* Search child die for local variables and parameters. */
        if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
                /* Search again in global variables */
-               if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die))
+               if (!die_find_variable_at(&pf->cu_die, pf->pvar->var,
+                                               0, &vr_die)) {
                        pr_warning("Failed to find '%s' in this function.\n",
                                   pf->pvar->var);
                        ret = -ENOENT;
+               }
        }
        if (ret >= 0)
                ret = convert_variable(&vr_die, pf);
@@ -660,9 +674,15 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
        /* If not a real subprogram, find a real one */
        if (!die_is_func_def(sc_die)) {
                if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
-                       pr_warning("Failed to find probe point in any "
-                                  "functions.\n");
-                       return -ENOENT;
+                       if (die_find_tailfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+                               pr_warning("Ignoring tail call from %s\n",
+                                               dwarf_diename(&pf->sp_die));
+                               return 0;
+                       } else {
+                               pr_warning("Failed to find probe point in any "
+                                          "functions.\n");
+                               return -ENOENT;
+                       }
                }
        } else
                memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));
@@ -717,7 +737,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
        }
        /* If the function name is given, that's what user expects */
        if (fsp->function) {
-               if (die_compare_name(fn_die, fsp->function)) {
+               if (die_match_name(fn_die, fsp->function)) {
                        memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
                        fsp->found = true;
                        return 1;
@@ -920,13 +940,14 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
 
        /* Check tag and diename */
        if (!die_is_func_def(sp_die) ||
-           !die_compare_name(sp_die, pp->function))
+           !die_match_name(sp_die, pp->function))
                return DWARF_CB_OK;
 
        /* Check declared file */
        if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
                return DWARF_CB_OK;
 
+       pr_debug("Matched function: %s\n", dwarf_diename(sp_die));
        pf->fname = dwarf_decl_file(sp_die);
        if (pp->line) { /* Function relative line */
                dwarf_decl_line(sp_die, &pf->lno);
@@ -943,10 +964,20 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
                        /* TODO: Check the address in this function */
                        param->retval = call_probe_finder(sp_die, pf);
                }
-       } else
+       } else if (!probe_conf.no_inlines) {
                /* Inlined function: search instances */
                param->retval = die_walk_instances(sp_die,
                                        probe_point_inline_cb, (void *)pf);
+               /* This could be a non-existed inline definition */
+               if (param->retval == -ENOENT && strisglob(pp->function))
+                       param->retval = 0;
+       }
+
+       /* We need to find other candidates */
+       if (strisglob(pp->function) && param->retval >= 0) {
+               param->retval = 0;      /* We have to clear the result */
+               return DWARF_CB_OK;
+       }
 
        return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
 }
@@ -975,7 +1006,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
                if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
                        return DWARF_CB_OK;
 
-               if (die_compare_name(param->sp_die, param->function)) {
+               if (die_match_name(param->sp_die, param->function)) {
                        if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
                                return DWARF_CB_OK;
 
@@ -1028,7 +1059,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
                return -ENOMEM;
 
        /* Fastpath: lookup by function name from .debug_pubnames section */
-       if (pp->function) {
+       if (pp->function && !strisglob(pp->function)) {
                struct pubname_callback_param pubname_param = {
                        .function = pp->function,
                        .file     = pp->file,
@@ -1087,6 +1118,7 @@ found:
 struct local_vars_finder {
        struct probe_finder *pf;
        struct perf_probe_arg *args;
+       bool vars;
        int max_args;
        int nargs;
        int ret;
@@ -1101,7 +1133,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
 
        tag = dwarf_tag(die_mem);
        if (tag == DW_TAG_formal_parameter ||
-           tag == DW_TAG_variable) {
+           (tag == DW_TAG_variable && vf->vars)) {
                if (convert_variable_location(die_mem, vf->pf->addr,
                                              vf->pf->fb_ops, &pf->sp_die,
                                              NULL) == 0) {
@@ -1127,26 +1159,28 @@ static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
        Dwarf_Die die_mem;
        int i;
        int n = 0;
-       struct local_vars_finder vf = {.pf = pf, .args = args,
+       struct local_vars_finder vf = {.pf = pf, .args = args, .vars = false,
                                .max_args = MAX_PROBE_ARGS, .ret = 0};
 
        for (i = 0; i < pf->pev->nargs; i++) {
                /* var never be NULL */
-               if (strcmp(pf->pev->args[i].var, "$vars") == 0) {
-                       pr_debug("Expanding $vars into:");
-                       vf.nargs = n;
-                       /* Special local variables */
-                       die_find_child(sc_die, copy_variables_cb, (void *)&vf,
-                                      &die_mem);
-                       pr_debug(" (%d)\n", vf.nargs - n);
-                       if (vf.ret < 0)
-                               return vf.ret;
-                       n = vf.nargs;
-               } else {
+               if (strcmp(pf->pev->args[i].var, PROBE_ARG_VARS) == 0)
+                       vf.vars = true;
+               else if (strcmp(pf->pev->args[i].var, PROBE_ARG_PARAMS) != 0) {
                        /* Copy normal argument */
                        args[n] = pf->pev->args[i];
                        n++;
+                       continue;
                }
+               pr_debug("Expanding %s into:", pf->pev->args[i].var);
+               vf.nargs = n;
+               /* Special local variables */
+               die_find_child(sc_die, copy_variables_cb, (void *)&vf,
+                              &die_mem);
+               pr_debug(" (%d)\n", vf.nargs - n);
+               if (vf.ret < 0)
+                       return vf.ret;
+               n = vf.nargs;
        }
        return n;
 }
@@ -1174,6 +1208,10 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
        if (ret < 0)
                return ret;
 
+       tev->point.realname = strdup(dwarf_diename(sc_die));
+       if (!tev->point.realname)
+               return -ENOMEM;
+
        pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
                 tev->point.offset);
 
@@ -1211,15 +1249,15 @@ end:
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
 int debuginfo__find_trace_events(struct debuginfo *dbg,
                                 struct perf_probe_event *pev,
-                                struct probe_trace_event **tevs, int max_tevs)
+                                struct probe_trace_event **tevs)
 {
        struct trace_event_finder tf = {
                        .pf = {.pev = pev, .callback = add_probe_trace_event},
-                       .mod = dbg->mod, .max_tevs = max_tevs};
+                       .max_tevs = probe_conf.max_probes, .mod = dbg->mod};
        int ret;
 
        /* Allocate result tevs array */
-       *tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
+       *tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
        if (*tevs == NULL)
                return -ENOMEM;
 
@@ -1235,14 +1273,11 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
        return (ret < 0) ? ret : tf.ntevs;
 }
 
-#define MAX_VAR_LEN 64
-
 /* Collect available variables in this scope */
 static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
 {
        struct available_var_finder *af = data;
        struct variable_list *vl;
-       char buf[MAX_VAR_LEN];
        int tag, ret;
 
        vl = &af->vls[af->nvls - 1];
@@ -1253,11 +1288,38 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
                ret = convert_variable_location(die_mem, af->pf.addr,
                                                af->pf.fb_ops, &af->pf.sp_die,
                                                NULL);
-               if (ret == 0) {
-                       ret = die_get_varname(die_mem, buf, MAX_VAR_LEN);
-                       pr_debug2("Add new var: %s\n", buf);
-                       if (ret > 0)
-                               strlist__add(vl->vars, buf);
+               if (ret == 0 || ret == -ERANGE) {
+                       int ret2;
+                       bool externs = !af->child;
+                       struct strbuf buf;
+
+                       strbuf_init(&buf, 64);
+
+                       if (probe_conf.show_location_range) {
+                               if (!externs) {
+                                       if (ret)
+                                               strbuf_addf(&buf, "[INV]\t");
+                                       else
+                                               strbuf_addf(&buf, "[VAL]\t");
+                               } else
+                                       strbuf_addf(&buf, "[EXT]\t");
+                       }
+
+                       ret2 = die_get_varname(die_mem, &buf);
+
+                       if (!ret2 && probe_conf.show_location_range &&
+                               !externs) {
+                               strbuf_addf(&buf, "\t");
+                               ret2 = die_get_var_range(&af->pf.sp_die,
+                                                       die_mem, &buf);
+                       }
+
+                       pr_debug("Add new var: %s\n", buf.buf);
+                       if (ret2 == 0) {
+                               strlist__add(vl->vars,
+                                       strbuf_detach(&buf, NULL));
+                       }
+                       strbuf_release(&buf);
                }
        }
 
@@ -1300,9 +1362,9 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
        die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
 
        /* Find external variables */
-       if (!af->externs)
+       if (!probe_conf.show_ext_vars)
                goto out;
-       /* Don't need to search child DIE for externs. */
+       /* Don't need to search child DIE for external vars. */
        af->child = false;
        die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
 
@@ -1322,17 +1384,16 @@ out:
  */
 int debuginfo__find_available_vars_at(struct debuginfo *dbg,
                                      struct perf_probe_event *pev,
-                                     struct variable_list **vls,
-                                     int max_vls, bool externs)
+                                     struct variable_list **vls)
 {
        struct available_var_finder af = {
                        .pf = {.pev = pev, .callback = add_available_vars},
                        .mod = dbg->mod,
-                       .max_vls = max_vls, .externs = externs};
+                       .max_vls = probe_conf.max_probes};
        int ret;
 
        /* Allocate result vls array */
-       *vls = zalloc(sizeof(struct variable_list) * max_vls);
+       *vls = zalloc(sizeof(struct variable_list) * af.max_vls);
        if (*vls == NULL)
                return -ENOMEM;
 
@@ -1533,7 +1594,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
                return DWARF_CB_OK;
 
        if (die_is_func_def(sp_die) &&
-           die_compare_name(sp_die, lr->function)) {
+           die_match_name(sp_die, lr->function)) {
                lf->fname = dwarf_decl_file(sp_die);
                dwarf_decl_line(sp_die, &lr->offset);
                pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
index ebf8c8c..bed8271 100644 (file)
@@ -10,6 +10,9 @@
 #define MAX_PROBES              128
 #define MAX_PROBE_ARGS          128
 
+#define PROBE_ARG_VARS         "$vars"
+#define PROBE_ARG_PARAMS       "$params"
+
 static inline int is_c_varname(const char *name)
 {
        /* TODO */
@@ -37,8 +40,7 @@ extern void debuginfo__delete(struct debuginfo *dbg);
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
 extern int debuginfo__find_trace_events(struct debuginfo *dbg,
                                        struct perf_probe_event *pev,
-                                       struct probe_trace_event **tevs,
-                                       int max_tevs);
+                                       struct probe_trace_event **tevs);
 
 /* Find a perf_probe_point from debuginfo */
 extern int debuginfo__find_probe_point(struct debuginfo *dbg,
@@ -52,8 +54,7 @@ extern int debuginfo__find_line_range(struct debuginfo *dbg,
 /* Find available variables */
 extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
                                             struct perf_probe_event *pev,
-                                            struct variable_list **vls,
-                                            int max_points, bool externs);
+                                            struct variable_list **vls);
 
 /* Find a src file from a DWARF tag path */
 int get_real_path(const char *raw_path, const char *comp_dir,
@@ -96,7 +97,6 @@ struct available_var_finder {
        struct variable_list    *vls;           /* Found variable lists */
        int                     nvls;           /* Number of variable lists */
        int                     max_vls;        /* Max no. of variable lists */
-       bool                    externs;        /* Find external vars too */
        bool                    child;          /* Search child scopes */
 };
 
index a126e6c..b234a6e 100644 (file)
@@ -74,3 +74,10 @@ void *pstack__pop(struct pstack *pstack)
        pstack->entries[pstack->top] = NULL;
        return ret;
 }
+
+void *pstack__peek(struct pstack *pstack)
+{
+       if (pstack->top == 0)
+               return NULL;
+       return pstack->entries[pstack->top - 1];
+}
index c3cb658..ded7f2e 100644 (file)
@@ -10,5 +10,6 @@ bool pstack__empty(const struct pstack *pstack);
 void pstack__remove(struct pstack *pstack, void *key);
 void pstack__push(struct pstack *pstack, void *key);
 void *pstack__pop(struct pstack *pstack);
+void *pstack__peek(struct pstack *pstack);
 
 #endif /* _PERF_PSTACK_ */
index 4d28624..5925fec 100644 (file)
@@ -16,6 +16,7 @@ util/util.c
 util/xyarray.c
 util/cgroup.c
 util/rblist.c
+util/stat.c
 util/strlist.c
 util/trace-event.c
 ../../lib/rbtree.c
index 8acd0df..d457c52 100644 (file)
@@ -20,7 +20,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
        if (!evlist)
                return -ENOMEM;
 
-       if (parse_events(evlist, str))
+       if (parse_events(evlist, str, NULL))
                goto out_delete;
 
        evsel = perf_evlist__first(evlist);
@@ -119,7 +119,16 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
                        evsel->attr.comm_exec = 1;
        }
 
-       if (evlist->nr_entries > 1) {
+       if (opts->full_auxtrace) {
+               /*
+                * Need to be able to synthesize and parse selected events with
+                * arbitrary sample types, which requires always being able to
+                * match the id.
+                */
+               use_sample_identifier = perf_can_sample_identifier();
+               evlist__for_each(evlist, evsel)
+                       perf_evsel__set_sample_id(evsel, use_sample_identifier);
+       } else if (evlist->nr_entries > 1) {
                struct perf_evsel *first = perf_evlist__first(evlist);
 
                evlist__for_each(evlist, evsel) {
@@ -207,7 +216,7 @@ bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str)
        if (!temp_evlist)
                return false;
 
-       err = parse_events(temp_evlist, str);
+       err = parse_events(temp_evlist, str, NULL);
        if (err)
                goto out_delete;
 
index 0c74012..aa482c1 100644 (file)
 #include "cpumap.h"
 #include "perf_regs.h"
 #include "asm/bug.h"
+#include "auxtrace.h"
+#include "thread-stack.h"
 
-static int machines__deliver_event(struct machines *machines,
-                                  struct perf_evlist *evlist,
-                                  union perf_event *event,
-                                  struct perf_sample *sample,
-                                  struct perf_tool *tool, u64 file_offset);
+static int perf_session__deliver_event(struct perf_session *session,
+                                      union perf_event *event,
+                                      struct perf_sample *sample,
+                                      struct perf_tool *tool,
+                                      u64 file_offset);
 
 static int perf_session__open(struct perf_session *session)
 {
@@ -105,8 +107,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
                return ret;
        }
 
-       return machines__deliver_event(&session->machines, session->evlist, event->event,
-                                      &sample, session->tool, event->file_offset);
+       return perf_session__deliver_event(session, event->event, &sample,
+                                          session->tool, event->file_offset);
 }
 
 struct perf_session *perf_session__new(struct perf_data_file *file,
@@ -119,6 +121,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
 
        session->repipe = repipe;
        session->tool   = tool;
+       INIT_LIST_HEAD(&session->auxtrace_index);
        machines__init(&session->machines);
        ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
 
@@ -185,6 +188,8 @@ static void perf_session_env__delete(struct perf_session_env *env)
 
 void perf_session__delete(struct perf_session *session)
 {
+       auxtrace__free(session);
+       auxtrace_index__free(&session->auxtrace_index);
        perf_session__destroy_kernel_maps(session);
        perf_session__delete_threads(session);
        perf_session_env__delete(&session->header.env);
@@ -262,6 +267,49 @@ static int process_id_index_stub(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
+static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused,
+                               union perf_event *event __maybe_unused,
+                               struct perf_session *session __maybe_unused)
+{
+       dump_printf(": unhandled!\n");
+       return 0;
+}
+
+static int skipn(int fd, off_t n)
+{
+       char buf[4096];
+       ssize_t ret;
+
+       while (n > 0) {
+               ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
+               if (ret <= 0)
+                       return ret;
+               n -= ret;
+       }
+
+       return 0;
+}
+
+static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
+                                      union perf_event *event,
+                                      struct perf_session *session
+                                      __maybe_unused)
+{
+       dump_printf(": unhandled!\n");
+       if (perf_data_file__is_pipe(session->file))
+               skipn(perf_data_file__fd(session->file), event->auxtrace.size);
+       return event->auxtrace.size;
+}
+
+static
+int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
+                                     union perf_event *event __maybe_unused,
+                                     struct perf_session *session __maybe_unused)
+{
+       dump_printf(": unhandled!\n");
+       return 0;
+}
+
 void perf_tool__fill_defaults(struct perf_tool *tool)
 {
        if (tool->sample == NULL)
@@ -278,6 +326,12 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
                tool->exit = process_event_stub;
        if (tool->lost == NULL)
                tool->lost = perf_event__process_lost;
+       if (tool->lost_samples == NULL)
+               tool->lost_samples = perf_event__process_lost_samples;
+       if (tool->aux == NULL)
+               tool->aux = perf_event__process_aux;
+       if (tool->itrace_start == NULL)
+               tool->itrace_start = perf_event__process_itrace_start;
        if (tool->read == NULL)
                tool->read = process_event_sample_stub;
        if (tool->throttle == NULL)
@@ -298,6 +352,12 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
        }
        if (tool->id_index == NULL)
                tool->id_index = process_id_index_stub;
+       if (tool->auxtrace_info == NULL)
+               tool->auxtrace_info = process_event_auxtrace_info_stub;
+       if (tool->auxtrace == NULL)
+               tool->auxtrace = process_event_auxtrace_stub;
+       if (tool->auxtrace_error == NULL)
+               tool->auxtrace_error = process_event_auxtrace_error_stub;
 }
 
 static void swap_sample_id_all(union perf_event *event, void *data)
@@ -390,6 +450,26 @@ static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
                swap_sample_id_all(event, &event->read + 1);
 }
 
+static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
+{
+       event->aux.aux_offset = bswap_64(event->aux.aux_offset);
+       event->aux.aux_size   = bswap_64(event->aux.aux_size);
+       event->aux.flags      = bswap_64(event->aux.flags);
+
+       if (sample_id_all)
+               swap_sample_id_all(event, &event->aux + 1);
+}
+
+static void perf_event__itrace_start_swap(union perf_event *event,
+                                         bool sample_id_all)
+{
+       event->itrace_start.pid  = bswap_32(event->itrace_start.pid);
+       event->itrace_start.tid  = bswap_32(event->itrace_start.tid);
+
+       if (sample_id_all)
+               swap_sample_id_all(event, &event->itrace_start + 1);
+}
+
 static void perf_event__throttle_swap(union perf_event *event,
                                      bool sample_id_all)
 {
@@ -438,19 +518,42 @@ void perf_event__attr_swap(struct perf_event_attr *attr)
 {
        attr->type              = bswap_32(attr->type);
        attr->size              = bswap_32(attr->size);
-       attr->config            = bswap_64(attr->config);
-       attr->sample_period     = bswap_64(attr->sample_period);
-       attr->sample_type       = bswap_64(attr->sample_type);
-       attr->read_format       = bswap_64(attr->read_format);
-       attr->wakeup_events     = bswap_32(attr->wakeup_events);
-       attr->bp_type           = bswap_32(attr->bp_type);
-       attr->bp_addr           = bswap_64(attr->bp_addr);
-       attr->bp_len            = bswap_64(attr->bp_len);
-       attr->branch_sample_type = bswap_64(attr->branch_sample_type);
-       attr->sample_regs_user   = bswap_64(attr->sample_regs_user);
-       attr->sample_stack_user  = bswap_32(attr->sample_stack_user);
 
-       swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));
+#define bswap_safe(f, n)                                       \
+       (attr->size > (offsetof(struct perf_event_attr, f) +    \
+                      sizeof(attr->f) * (n)))
+#define bswap_field(f, sz)                     \
+do {                                           \
+       if (bswap_safe(f, 0))                   \
+               attr->f = bswap_##sz(attr->f);  \
+} while(0)
+#define bswap_field_32(f) bswap_field(f, 32)
+#define bswap_field_64(f) bswap_field(f, 64)
+
+       bswap_field_64(config);
+       bswap_field_64(sample_period);
+       bswap_field_64(sample_type);
+       bswap_field_64(read_format);
+       bswap_field_32(wakeup_events);
+       bswap_field_32(bp_type);
+       bswap_field_64(bp_addr);
+       bswap_field_64(bp_len);
+       bswap_field_64(branch_sample_type);
+       bswap_field_64(sample_regs_user);
+       bswap_field_32(sample_stack_user);
+       bswap_field_32(aux_watermark);
+
+       /*
+        * After read_format are bitfields. Check read_format because
+        * we are unable to use offsetof on bitfield.
+        */
+       if (bswap_safe(read_format, 1))
+               swap_bitfield((u8 *) (&attr->read_format + 1),
+                             sizeof(u64));
+#undef bswap_field_64
+#undef bswap_field_32
+#undef bswap_field
+#undef bswap_safe
 }
 
 static void perf_event__hdr_attr_swap(union perf_event *event,
@@ -478,6 +581,40 @@ static void perf_event__tracing_data_swap(union perf_event *event,
        event->tracing_data.size = bswap_32(event->tracing_data.size);
 }
 
+static void perf_event__auxtrace_info_swap(union perf_event *event,
+                                          bool sample_id_all __maybe_unused)
+{
+       size_t size;
+
+       event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);
+
+       size = event->header.size;
+       size -= (void *)&event->auxtrace_info.priv - (void *)event;
+       mem_bswap_64(event->auxtrace_info.priv, size);
+}
+
+static void perf_event__auxtrace_swap(union perf_event *event,
+                                     bool sample_id_all __maybe_unused)
+{
+       event->auxtrace.size      = bswap_64(event->auxtrace.size);
+       event->auxtrace.offset    = bswap_64(event->auxtrace.offset);
+       event->auxtrace.reference = bswap_64(event->auxtrace.reference);
+       event->auxtrace.idx       = bswap_32(event->auxtrace.idx);
+       event->auxtrace.tid       = bswap_32(event->auxtrace.tid);
+       event->auxtrace.cpu       = bswap_32(event->auxtrace.cpu);
+}
+
+static void perf_event__auxtrace_error_swap(union perf_event *event,
+                                           bool sample_id_all __maybe_unused)
+{
+       event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
+       event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
+       event->auxtrace_error.cpu  = bswap_32(event->auxtrace_error.cpu);
+       event->auxtrace_error.pid  = bswap_32(event->auxtrace_error.pid);
+       event->auxtrace_error.tid  = bswap_32(event->auxtrace_error.tid);
+       event->auxtrace_error.ip   = bswap_64(event->auxtrace_error.ip);
+}
+
 typedef void (*perf_event__swap_op)(union perf_event *event,
                                    bool sample_id_all);
 
@@ -492,11 +629,17 @@ static perf_event__swap_op perf_event__swap_ops[] = {
        [PERF_RECORD_THROTTLE]            = perf_event__throttle_swap,
        [PERF_RECORD_UNTHROTTLE]          = perf_event__throttle_swap,
        [PERF_RECORD_SAMPLE]              = perf_event__all64_swap,
+       [PERF_RECORD_AUX]                 = perf_event__aux_swap,
+       [PERF_RECORD_ITRACE_START]        = perf_event__itrace_start_swap,
+       [PERF_RECORD_LOST_SAMPLES]        = perf_event__all64_swap,
        [PERF_RECORD_HEADER_ATTR]         = perf_event__hdr_attr_swap,
        [PERF_RECORD_HEADER_EVENT_TYPE]   = perf_event__event_type_swap,
        [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
        [PERF_RECORD_HEADER_BUILD_ID]     = NULL,
        [PERF_RECORD_ID_INDEX]            = perf_event__all64_swap,
+       [PERF_RECORD_AUXTRACE_INFO]       = perf_event__auxtrace_info_swap,
+       [PERF_RECORD_AUXTRACE]            = perf_event__auxtrace_swap,
+       [PERF_RECORD_AUXTRACE_ERROR]      = perf_event__auxtrace_error_swap,
        [PERF_RECORD_HEADER_MAX]          = NULL,
 };
 
@@ -921,6 +1064,8 @@ static int machines__deliver_event(struct machines *machines,
        case PERF_RECORD_MMAP:
                return tool->mmap(tool, event, sample, machine);
        case PERF_RECORD_MMAP2:
+               if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
+                       ++evlist->stats.nr_proc_map_timeout;
                return tool->mmap2(tool, event, sample, machine);
        case PERF_RECORD_COMM:
                return tool->comm(tool, event, sample, machine);
@@ -932,18 +1077,44 @@ static int machines__deliver_event(struct machines *machines,
                if (tool->lost == perf_event__process_lost)
                        evlist->stats.total_lost += event->lost.lost;
                return tool->lost(tool, event, sample, machine);
+       case PERF_RECORD_LOST_SAMPLES:
+               if (tool->lost_samples == perf_event__process_lost_samples)
+                       evlist->stats.total_lost_samples += event->lost_samples.lost;
+               return tool->lost_samples(tool, event, sample, machine);
        case PERF_RECORD_READ:
                return tool->read(tool, event, sample, evsel, machine);
        case PERF_RECORD_THROTTLE:
                return tool->throttle(tool, event, sample, machine);
        case PERF_RECORD_UNTHROTTLE:
                return tool->unthrottle(tool, event, sample, machine);
+       case PERF_RECORD_AUX:
+               return tool->aux(tool, event, sample, machine);
+       case PERF_RECORD_ITRACE_START:
+               return tool->itrace_start(tool, event, sample, machine);
        default:
                ++evlist->stats.nr_unknown_events;
                return -1;
        }
 }
 
+static int perf_session__deliver_event(struct perf_session *session,
+                                      union perf_event *event,
+                                      struct perf_sample *sample,
+                                      struct perf_tool *tool,
+                                      u64 file_offset)
+{
+       int ret;
+
+       ret = auxtrace__process_event(session, event, sample, tool);
+       if (ret < 0)
+               return ret;
+       if (ret > 0)
+               return 0;
+
+       return machines__deliver_event(&session->machines, session->evlist,
+                                      event, sample, tool, file_offset);
+}
+
 static s64 perf_session__process_user_event(struct perf_session *session,
                                            union perf_event *event,
                                            u64 file_offset)
@@ -980,6 +1151,15 @@ static s64 perf_session__process_user_event(struct perf_session *session,
                return tool->finished_round(tool, event, oe);
        case PERF_RECORD_ID_INDEX:
                return tool->id_index(tool, event, session);
+       case PERF_RECORD_AUXTRACE_INFO:
+               return tool->auxtrace_info(tool, event, session);
+       case PERF_RECORD_AUXTRACE:
+               /* setup for reading amidst mmap */
+               lseek(fd, file_offset + event->header.size, SEEK_SET);
+               return tool->auxtrace(tool, event, session);
+       case PERF_RECORD_AUXTRACE_ERROR:
+               perf_session__auxtrace_error_inc(session, event);
+               return tool->auxtrace_error(tool, event, session);
        default:
                return -EINVAL;
        }
@@ -1034,7 +1214,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
                return -1;
 
        if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
-           readn(fd, &buf, hdr_sz) != (ssize_t)hdr_sz)
+           readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
                return -1;
 
        event = (union perf_event *)buf;
@@ -1042,12 +1222,12 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
        if (session->header.needs_swap)
                perf_event_header__bswap(&event->header);
 
-       if (event->header.size < hdr_sz)
+       if (event->header.size < hdr_sz || event->header.size > buf_sz)
                return -1;
 
        rest = event->header.size - hdr_sz;
 
-       if (readn(fd, &buf, rest) != (ssize_t)rest)
+       if (readn(fd, buf, rest) != (ssize_t)rest)
                return -1;
 
        if (session->header.needs_swap)
@@ -1096,8 +1276,8 @@ static s64 perf_session__process_event(struct perf_session *session,
                        return ret;
        }
 
-       return machines__deliver_event(&session->machines, evlist, event,
-                                      &sample, tool, file_offset);
+       return perf_session__deliver_event(session, event, &sample, tool,
+                                          file_offset);
 }
 
 void perf_event_header__bswap(struct perf_event_header *hdr)
@@ -1138,6 +1318,18 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
                            stats->nr_events[PERF_RECORD_LOST]);
        }
 
+       if (session->tool->lost_samples == perf_event__process_lost_samples) {
+               double drop_rate;
+
+               drop_rate = (double)stats->total_lost_samples /
+                           (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
+               if (drop_rate > 0.05) {
+                       ui__warning("Processed %" PRIu64 " samples and lost %3.2f%% samples!\n\n",
+                                   stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
+                                   drop_rate * 100.0);
+               }
+       }
+
        if (stats->nr_unknown_events != 0) {
                ui__warning("Found %u unknown events!\n\n"
                            "Is this an older tool processing a perf.data "
@@ -1168,6 +1360,32 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 
        if (oe->nr_unordered_events != 0)
                ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
+
+       events_stats__auxtrace_error_warn(stats);
+
+       if (stats->nr_proc_map_timeout != 0) {
+               ui__warning("%d map information files for pre-existing threads were\n"
+                           "not processed, if there are samples for addresses they\n"
+                           "will not be resolved, you may find out which are these\n"
+                           "threads by running with -v and redirecting the output\n"
+                           "to a file.\n"
+                           "The time limit to process proc map is too short?\n"
+                           "Increase it by --proc-map-timeout\n",
+                           stats->nr_proc_map_timeout);
+       }
+}
+
+static int perf_session__flush_thread_stack(struct thread *thread,
+                                           void *p __maybe_unused)
+{
+       return thread_stack__flush(thread);
+}
+
+static int perf_session__flush_thread_stacks(struct perf_session *session)
+{
+       return machines__for_each_thread(&session->machines,
+                                        perf_session__flush_thread_stack,
+                                        NULL);
 }
 
 volatile int session_done;
@@ -1256,10 +1474,17 @@ more:
 done:
        /* do the final flush for ordered samples */
        err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+       if (err)
+               goto out_err;
+       err = auxtrace__flush_events(session, tool);
+       if (err)
+               goto out_err;
+       err = perf_session__flush_thread_stacks(session);
 out_err:
        free(buf);
        perf_session__warn_about_errors(session);
        ordered_events__free(&session->ordered_events);
+       auxtrace__free_events(session);
        return err;
 }
 
@@ -1402,10 +1627,17 @@ more:
 out:
        /* do the final flush for ordered samples */
        err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+       if (err)
+               goto out_err;
+       err = auxtrace__flush_events(session, tool);
+       if (err)
+               goto out_err;
+       err = perf_session__flush_thread_stacks(session);
 out_err:
        ui_progress__finish();
        perf_session__warn_about_errors(session);
        ordered_events__free(&session->ordered_events);
+       auxtrace__free_events(session);
        session->one_mmap = false;
        return err;
 }
@@ -1488,7 +1720,13 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp
 
 size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
 {
-       size_t ret = fprintf(fp, "Aggregated stats:\n");
+       size_t ret;
+       const char *msg = "";
+
+       if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+               msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";
+
+       ret = fprintf(fp, "Aggregated stats:%s\n", msg);
 
        ret += events_stats__fprintf(&session->evlist->stats, fp);
        return ret;
index d5fa7b7..b44afc7 100644 (file)
 struct ip_callchain;
 struct thread;
 
+struct auxtrace;
+struct itrace_synth_opts;
+
 struct perf_session {
        struct perf_header      header;
        struct machines         machines;
        struct perf_evlist      *evlist;
+       struct auxtrace         *auxtrace;
+       struct itrace_synth_opts *itrace_synth_opts;
+       struct list_head        auxtrace_index;
        struct trace_event      tevent;
        bool                    repipe;
        bool                    one_mmap;
index 4593f36..4c65a14 100644 (file)
@@ -89,14 +89,14 @@ static int64_t
 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 {
        /* Compare the addr that should be unique among comm */
-       return comm__str(right->comm) - comm__str(left->comm);
+       return strcmp(comm__str(right->comm), comm__str(left->comm));
 }
 
 static int64_t
 sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 {
        /* Compare the addr that should be unique among comm */
-       return comm__str(right->comm) - comm__str(left->comm);
+       return strcmp(comm__str(right->comm), comm__str(left->comm));
 }
 
 static int64_t
@@ -182,18 +182,16 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
 
 static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
 {
-       u64 ip_l, ip_r;
-
        if (!sym_l || !sym_r)
                return cmp_null(sym_l, sym_r);
 
        if (sym_l == sym_r)
                return 0;
 
-       ip_l = sym_l->start;
-       ip_r = sym_r->start;
+       if (sym_l->start != sym_r->start)
+               return (int64_t)(sym_r->start - sym_l->start);
 
-       return (int64_t)(ip_r - ip_l);
+       return (int64_t)(sym_r->end - sym_l->end);
 }
 
 static int64_t
index 846036a..e97cd47 100644 (file)
@@ -58,15 +58,16 @@ struct he_stat {
 
 struct hist_entry_diff {
        bool    computed;
+       union {
+               /* PERF_HPP__DELTA */
+               double  period_ratio_delta;
 
-       /* PERF_HPP__DELTA */
-       double  period_ratio_delta;
-
-       /* PERF_HPP__RATIO */
-       double  period_ratio;
+               /* PERF_HPP__RATIO */
+               double  period_ratio;
 
-       /* HISTC_WEIGHTED_DIFF */
-       s64     wdiff;
+               /* HISTC_WEIGHTED_DIFF */
+               s64     wdiff;
+       };
 };
 
 /**
@@ -92,21 +93,28 @@ struct hist_entry {
        s32                     cpu;
        u8                      cpumode;
 
-       struct hist_entry_diff  diff;
-
        /* We are added by hists__add_dummy_entry. */
        bool                    dummy;
 
-       /* XXX These two should move to some tree widget lib */
-       u16                     row_offset;
-       u16                     nr_rows;
-
-       bool                    init_have_children;
        char                    level;
        u8                      filtered;
+       union {
+               /*
+                * Since perf diff only supports the stdio output, TUI
+                * fields are only accessed from perf report (or perf
+                * top).  So make it an union to reduce memory usage.
+                */
+               struct hist_entry_diff  diff;
+               struct /* for TUI */ {
+                       u16     row_offset;
+                       u16     nr_rows;
+                       bool    init_have_children;
+                       bool    unfolded;
+                       bool    has_children;
+               };
+       };
        char                    *srcline;
        struct symbol           *parent;
-       unsigned long           position;
        struct rb_root          sorted_chain;
        struct branch_info      *branch_info;
        struct hists            *hists;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
new file mode 100644 (file)
index 0000000..53e8bb7
--- /dev/null
@@ -0,0 +1,434 @@
+#include <stdio.h>
+#include "evsel.h"
+#include "stat.h"
+#include "color.h"
+
+enum {
+       CTX_BIT_USER    = 1 << 0,
+       CTX_BIT_KERNEL  = 1 << 1,
+       CTX_BIT_HV      = 1 << 2,
+       CTX_BIT_HOST    = 1 << 3,
+       CTX_BIT_IDLE    = 1 << 4,
+       CTX_BIT_MAX     = 1 << 5,
+};
+
+#define NUM_CTX CTX_BIT_MAX
+
+static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+
+struct stats walltime_nsecs_stats;
+
+static int evsel_context(struct perf_evsel *evsel)
+{
+       int ctx = 0;
+
+       if (evsel->attr.exclude_kernel)
+               ctx |= CTX_BIT_KERNEL;
+       if (evsel->attr.exclude_user)
+               ctx |= CTX_BIT_USER;
+       if (evsel->attr.exclude_hv)
+               ctx |= CTX_BIT_HV;
+       if (evsel->attr.exclude_host)
+               ctx |= CTX_BIT_HOST;
+       if (evsel->attr.exclude_idle)
+               ctx |= CTX_BIT_IDLE;
+
+       return ctx;
+}
+
+void perf_stat__reset_shadow_stats(void)
+{
+       memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
+       memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
+       memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
+       memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
+       memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
+       memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
+       memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
+       memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
+       memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
+       memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
+       memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+       memset(runtime_cycles_in_tx_stats, 0,
+                       sizeof(runtime_cycles_in_tx_stats));
+       memset(runtime_transaction_stats, 0,
+               sizeof(runtime_transaction_stats));
+       memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
+       memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+}
+
+/*
+ * Update various tracking values we maintain to print
+ * more semantic information such as miss/hit ratios,
+ * instruction rates, etc:
+ */
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+                                   int cpu)
+{
+       int ctx = evsel_context(counter);
+
+       if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+               update_stats(&runtime_nsecs_stats[cpu], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+               update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
+               update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TRANSACTION_START))
+               update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, ELISION_START))
+               update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+               update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+               update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+               update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+               update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
+       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+               update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
+       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+               update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+               update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+               update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
+       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+               update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+}
+
+/* used for get_ratio_color() */
+enum grc_type {
+       GRC_STALLED_CYCLES_FE,
+       GRC_STALLED_CYCLES_BE,
+       GRC_CACHE_MISSES,
+       GRC_MAX_NR
+};
+
+static const char *get_ratio_color(enum grc_type type, double ratio)
+{
+       static const double grc_table[GRC_MAX_NR][3] = {
+               [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
+               [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
+               [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
+       };
+       const char *color = PERF_COLOR_NORMAL;
+
+       if (ratio > grc_table[type][0])
+               color = PERF_COLOR_RED;
+       else if (ratio > grc_table[type][1])
+               color = PERF_COLOR_MAGENTA;
+       else if (ratio > grc_table[type][2])
+               color = PERF_COLOR_YELLOW;
+
+       return color;
+}
+
+static void print_stalled_cycles_frontend(FILE *out, int cpu,
+                                         struct perf_evsel *evsel
+                                         __maybe_unused, double avg)
+{
+       double total, ratio = 0.0;
+       const char *color;
+       int ctx = evsel_context(evsel);
+
+       total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+       if (total)
+               ratio = avg / total * 100.0;
+
+       color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
+
+       fprintf(out, " #  ");
+       color_fprintf(out, color, "%6.2f%%", ratio);
+       fprintf(out, " frontend cycles idle   ");
+}
+
+static void print_stalled_cycles_backend(FILE *out, int cpu,
+                                        struct perf_evsel *evsel
+                                        __maybe_unused, double avg)
+{
+       double total, ratio = 0.0;
+       const char *color;
+       int ctx = evsel_context(evsel);
+
+       total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+       if (total)
+               ratio = avg / total * 100.0;
+
+       color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
+
+       fprintf(out, " #  ");
+       color_fprintf(out, color, "%6.2f%%", ratio);
+       fprintf(out, " backend  cycles idle   ");
+}
+
+static void print_branch_misses(FILE *out, int cpu,
+                               struct perf_evsel *evsel __maybe_unused,
+                               double avg)
+{
+       double total, ratio = 0.0;
+       const char *color;
+       int ctx = evsel_context(evsel);
+
+       total = avg_stats(&runtime_branches_stats[ctx][cpu]);
+
+       if (total)
+               ratio = avg / total * 100.0;
+
+       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+       fprintf(out, " #  ");
+       color_fprintf(out, color, "%6.2f%%", ratio);
+       fprintf(out, " of all branches        ");
+}
+
+static void print_l1_dcache_misses(FILE *out, int cpu,
+                                  struct perf_evsel *evsel __maybe_unused,
+                                  double avg)
+{
+       double total, ratio = 0.0;
+       const char *color;
+       int ctx = evsel_context(evsel);
+
+       total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
+
+       if (total)
+               ratio = avg / total * 100.0;
+
+       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+       fprintf(out, " #  ");
+       color_fprintf(out, color, "%6.2f%%", ratio);
+       fprintf(out, " of all L1-dcache hits  ");
+}
+
+static void print_l1_icache_misses(FILE *out, int cpu,
+                                  struct perf_evsel *evsel __maybe_unused,
+                                  double avg)
+{
+       double total, ratio = 0.0;
+       const char *color;
+       int ctx = evsel_context(evsel);
+
+       total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
+
+       if (total)
+               ratio = avg / total * 100.0;
+
+       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+       fprintf(out, " #  ");
+       color_fprintf(out, color, "%6.2f%%", ratio);
+       fprintf(out, " of all L1-icache hits  ");
+}
+
+static void print_dtlb_cache_misses(FILE *out, int cpu,
+                                   struct perf_evsel *evsel __maybe_unused,
+                                   double avg)
+{
+       double total, ratio = 0.0;
+       const char *color;
+       int ctx = evsel_context(evsel);
+
+       total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
+
+       if (total)
+               ratio = avg / total * 100.0;
+
+       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+       fprintf(out, " #  ");
+       color_fprintf(out, color, "%6.2f%%", ratio);
+       fprintf(out, " of all dTLB cache hits ");
+}
+
+static void print_itlb_cache_misses(FILE *out, int cpu,
+                                   struct perf_evsel *evsel __maybe_unused,
+                                   double avg)
+{
+       double total, ratio = 0.0;
+       const char *color;
+       int ctx = evsel_context(evsel);
+
+       total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
+
+       if (total)
+               ratio = avg / total * 100.0;
+
+       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+       fprintf(out, " #  ");
+       color_fprintf(out, color, "%6.2f%%", ratio);
+       fprintf(out, " of all iTLB cache hits ");
+}
+
+static void print_ll_cache_misses(FILE *out, int cpu,
+                                 struct perf_evsel *evsel __maybe_unused,
+                                 double avg)
+{
+       double total, ratio = 0.0;
+       const char *color;
+       int ctx = evsel_context(evsel);
+
+       total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
+
+       if (total)
+               ratio = avg / total * 100.0;
+
+       color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+       fprintf(out, " #  ");
+       color_fprintf(out, color, "%6.2f%%", ratio);
+       fprintf(out, " of all LL-cache hits   ");
+}
+
+void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
+                                  double avg, int cpu, enum aggr_mode aggr)
+{
+       double total, ratio = 0.0, total2;
+       int ctx = evsel_context(evsel);
+
+       if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+               total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+               if (total) {
+                       ratio = avg / total;
+                       fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
+               } else {
+                       fprintf(out, "                                   ");
+               }
+               total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
+               total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
+
+               if (total && avg) {
+                       ratio = total / avg;
+                       fprintf(out, "\n");
+                       if (aggr == AGGR_NONE)
+                               fprintf(out, "        ");
+                       fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
+               }
+
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
+                       runtime_branches_stats[ctx][cpu].n != 0) {
+               print_branch_misses(out, cpu, evsel, avg);
+       } else if (
+               evsel->attr.type == PERF_TYPE_HW_CACHE &&
+               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
+                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+                       runtime_l1_dcache_stats[ctx][cpu].n != 0) {
+               print_l1_dcache_misses(out, cpu, evsel, avg);
+       } else if (
+               evsel->attr.type == PERF_TYPE_HW_CACHE &&
+               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
+                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+                       runtime_l1_icache_stats[ctx][cpu].n != 0) {
+               print_l1_icache_misses(out, cpu, evsel, avg);
+       } else if (
+               evsel->attr.type == PERF_TYPE_HW_CACHE &&
+               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
+                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+                       runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
+               print_dtlb_cache_misses(out, cpu, evsel, avg);
+       } else if (
+               evsel->attr.type == PERF_TYPE_HW_CACHE &&
+               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
+                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+                       runtime_itlb_cache_stats[ctx][cpu].n != 0) {
+               print_itlb_cache_misses(out, cpu, evsel, avg);
+       } else if (
+               evsel->attr.type == PERF_TYPE_HW_CACHE &&
+               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
+                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+                       runtime_ll_cache_stats[ctx][cpu].n != 0) {
+               print_ll_cache_misses(out, cpu, evsel, avg);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
+                       runtime_cacherefs_stats[ctx][cpu].n != 0) {
+               total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
+
+               if (total)
+                       ratio = avg * 100 / total;
+
+               fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
+
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+               print_stalled_cycles_frontend(out, cpu, evsel, avg);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+               print_stalled_cycles_backend(out, cpu, evsel, avg);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+               total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+               if (total) {
+                       ratio = avg / total;
+                       fprintf(out, " # %8.3f GHz                    ", ratio);
+               } else {
+                       fprintf(out, "                                   ");
+               }
+       } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
+               total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+               if (total)
+                       fprintf(out,
+                               " #   %5.2f%% transactional cycles   ",
+                               100.0 * (avg / total));
+       } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
+               total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+               total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+               if (total2 < avg)
+                       total2 = avg;
+               if (total)
+                       fprintf(out,
+                               " #   %5.2f%% aborted cycles         ",
+                               100.0 * ((total2-avg) / total));
+       } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
+                  avg > 0 &&
+                  runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+               total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+
+               if (total)
+                       ratio = total / avg;
+
+               fprintf(out, " # %8.0f cycles / transaction   ", ratio);
+       } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
+                  avg > 0 &&
+                  runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+               total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+
+               if (total)
+                       ratio = total / avg;
+
+               fprintf(out, " # %8.0f cycles / elision       ", ratio);
+       } else if (runtime_nsecs_stats[cpu].n != 0) {
+               char unit = 'M';
+
+               total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+               if (total)
+                       ratio = 1000.0 * avg / total;
+               if (ratio < 0.001) {
+                       ratio *= 1000;
+                       unit = 'K';
+               }
+
+               fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
+       } else {
+               fprintf(out, "                                   ");
+       }
+}
index 6506b3d..4014b70 100644 (file)
@@ -1,6 +1,6 @@
 #include <math.h>
-
 #include "stat.h"
+#include "evsel.h"
 
 void update_stats(struct stats *stats, u64 val)
 {
@@ -61,3 +61,72 @@ double rel_stddev_stats(double stddev, double avg)
 
        return pct;
 }
+
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+                          enum perf_stat_evsel_id id)
+{
+       struct perf_stat *ps = evsel->priv;
+
+       return ps->id == id;
+}
+
+#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
+static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
+       ID(NONE,                x),
+       ID(CYCLES_IN_TX,        cpu/cycles-t/),
+       ID(TRANSACTION_START,   cpu/tx-start/),
+       ID(ELISION_START,       cpu/el-start/),
+       ID(CYCLES_IN_TX_CP,     cpu/cycles-ct/),
+};
+#undef ID
+
+void perf_stat_evsel_id_init(struct perf_evsel *evsel)
+{
+       struct perf_stat *ps = evsel->priv;
+       int i;
+
+       /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
+
+       for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
+               if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
+                       ps->id = i;
+                       break;
+               }
+       }
+}
+
+struct perf_counts *perf_counts__new(int ncpus)
+{
+       int size = sizeof(struct perf_counts) +
+                  ncpus * sizeof(struct perf_counts_values);
+
+       return zalloc(size);
+}
+
+void perf_counts__delete(struct perf_counts *counts)
+{
+       free(counts);
+}
+
+static void perf_counts__reset(struct perf_counts *counts, int ncpus)
+{
+       memset(counts, 0, (sizeof(*counts) +
+              (ncpus * sizeof(struct perf_counts_values))));
+}
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
+{
+       perf_counts__reset(evsel->counts, ncpus);
+}
+
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
+{
+       evsel->counts = perf_counts__new(ncpus);
+       return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evsel__free_counts(struct perf_evsel *evsel)
+{
+       perf_counts__delete(evsel->counts);
+       evsel->counts = NULL;
+}
index 5667fc3..093dc3c 100644 (file)
@@ -2,6 +2,7 @@
 #define __PERF_STATS_H
 
 #include <linux/types.h>
+#include <stdio.h>
 
 struct stats
 {
@@ -9,6 +10,27 @@ struct stats
        u64 max, min;
 };
 
+enum perf_stat_evsel_id {
+       PERF_STAT_EVSEL_ID__NONE = 0,
+       PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
+       PERF_STAT_EVSEL_ID__TRANSACTION_START,
+       PERF_STAT_EVSEL_ID__ELISION_START,
+       PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+       PERF_STAT_EVSEL_ID__MAX,
+};
+
+struct perf_stat {
+       struct stats            res_stats[3];
+       enum perf_stat_evsel_id id;
+};
+
+enum aggr_mode {
+       AGGR_NONE,
+       AGGR_GLOBAL,
+       AGGR_SOCKET,
+       AGGR_CORE,
+};
+
 void update_stats(struct stats *stats, u64 val);
 double avg_stats(struct stats *stats);
 double stddev_stats(struct stats *stats);
@@ -22,4 +44,28 @@ static inline void init_stats(struct stats *stats)
        stats->min  = (u64) -1;
        stats->max  = 0;
 }
+
+struct perf_evsel;
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+                          enum perf_stat_evsel_id id);
+
+#define perf_stat_evsel__is(evsel, id) \
+       __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
+
+void perf_stat_evsel_id_init(struct perf_evsel *evsel);
+
+extern struct stats walltime_nsecs_stats;
+
+void perf_stat__reset_shadow_stats(void);
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+                                   int cpu);
+void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
+                                  double avg, int cpu, enum aggr_mode aggr);
+
+struct perf_counts *perf_counts__new(int ncpus);
+void perf_counts__delete(struct perf_counts *counts);
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
+void perf_evsel__free_counts(struct perf_evsel *evsel);
 #endif
index 79a757a..bcae659 100644 (file)
@@ -170,6 +170,46 @@ struct strfilter *strfilter__new(const char *rules, const char **err)
        return filter;
 }
 
+static int strfilter__append(struct strfilter *filter, bool _or,
+                            const char *rules, const char **err)
+{
+       struct strfilter_node *right, *root;
+       const char *ep = NULL;
+
+       if (!filter || !rules)
+               return -EINVAL;
+
+       right = strfilter_node__new(rules, &ep);
+       if (!right || *ep != '\0') {
+               if (err)
+                       *err = ep;
+               goto error;
+       }
+       root = strfilter_node__alloc(_or ? OP_or : OP_and, filter->root, right);
+       if (!root) {
+               ep = NULL;
+               goto error;
+       }
+
+       filter->root = root;
+       return 0;
+
+error:
+       strfilter_node__delete(right);
+       return ep ? -EINVAL : -ENOMEM;
+}
+
+int strfilter__or(struct strfilter *filter, const char *rules, const char **err)
+{
+       return strfilter__append(filter, true, rules, err);
+}
+
+int strfilter__and(struct strfilter *filter, const char *rules,
+                  const char **err)
+{
+       return strfilter__append(filter, false, rules, err);
+}
+
 static bool strfilter_node__compare(struct strfilter_node *node,
                                    const char *str)
 {
@@ -197,3 +237,70 @@ bool strfilter__compare(struct strfilter *filter, const char *str)
                return false;
        return strfilter_node__compare(filter->root, str);
 }
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf);
+
+/* sprint node in parenthesis if needed */
+static int strfilter_node__sprint_pt(struct strfilter_node *node, char *buf)
+{
+       int len;
+       int pt = node->r ? 2 : 0;       /* don't need to check node->l */
+
+       if (buf && pt)
+               *buf++ = '(';
+       len = strfilter_node__sprint(node, buf);
+       if (len < 0)
+               return len;
+       if (buf && pt)
+               *(buf + len) = ')';
+       return len + pt;
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf)
+{
+       int len = 0, rlen;
+
+       if (!node || !node->p)
+               return -EINVAL;
+
+       switch (*node->p) {
+       case '|':
+       case '&':
+               len = strfilter_node__sprint_pt(node->l, buf);
+               if (len < 0)
+                       return len;
+       case '!':
+               if (buf) {
+                       *(buf + len++) = *node->p;
+                       buf += len;
+               } else
+                       len++;
+               rlen = strfilter_node__sprint_pt(node->r, buf);
+               if (rlen < 0)
+                       return rlen;
+               len += rlen;
+               break;
+       default:
+               len = strlen(node->p);
+               if (buf)
+                       strcpy(buf, node->p);
+       }
+
+       return len;
+}
+
+char *strfilter__string(struct strfilter *filter)
+{
+       int len;
+       char *ret = NULL;
+
+       len = strfilter_node__sprint(filter->root, NULL);
+       if (len < 0)
+               return NULL;
+
+       ret = malloc(len + 1);
+       if (ret)
+               strfilter_node__sprint(filter->root, ret);
+
+       return ret;
+}
index fe611f3..cff5eda 100644 (file)
@@ -29,6 +29,32 @@ struct strfilter {
 struct strfilter *strfilter__new(const char *rules, const char **err);
 
 /**
+ * strfilter__or - Append an additional rule by logical-or
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ *         @filter by using logical-or.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-or.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__or(struct strfilter *filter,
+                 const char *rules, const char **err);
+
+/**
+ * strfilter__add - Append an additional rule by logical-and
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ *         @filter by using logical-and.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-and.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__and(struct strfilter *filter,
+                  const char *rules, const char **err);
+
+/**
  * strfilter__compare - compare given string and a string filter
  * @filter: String filter
  * @str: target string
@@ -45,4 +71,13 @@ bool strfilter__compare(struct strfilter *filter, const char *str);
  */
 void strfilter__delete(struct strfilter *filter);
 
+/**
+ * strfilter__string - Reconstruct a rule string from filter
+ * @filter: String filter to reconstruct
+ *
+ * Reconstruct a rule string from @filter. This will be good for
+ * debug messages. Note that returning string must be freed afterward.
+ */
+char *strfilter__string(struct strfilter *filter);
+
 #endif
index a7ab606..65f7e38 100644 (file)
@@ -630,6 +630,11 @@ void symsrc__destroy(struct symsrc *ss)
        close(ss->fd);
 }
 
+bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+       return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL;
+}
+
 int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
                 enum dso_binary_type type)
 {
@@ -678,6 +683,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
                }
 
                if (!dso__build_id_equal(dso, build_id)) {
+                       pr_debug("%s: build id mismatch for %s.\n", __func__, name);
                        dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
                        goto out_elf_end;
                }
@@ -711,8 +717,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
                                                     ".gnu.prelink_undo",
                                                     NULL) != NULL);
        } else {
-               ss->adjust_symbols = ehdr.e_type == ET_EXEC ||
-                                    ehdr.e_type == ET_REL;
+               ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
        }
 
        ss->name   = strdup(name);
@@ -771,6 +776,8 @@ static bool want_demangle(bool is_kernel_sym)
        return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
 }
 
+void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { }
+
 int dso__load_sym(struct dso *dso, struct map *map,
                  struct symsrc *syms_ss, struct symsrc *runtime_ss,
                  symbol_filter_t filter, int kmodule)
@@ -935,6 +942,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
                    (sym.st_value & 1))
                        --sym.st_value;
 
+               arch__elf_sym_adjust(&sym);
+
                if (dso->kernel || kmodule) {
                        char dso_name[PATH_MAX];
 
@@ -963,8 +972,10 @@ int dso__load_sym(struct dso *dso, struct map *map,
                                        map->unmap_ip = map__unmap_ip;
                                        /* Ensure maps are correctly ordered */
                                        if (kmaps) {
+                                               map__get(map);
                                                map_groups__remove(kmaps, map);
                                                map_groups__insert(kmaps, map);
+                                               map__put(map);
                                        }
                                }
 
@@ -1005,7 +1016,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
                                curr_map = map__new2(start, curr_dso,
                                                     map->type);
                                if (curr_map == NULL) {
-                                       dso__delete(curr_dso);
+                                       dso__put(curr_dso);
                                        goto out_elf_end;
                                }
                                if (adjust_kernel_syms) {
@@ -1020,11 +1031,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
                                }
                                curr_dso->symtab_type = dso->symtab_type;
                                map_groups__insert(kmaps, curr_map);
-                               /*
-                                * The new DSO should go to the kernel DSOS
-                                */
-                               dsos__add(&map->groups->machine->kernel_dsos,
-                                         curr_dso);
+                               dsos__add(&map->groups->machine->dsos, curr_dso);
                                dso__set_loaded(curr_dso, map->type);
                        } else
                                curr_dso = curr_map->dso;
index 201f6c4..504f2d7 100644 (file)
@@ -85,8 +85,17 @@ static int prefix_underscores_count(const char *str)
        return tail - str;
 }
 
-#define SYMBOL_A 0
-#define SYMBOL_B 1
+int __weak arch__choose_best_symbol(struct symbol *syma,
+                                   struct symbol *symb __maybe_unused)
+{
+       /* Avoid "SyS" kernel syscall aliases */
+       if (strlen(syma->name) >= 3 && !strncmp(syma->name, "SyS", 3))
+               return SYMBOL_B;
+       if (strlen(syma->name) >= 10 && !strncmp(syma->name, "compat_SyS", 10))
+               return SYMBOL_B;
+
+       return SYMBOL_A;
+}
 
 static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
 {
@@ -134,13 +143,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
        else if (na < nb)
                return SYMBOL_B;
 
-       /* Avoid "SyS" kernel syscall aliases */
-       if (na >= 3 && !strncmp(syma->name, "SyS", 3))
-               return SYMBOL_B;
-       if (na >= 10 && !strncmp(syma->name, "compat_SyS", 10))
-               return SYMBOL_B;
-
-       return SYMBOL_A;
+       return arch__choose_best_symbol(syma, symb);
 }
 
 void symbols__fixup_duplicate(struct rb_root *symbols)
@@ -199,18 +202,18 @@ void symbols__fixup_end(struct rb_root *symbols)
 
 void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 {
-       struct map *prev, *curr;
-       struct rb_node *nd, *prevnd = rb_first(&mg->maps[type]);
+       struct maps *maps = &mg->maps[type];
+       struct map *next, *curr;
 
-       if (prevnd == NULL)
-               return;
+       pthread_rwlock_wrlock(&maps->lock);
 
-       curr = rb_entry(prevnd, struct map, rb_node);
+       curr = maps__first(maps);
+       if (curr == NULL)
+               goto out_unlock;
 
-       for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
-               prev = curr;
-               curr = rb_entry(nd, struct map, rb_node);
-               prev->end = curr->start;
+       for (next = map__next(curr); next; next = map__next(curr)) {
+               curr->end = next->start;
+               curr = next;
        }
 
        /*
@@ -218,6 +221,9 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
         * last map final address.
         */
        curr->end = ~0ULL;
+
+out_unlock:
+       pthread_rwlock_unlock(&maps->lock);
 }
 
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@@ -397,7 +403,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
                                            const char *name)
 {
        struct rb_node *n;
-       struct symbol_name_rb_node *s;
+       struct symbol_name_rb_node *s = NULL;
 
        if (symbols == NULL)
                return NULL;
@@ -408,7 +414,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
                int cmp;
 
                s = rb_entry(n, struct symbol_name_rb_node, rb_node);
-               cmp = strcmp(name, s->sym.name);
+               cmp = arch__compare_symbol_names(name, s->sym.name);
 
                if (cmp < 0)
                        n = n->rb_left;
@@ -426,7 +432,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
                struct symbol_name_rb_node *tmp;
 
                tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
-               if (strcmp(tmp->sym.name, s->sym.name))
+               if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
                        break;
 
                s = tmp;
@@ -653,14 +659,14 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
                curr_map = map_groups__find(kmaps, map->type, pos->start);
 
                if (!curr_map || (filter && filter(curr_map, pos))) {
-                       rb_erase(&pos->rb_node, root);
+                       rb_erase_init(&pos->rb_node, root);
                        symbol__delete(pos);
                } else {
                        pos->start -= curr_map->start - curr_map->pgoff;
                        if (pos->end)
                                pos->end -= curr_map->start - curr_map->pgoff;
                        if (curr_map != map) {
-                               rb_erase(&pos->rb_node, root);
+                               rb_erase_init(&pos->rb_node, root);
                                symbols__insert(
                                        &curr_map->dso->symbols[curr_map->type],
                                        pos);
@@ -780,7 +786,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta,
 
                        curr_map = map__new2(pos->start, ndso, map->type);
                        if (curr_map == NULL) {
-                               dso__delete(ndso);
+                               dso__put(ndso);
                                return -1;
                        }
 
@@ -1167,20 +1173,23 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
        /* Add new maps */
        while (!list_empty(&md.maps)) {
                new_map = list_entry(md.maps.next, struct map, node);
-               list_del(&new_map->node);
+               list_del_init(&new_map->node);
                if (new_map == replacement_map) {
                        map->start      = new_map->start;
                        map->end        = new_map->end;
                        map->pgoff      = new_map->pgoff;
                        map->map_ip     = new_map->map_ip;
                        map->unmap_ip   = new_map->unmap_ip;
-                       map__delete(new_map);
                        /* Ensure maps are correctly ordered */
+                       map__get(map);
                        map_groups__remove(kmaps, map);
                        map_groups__insert(kmaps, map);
+                       map__put(map);
                } else {
                        map_groups__insert(kmaps, new_map);
                }
+
+               map__put(new_map);
        }
 
        /*
@@ -1205,8 +1214,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 out_err:
        while (!list_empty(&md.maps)) {
                map = list_entry(md.maps.next, struct map, node);
-               list_del(&map->node);
-               map__delete(map);
+               list_del_init(&map->node);
+               map__put(map);
        }
        close(fd);
        return -EINVAL;
@@ -1355,7 +1364,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
        case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
                /*
                 * kernel modules know their symtab type - it's set when
-                * creating a module dso in machine__new_module().
+                * creating a module dso in machine__findnew_module_map().
                 */
                return kmod && dso->symtab_type == type;
 
@@ -1380,12 +1389,22 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
        struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
        bool kmod;
 
-       dso__set_loaded(dso, map->type);
+       pthread_mutex_lock(&dso->lock);
+
+       /* check again under the dso->lock */
+       if (dso__loaded(dso, map->type)) {
+               ret = 1;
+               goto out;
+       }
 
-       if (dso->kernel == DSO_TYPE_KERNEL)
-               return dso__load_kernel_sym(dso, map, filter);
-       else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
-               return dso__load_guest_kernel_sym(dso, map, filter);
+       if (dso->kernel) {
+               if (dso->kernel == DSO_TYPE_KERNEL)
+                       ret = dso__load_kernel_sym(dso, map, filter);
+               else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+                       ret = dso__load_guest_kernel_sym(dso, map, filter);
+
+               goto out;
+       }
 
        if (map->groups && map->groups->machine)
                machine = map->groups->machine;
@@ -1398,18 +1417,18 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
                struct stat st;
 
                if (lstat(dso->name, &st) < 0)
-                       return -1;
+                       goto out;
 
                if (st.st_uid && (st.st_uid != geteuid())) {
                        pr_warning("File %s not owned by current user or root, "
                                "ignoring it.\n", dso->name);
-                       return -1;
+                       goto out;
                }
 
                ret = dso__load_perf_map(dso, map, filter);
                dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
                                             DSO_BINARY_TYPE__NOT_FOUND;
-               return ret;
+               goto out;
        }
 
        if (machine)
@@ -1417,7 +1436,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 
        name = malloc(PATH_MAX);
        if (!name)
-               return -1;
+               goto out;
 
        kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
                dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
@@ -1498,23 +1517,32 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 out_free:
        free(name);
        if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
-               return 0;
+               ret = 0;
+out:
+       dso__set_loaded(dso, map->type);
+       pthread_mutex_unlock(&dso->lock);
+
        return ret;
 }
 
 struct map *map_groups__find_by_name(struct map_groups *mg,
                                     enum map_type type, const char *name)
 {
-       struct rb_node *nd;
+       struct maps *maps = &mg->maps[type];
+       struct map *map;
 
-       for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
-               struct map *map = rb_entry(nd, struct map, rb_node);
+       pthread_rwlock_rdlock(&maps->lock);
 
+       for (map = maps__first(maps); map; map = map__next(map)) {
                if (map->dso && strcmp(map->dso->short_name, name) == 0)
-                       return map;
+                       goto out_unlock;
        }
 
-       return NULL;
+       map = NULL;
+
+out_unlock:
+       pthread_rwlock_unlock(&maps->lock);
+       return map;
 }
 
 int dso__load_vmlinux(struct dso *dso, struct map *map,
@@ -1802,6 +1830,7 @@ static void vmlinux_path__exit(void)
 {
        while (--vmlinux_path__nr_entries >= 0)
                zfree(&vmlinux_path[vmlinux_path__nr_entries]);
+       vmlinux_path__nr_entries = 0;
 
        zfree(&vmlinux_path);
 }
index 0956150..bef47ea 100644 (file)
@@ -158,8 +158,6 @@ struct ref_reloc_sym {
 struct map_symbol {
        struct map    *map;
        struct symbol *sym;
-       bool          unfolded;
-       bool          has_children;
 };
 
 struct addr_map_symbol {
@@ -303,4 +301,14 @@ int setup_list(struct strlist **list, const char *list_str,
 int setup_intlist(struct intlist **list, const char *list_str,
                  const char *list_name);
 
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
+void arch__elf_sym_adjust(GElf_Sym *sym);
+#endif
+
+#define SYMBOL_A 0
+#define SYMBOL_B 1
+
+int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
+
 #endif /* __PERF_SYMBOL */
index 9ed59a4..679688e 100644 (file)
@@ -219,7 +219,7 @@ static int thread_stack__call_return(struct thread *thread,
        return crp->process(&cr, crp->data);
 }
 
-static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
+static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
 {
        struct call_return_processor *crp = ts->crp;
        int err;
@@ -242,6 +242,14 @@ static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
        return 0;
 }
 
+int thread_stack__flush(struct thread *thread)
+{
+       if (thread->ts)
+               return __thread_stack__flush(thread, thread->ts);
+
+       return 0;
+}
+
 int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
                        u64 to_ip, u16 insn_len, u64 trace_nr)
 {
@@ -264,7 +272,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
         */
        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
-                       thread_stack__flush(thread, thread->ts);
+                       __thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }
 
@@ -297,7 +305,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
 
        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
-                       thread_stack__flush(thread, thread->ts);
+                       __thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }
 }
@@ -305,7 +313,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
 void thread_stack__free(struct thread *thread)
 {
        if (thread->ts) {
-               thread_stack__flush(thread, thread->ts);
+               __thread_stack__flush(thread, thread->ts);
                zfree(&thread->ts->stack);
                zfree(&thread->ts);
        }
@@ -689,7 +697,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 
        /* Flush stack on exec */
        if (ts->comm != comm && thread->pid_ == thread->tid) {
-               err = thread_stack__flush(thread, ts);
+               err = __thread_stack__flush(thread, ts);
                if (err)
                        return err;
                ts->comm = comm;
index b843bbe..e1528f1 100644 (file)
@@ -96,6 +96,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
                          size_t sz, u64 ip);
+int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
 
 struct call_return_processor *
index 1c8fbc9..28c4b74 100644 (file)
@@ -18,7 +18,7 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine)
        if (pid == thread->tid || pid == -1) {
                thread->mg = map_groups__new(machine);
        } else {
-               leader = machine__findnew_thread(machine, pid, pid);
+               leader = __machine__findnew_thread(machine, pid, pid);
                if (leader)
                        thread->mg = map_groups__get(leader->mg);
        }
@@ -53,7 +53,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
                        goto err_thread;
 
                list_add(&comm->list, &thread->comm_list);
-
+               atomic_set(&thread->refcnt, 0);
+               RB_CLEAR_NODE(&thread->rb_node);
        }
 
        return thread;
@@ -67,6 +68,8 @@ void thread__delete(struct thread *thread)
 {
        struct comm *comm, *tmp;
 
+       BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
+
        thread_stack__free(thread);
 
        if (thread->mg) {
@@ -84,13 +87,14 @@ void thread__delete(struct thread *thread)
 
 struct thread *thread__get(struct thread *thread)
 {
-       ++thread->refcnt;
+       if (thread)
+               atomic_inc(&thread->refcnt);
        return thread;
 }
 
 void thread__put(struct thread *thread)
 {
-       if (thread && --thread->refcnt == 0) {
+       if (thread && atomic_dec_and_test(&thread->refcnt)) {
                list_del_init(&thread->node);
                thread__delete(thread);
        }
index 9b8a54d..a0ac031 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __PERF_THREAD_H
 #define __PERF_THREAD_H
 
+#include <linux/atomic.h>
 #include <linux/rbtree.h>
 #include <linux/list.h>
 #include <unistd.h>
@@ -21,12 +22,12 @@ struct thread {
        pid_t                   tid;
        pid_t                   ppid;
        int                     cpu;
-       int                     refcnt;
+       atomic_t                refcnt;
        char                    shortname[3];
        bool                    comm_set;
+       int                     comm_len;
        bool                    dead; /* if set thread has exited */
        struct list_head        comm_list;
-       int                     comm_len;
        u64                     db_id;
 
        void                    *priv;
index f93b973..f4822bd 100644 (file)
@@ -20,6 +20,15 @@ static int filter(const struct dirent *dir)
                return 1;
 }
 
+static struct thread_map *thread_map__realloc(struct thread_map *map, int nr)
+{
+       size_t size = sizeof(*map) + sizeof(pid_t) * nr;
+
+       return realloc(map, size);
+}
+
+#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr)
+
 struct thread_map *thread_map__new_by_pid(pid_t pid)
 {
        struct thread_map *threads;
@@ -33,7 +42,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
        if (items <= 0)
                return NULL;
 
-       threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+       threads = thread_map__alloc(items);
        if (threads != NULL) {
                for (i = 0; i < items; i++)
                        threads->map[i] = atoi(namelist[i]->d_name);
@@ -49,7 +58,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
 
 struct thread_map *thread_map__new_by_tid(pid_t tid)
 {
-       struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+       struct thread_map *threads = thread_map__alloc(1);
 
        if (threads != NULL) {
                threads->map[0] = tid;
@@ -65,8 +74,8 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
        int max_threads = 32, items, i;
        char path[256];
        struct dirent dirent, *next, **namelist = NULL;
-       struct thread_map *threads = malloc(sizeof(*threads) +
-                                           max_threads * sizeof(pid_t));
+       struct thread_map *threads = thread_map__alloc(max_threads);
+
        if (threads == NULL)
                goto out;
 
@@ -185,8 +194,7 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
                        goto out_free_threads;
 
                total_tasks += items;
-               nt = realloc(threads, (sizeof(*threads) +
-                                      sizeof(pid_t) * total_tasks));
+               nt = thread_map__realloc(threads, total_tasks);
                if (nt == NULL)
                        goto out_free_namelist;
 
@@ -216,7 +224,7 @@ out_free_threads:
 
 struct thread_map *thread_map__new_dummy(void)
 {
-       struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+       struct thread_map *threads = thread_map__alloc(1);
 
        if (threads != NULL) {
                threads->map[0] = -1;
@@ -253,7 +261,7 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
                        continue;
 
                ntasks++;
-               nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
+               nt = thread_map__realloc(threads, ntasks);
 
                if (nt == NULL)
                        goto out_free_threads;
index 51d9e56..c307dd4 100644 (file)
@@ -3,6 +3,8 @@
 
 #include <stdbool.h>
 
+#include <linux/types.h>
+
 struct perf_session;
 union perf_event;
 struct perf_evlist;
@@ -29,6 +31,9 @@ typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
 typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
                        struct ordered_events *oe);
 
+typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event,
+                        struct perf_session *session);
+
 struct perf_tool {
        event_sample    sample,
                        read;
@@ -38,13 +43,19 @@ struct perf_tool {
                        fork,
                        exit,
                        lost,
+                       lost_samples,
+                       aux,
+                       itrace_start,
                        throttle,
                        unthrottle;
        event_attr_op   attr;
        event_op2       tracing_data;
        event_oe        finished_round;
        event_op2       build_id,
-                       id_index;
+                       id_index,
+                       auxtrace_info,
+                       auxtrace_error;
+       event_op3       auxtrace;
        bool            ordered_events;
        bool            ordering_requires_timestamps;
 };
index 25d6c73..d495741 100644 (file)
@@ -173,7 +173,7 @@ void parse_ftrace_printk(struct pevent *pevent,
        char *line;
        char *next = NULL;
        char *addr_str;
-       char *fmt;
+       char *fmt = NULL;
 
        line = strtok_r(file, "\n", &next);
        while (line) {
index 7b09a44..4c00507 100644 (file)
@@ -269,13 +269,14 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
        u64 offset = dso->data.eh_frame_hdr_offset;
 
        if (offset == 0) {
-               fd = dso__data_fd(dso, machine);
+               fd = dso__data_get_fd(dso, machine);
                if (fd < 0)
                        return -EINVAL;
 
                /* Check the .eh_frame section for unwinding info */
                offset = elf_section_offset(fd, ".eh_frame_hdr");
                dso->data.eh_frame_hdr_offset = offset;
+               dso__data_put_fd(dso);
        }
 
        if (offset)
@@ -294,13 +295,14 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
        u64 ofs = dso->data.debug_frame_offset;
 
        if (ofs == 0) {
-               fd = dso__data_fd(dso, machine);
+               fd = dso__data_get_fd(dso, machine);
                if (fd < 0)
                        return -EINVAL;
 
                /* Check the .debug_frame section for unwinding info */
                ofs = elf_section_offset(fd, ".debug_frame");
                dso->data.debug_frame_offset = ofs;
+               dso__data_put_fd(dso);
        }
 
        *offset = ofs;
@@ -353,10 +355,13 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
 #ifndef NO_LIBUNWIND_DEBUG_FRAME
        /* Check the .debug_frame section for unwinding info */
        if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
-               int fd = dso__data_fd(map->dso, ui->machine);
+               int fd = dso__data_get_fd(map->dso, ui->machine);
                int is_exec = elf_is_exec(fd, map->dso->name);
                unw_word_t base = is_exec ? 0 : map->start;
 
+               if (fd >= 0)
+                       dso__data_put_fd(map->dso);
+
                memset(&di, 0, sizeof(di));
                if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name,
                                           map->start, map->end))
index 4ee6d0d..edc2d63 100644 (file)
@@ -72,20 +72,60 @@ int mkdir_p(char *path, mode_t mode)
        return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
 }
 
-static int slow_copyfile(const char *from, const char *to, mode_t mode)
+int rm_rf(char *path)
+{
+       DIR *dir;
+       int ret = 0;
+       struct dirent *d;
+       char namebuf[PATH_MAX];
+
+       dir = opendir(path);
+       if (dir == NULL)
+               return 0;
+
+       while ((d = readdir(dir)) != NULL && !ret) {
+               struct stat statbuf;
+
+               if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+                       continue;
+
+               scnprintf(namebuf, sizeof(namebuf), "%s/%s",
+                         path, d->d_name);
+
+               ret = stat(namebuf, &statbuf);
+               if (ret < 0) {
+                       pr_debug("stat failed: %s\n", namebuf);
+                       break;
+               }
+
+               if (S_ISREG(statbuf.st_mode))
+                       ret = unlink(namebuf);
+               else if (S_ISDIR(statbuf.st_mode))
+                       ret = rm_rf(namebuf);
+               else {
+                       pr_debug("unknown file: %s\n", namebuf);
+                       ret = -1;
+               }
+       }
+       closedir(dir);
+
+       if (ret < 0)
+               return ret;
+
+       return rmdir(path);
+}
+
+static int slow_copyfile(const char *from, const char *to)
 {
        int err = -1;
        char *line = NULL;
        size_t n;
        FILE *from_fp = fopen(from, "r"), *to_fp;
-       mode_t old_umask;
 
        if (from_fp == NULL)
                goto out;
 
-       old_umask = umask(mode ^ 0777);
        to_fp = fopen(to, "w");
-       umask(old_umask);
        if (to_fp == NULL)
                goto out_fclose_from;
 
@@ -102,42 +142,81 @@ out:
        return err;
 }
 
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+{
+       void *ptr;
+       loff_t pgoff;
+
+       pgoff = off_in & ~(page_size - 1);
+       off_in -= pgoff;
+
+       ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
+       if (ptr == MAP_FAILED)
+               return -1;
+
+       while (size) {
+               ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
+               if (ret < 0 && errno == EINTR)
+                       continue;
+               if (ret <= 0)
+                       break;
+
+               size -= ret;
+               off_in += ret;
+               off_out -= ret;
+       }
+       munmap(ptr, off_in + size);
+
+       return size ? -1 : 0;
+}
+
 int copyfile_mode(const char *from, const char *to, mode_t mode)
 {
        int fromfd, tofd;
        struct stat st;
-       void *addr;
        int err = -1;
+       char *tmp = NULL, *ptr = NULL;
 
        if (stat(from, &st))
                goto out;
 
-       if (st.st_size == 0) /* /proc? do it slowly... */
-               return slow_copyfile(from, to, mode);
-
-       fromfd = open(from, O_RDONLY);
-       if (fromfd < 0)
+       /* extra 'x' at the end is to reserve space for '.' */
+       if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
+               tmp = NULL;
                goto out;
+       }
+       ptr = strrchr(tmp, '/');
+       if (!ptr)
+               goto out;
+       ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
+       *ptr = '.';
 
-       tofd = creat(to, mode);
+       tofd = mkstemp(tmp);
        if (tofd < 0)
-               goto out_close_from;
+               goto out;
+
+       if (fchmod(tofd, mode))
+               goto out_close_to;
+
+       if (st.st_size == 0) { /* /proc? do it slowly... */
+               err = slow_copyfile(from, tmp);
+               goto out_close_to;
+       }
 
-       addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0);
-       if (addr == MAP_FAILED)
+       fromfd = open(from, O_RDONLY);
+       if (fromfd < 0)
                goto out_close_to;
 
-       if (write(tofd, addr, st.st_size) == st.st_size)
-               err = 0;
+       err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
 
-       munmap(addr, st.st_size);
+       close(fromfd);
 out_close_to:
        close(tofd);
-       if (err)
-               unlink(to);
-out_close_from:
-       close(fromfd);
+       if (!err)
+               err = link(tmp, to);
+       unlink(tmp);
 out:
+       free(tmp);
        return err;
 }
 
index 1ff23e0..8bce58b 100644 (file)
@@ -249,14 +249,20 @@ static inline int sane_case(int x, int high)
 }
 
 int mkdir_p(char *path, mode_t mode);
+int rm_rf(char *path);
 int copyfile(const char *from, const char *to);
 int copyfile_mode(const char *from, const char *to, mode_t mode);
+int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
 
 s64 perf_atoll(const char *str);
 char **argv_split(const char *str, int *argcp);
 void argv_free(char **argv);
 bool strglobmatch(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
+static inline bool strisglob(const char *str)
+{
+       return strpbrk(str, "*?[") != NULL;
+}
 int strtailcmp(const char *s1, const char *s2);
 char *strxfrchar(char *s, char from, char to);
 unsigned long convert_unit(unsigned long value, char *unit);
index 5c7dd79..4b89118 100644 (file)
@@ -101,7 +101,7 @@ static char *get_file(struct vdso_file *vdso_file)
        return vdso;
 }
 
-void vdso__exit(struct machine *machine)
+void machine__exit_vdso(struct machine *machine)
 {
        struct vdso_info *vdso_info = machine->vdso_info;
 
@@ -120,14 +120,14 @@ void vdso__exit(struct machine *machine)
        zfree(&machine->vdso_info);
 }
 
-static struct dso *vdso__new(struct machine *machine, const char *short_name,
-                            const char *long_name)
+static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name,
+                                         const char *long_name)
 {
        struct dso *dso;
 
        dso = dso__new(short_name);
        if (dso != NULL) {
-               dsos__add(&machine->user_dsos, dso);
+               __dsos__add(&machine->dsos, dso);
                dso__set_long_name(dso, long_name, false);
        }
 
@@ -230,27 +230,31 @@ static const char *vdso__get_compat_file(struct vdso_file *vdso_file)
        return vdso_file->temp_file_name;
 }
 
-static struct dso *vdso__findnew_compat(struct machine *machine,
-                                       struct vdso_file *vdso_file)
+static struct dso *__machine__findnew_compat(struct machine *machine,
+                                            struct vdso_file *vdso_file)
 {
        const char *file_name;
        struct dso *dso;
 
-       dso = dsos__find(&machine->user_dsos, vdso_file->dso_name, true);
+       pthread_rwlock_wrlock(&machine->dsos.lock);
+       dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
        if (dso)
-               return dso;
+               goto out_unlock;
 
        file_name = vdso__get_compat_file(vdso_file);
        if (!file_name)
-               return NULL;
+               goto out_unlock;
 
-       return vdso__new(machine, vdso_file->dso_name, file_name);
+       dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+out_unlock:
+       pthread_rwlock_unlock(&machine->dsos.lock);
+       return dso;
 }
 
-static int vdso__dso_findnew_compat(struct machine *machine,
-                                   struct thread *thread,
-                                   struct vdso_info *vdso_info,
-                                   struct dso **dso)
+static int __machine__findnew_vdso_compat(struct machine *machine,
+                                         struct thread *thread,
+                                         struct vdso_info *vdso_info,
+                                         struct dso **dso)
 {
        enum dso_type dso_type;
 
@@ -267,10 +271,10 @@ static int vdso__dso_findnew_compat(struct machine *machine,
 
        switch (dso_type) {
        case DSO__TYPE_32BIT:
-               *dso = vdso__findnew_compat(machine, &vdso_info->vdso32);
+               *dso = __machine__findnew_compat(machine, &vdso_info->vdso32);
                return 1;
        case DSO__TYPE_X32BIT:
-               *dso = vdso__findnew_compat(machine, &vdso_info->vdsox32);
+               *dso = __machine__findnew_compat(machine, &vdso_info->vdsox32);
                return 1;
        case DSO__TYPE_UNKNOWN:
        case DSO__TYPE_64BIT:
@@ -281,35 +285,37 @@ static int vdso__dso_findnew_compat(struct machine *machine,
 
 #endif
 
-struct dso *vdso__dso_findnew(struct machine *machine,
-                             struct thread *thread __maybe_unused)
+struct dso *machine__findnew_vdso(struct machine *machine,
+                                 struct thread *thread __maybe_unused)
 {
        struct vdso_info *vdso_info;
-       struct dso *dso;
+       struct dso *dso = NULL;
 
+       pthread_rwlock_wrlock(&machine->dsos.lock);
        if (!machine->vdso_info)
                machine->vdso_info = vdso_info__new();
 
        vdso_info = machine->vdso_info;
        if (!vdso_info)
-               return NULL;
+               goto out_unlock;
 
 #if BITS_PER_LONG == 64
-       if (vdso__dso_findnew_compat(machine, thread, vdso_info, &dso))
-               return dso;
+       if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
+               goto out_unlock;
 #endif
 
-       dso = dsos__find(&machine->user_dsos, DSO__NAME_VDSO, true);
+       dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
        if (!dso) {
                char *file;
 
                file = get_file(&vdso_info->vdso);
-               if (!file)
-                       return NULL;
-
-               dso = vdso__new(machine, DSO__NAME_VDSO, file);
+               if (file)
+                       dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
        }
 
+out_unlock:
+       dso__get(dso);
+       pthread_rwlock_unlock(&machine->dsos.lock);
        return dso;
 }
 
index d97da16..cdc4fab 100644 (file)
@@ -23,7 +23,7 @@ bool dso__is_vdso(struct dso *dso);
 struct machine;
 struct thread;
 
-struct dso *vdso__dso_findnew(struct machine *machine, struct thread *thread);
-void vdso__exit(struct machine *machine);
+struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread);
+void machine__exit_vdso(struct machine *machine);
 
 #endif /* __PERF_VDSO__ */
index 22afbf6..c10ba41 100644 (file)
@@ -9,11 +9,19 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
        if (xy != NULL) {
                xy->entry_size = entry_size;
                xy->row_size   = row_size;
+               xy->entries    = xlen * ylen;
        }
 
        return xy;
 }
 
+void xyarray__reset(struct xyarray *xy)
+{
+       size_t n = xy->entries * xy->entry_size;
+
+       memset(xy->contents, 0, n);
+}
+
 void xyarray__delete(struct xyarray *xy)
 {
        free(xy);
index c488a07..7f30af3 100644 (file)
@@ -6,11 +6,13 @@
 struct xyarray {
        size_t row_size;
        size_t entry_size;
+       size_t entries;
        char contents[];
 };
 
 struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
 void xyarray__delete(struct xyarray *xy);
+void xyarray__reset(struct xyarray *xy);
 
 static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
 {
index bac98ca..323b65e 100644 (file)
@@ -52,6 +52,7 @@ unsigned int skip_c0;
 unsigned int skip_c1;
 unsigned int do_nhm_cstates;
 unsigned int do_snb_cstates;
+unsigned int do_knl_cstates;
 unsigned int do_pc2;
 unsigned int do_pc3;
 unsigned int do_pc6;
@@ -91,6 +92,7 @@ unsigned int do_gfx_perf_limit_reasons;
 unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
 unsigned long long tsc_hz;
+int base_cpu;
 
 #define RAPL_PKG               (1 << 0)
                                        /* 0x610 MSR_PKG_POWER_LIMIT */
@@ -316,7 +318,7 @@ void print_header(void)
 
        if (do_nhm_cstates)
                outp += sprintf(outp, "  CPU%%c1");
-       if (do_nhm_cstates && !do_slm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
                outp += sprintf(outp, "  CPU%%c3");
        if (do_nhm_cstates)
                outp += sprintf(outp, "  CPU%%c6");
@@ -546,7 +548,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                goto done;
 
-       if (do_nhm_cstates && !do_slm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
                outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
        if (do_nhm_cstates)
                outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
@@ -1018,14 +1020,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                return 0;
 
-       if (do_nhm_cstates && !do_slm_cstates) {
+       if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
                if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
                        return -6;
        }
 
-       if (do_nhm_cstates) {
+       if (do_nhm_cstates && !do_knl_cstates) {
                if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
                        return -7;
+       } else if (do_knl_cstates) {
+               if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
+                       return -7;
        }
 
        if (do_snb_cstates)
@@ -1150,7 +1155,7 @@ dump_nhm_platform_info(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
+       get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
 
        fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
 
@@ -1162,7 +1167,7 @@ dump_nhm_platform_info(void)
        fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
                ratio, bclk, ratio * bclk);
 
-       get_msr(0, MSR_IA32_POWER_CTL, &msr);
+       get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
        fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
                msr, msr & 0x2 ? "EN" : "DIS");
 
@@ -1175,7 +1180,7 @@ dump_hsw_turbo_ratio_limits(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr);
+       get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
 
        fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr);
 
@@ -1197,7 +1202,7 @@ dump_ivt_turbo_ratio_limits(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr);
+       get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
 
        fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr);
 
@@ -1249,7 +1254,7 @@ dump_nhm_turbo_ratio_limits(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr);
+       get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
 
        fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
@@ -1296,11 +1301,72 @@ dump_nhm_turbo_ratio_limits(void)
 }
 
 static void
+dump_knl_turbo_ratio_limits(void)
+{
+       int cores;
+       unsigned int ratio;
+       unsigned long long msr;
+       int delta_cores;
+       int delta_ratio;
+       int i;
+
+       get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
+
+       fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n",
+       msr);
+
+       /**
+        * Turbo encoding in KNL is as follows:
+        * [7:0] -- Base value of number of active cores of bucket 1.
+        * [15:8] -- Base value of freq ratio of bucket 1.
+        * [20:16] -- +ve delta of number of active cores of bucket 2.
+        * i.e. active cores of bucket 2 =
+        * active cores of bucket 1 + delta
+        * [23:21] -- Negative delta of freq ratio of bucket 2.
+        * i.e. freq ratio of bucket 2 =
+        * freq ratio of bucket 1 - delta
+        * [28:24]-- +ve delta of number of active cores of bucket 3.
+        * [31:29]-- -ve delta of freq ratio of bucket 3.
+        * [36:32]-- +ve delta of number of active cores of bucket 4.
+        * [39:37]-- -ve delta of freq ratio of bucket 4.
+        * [44:40]-- +ve delta of number of active cores of bucket 5.
+        * [47:45]-- -ve delta of freq ratio of bucket 5.
+        * [52:48]-- +ve delta of number of active cores of bucket 6.
+        * [55:53]-- -ve delta of freq ratio of bucket 6.
+        * [60:56]-- +ve delta of number of active cores of bucket 7.
+        * [63:61]-- -ve delta of freq ratio of bucket 7.
+        */
+       cores = msr & 0xFF;
+       ratio = (msr >> 8) && 0xFF;
+       if (ratio > 0)
+               fprintf(stderr,
+                       "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+                       ratio, bclk, ratio * bclk, cores);
+
+       for (i = 16; i < 64; i = i + 8) {
+               delta_cores = (msr >> i) & 0x1F;
+               delta_ratio = (msr >> (i + 5)) && 0x7;
+               if (!delta_cores || !delta_ratio)
+                       return;
+               cores = cores + delta_cores;
+               ratio = ratio - delta_ratio;
+
+               /** -ve ratios will make successive ratio calculations
+                * negative. Hence return instead of carrying on.
+                */
+               if (ratio > 0)
+                       fprintf(stderr,
+                               "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+                               ratio, bclk, ratio * bclk, cores);
+       }
+}
+
+static void
 dump_nhm_cst_cfg(void)
 {
        unsigned long long msr;
 
-       get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+       get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
 
 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
@@ -1381,12 +1447,41 @@ int parse_int_file(const char *fmt, ...)
 }
 
 /*
- * cpu_is_first_sibling_in_core(cpu)
- * return 1 if given CPU is 1st HT sibling in the core
+ * get_cpu_position_in_core(cpu)
+ * return the position of the CPU among its HT siblings in the core
+ * return -1 if the sibling is not in list
  */
-int cpu_is_first_sibling_in_core(int cpu)
+int get_cpu_position_in_core(int cpu)
 {
-       return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
+       char path[64];
+       FILE *filep;
+       int this_cpu;
+       char character;
+       int i;
+
+       sprintf(path,
+               "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
+               cpu);
+       filep = fopen(path, "r");
+       if (filep == NULL) {
+               perror(path);
+               exit(1);
+       }
+
+       for (i = 0; i < topo.num_threads_per_core; i++) {
+               fscanf(filep, "%d", &this_cpu);
+               if (this_cpu == cpu) {
+                       fclose(filep);
+                       return i;
+               }
+
+               /* Account for no separator after last thread*/
+               if (i != (topo.num_threads_per_core - 1))
+                       fscanf(filep, "%c", &character);
+       }
+
+       fclose(filep);
+       return -1;
 }
 
 /*
@@ -1412,25 +1507,31 @@ int get_num_ht_siblings(int cpu)
 {
        char path[80];
        FILE *filep;
-       int sib1, sib2;
-       int matches;
+       int sib1;
+       int matches = 0;
        char character;
+       char str[100];
+       char *ch;
 
        sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
        filep = fopen_or_die(path, "r");
+
        /*
         * file format:
-        * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4)
-        * otherwinse 1 sibling (self).
+        * A ',' separated or '-' separated set of numbers
+        * (eg 1-2 or 1,3,4,5)
         */
-       matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2);
+       fscanf(filep, "%d%c\n", &sib1, &character);
+       fseek(filep, 0, SEEK_SET);
+       fgets(str, 100, filep);
+       ch = strchr(str, character);
+       while (ch != NULL) {
+               matches++;
+               ch = strchr(ch+1, character);
+       }
 
        fclose(filep);
-
-       if (matches == 3)
-               return 2;
-       else
-               return 1;
+       return matches+1;
 }
 
 /*
@@ -1594,8 +1695,10 @@ restart:
 void check_dev_msr()
 {
        struct stat sb;
+       char pathname[32];
 
-       if (stat("/dev/cpu/0/msr", &sb))
+       sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+       if (stat(pathname, &sb))
                if (system("/sbin/modprobe msr > /dev/null 2>&1"))
                        err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
 }
@@ -1608,6 +1711,7 @@ void check_permissions()
        cap_user_data_t cap_data = &cap_data_data;
        extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
        int do_exit = 0;
+       char pathname[32];
 
        /* check for CAP_SYS_RAWIO */
        cap_header->pid = getpid();
@@ -1622,7 +1726,8 @@ void check_permissions()
        }
 
        /* test file permissions */
-       if (euidaccess("/dev/cpu/0/msr", R_OK)) {
+       sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+       if (euidaccess(pathname, R_OK)) {
                do_exit++;
                warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
        }
@@ -1704,7 +1809,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
        default:
                return 0;
        }
-       get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+       get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
 
        pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
 
@@ -1753,6 +1858,21 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
        }
 }
 
+int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+       if (!genuine_intel)
+               return 0;
+
+       if (family != 6)
+               return 0;
+
+       switch (model) {
+       case 0x57:      /* Knights Landing */
+               return 1;
+       default:
+               return 0;
+       }
+}
 static void
 dump_cstate_pstate_config_info(family, model)
 {
@@ -1770,6 +1890,9 @@ dump_cstate_pstate_config_info(family, model)
        if (has_nhm_turbo_ratio_limit(family, model))
                dump_nhm_turbo_ratio_limits();
 
+       if (has_knl_turbo_ratio_limit(family, model))
+               dump_knl_turbo_ratio_limits();
+
        dump_nhm_cst_cfg();
 }
 
@@ -1801,7 +1924,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
                return 0;
 
-       switch (msr & 0x7) {
+       switch (msr & 0xF) {
        case ENERGY_PERF_BIAS_PERFORMANCE:
                epb_string = "performance";
                break;
@@ -1925,7 +2048,7 @@ double get_tdp(model)
        unsigned long long msr;
 
        if (do_rapl & RAPL_PKG_POWER_INFO)
-               if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+               if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
                        return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
 
        switch (model) {
@@ -1950,6 +2073,7 @@ rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
        case 0x3F:      /* HSX */
        case 0x4F:      /* BDX */
        case 0x56:      /* BDX-DE */
+       case 0x57:      /* KNL */
                return (rapl_dram_energy_units = 15.3 / 1000000);
        default:
                return (rapl_energy_units);
@@ -1991,6 +2115,7 @@ void rapl_probe(unsigned int family, unsigned int model)
        case 0x3F:      /* HSX */
        case 0x4F:      /* BDX */
        case 0x56:      /* BDX-DE */
+       case 0x57:      /* KNL */
                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
                break;
        case 0x2D:
@@ -2006,7 +2131,7 @@ void rapl_probe(unsigned int family, unsigned int model)
        }
 
        /* units on package 0, verify later other packages match */
-       if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
+       if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
                return;
 
        rapl_power_units = 1.0 / (1 << (msr & 0xF));
@@ -2331,6 +2456,17 @@ int is_slm(unsigned int family, unsigned int model)
        return 0;
 }
 
+int is_knl(unsigned int family, unsigned int model)
+{
+       if (!genuine_intel)
+               return 0;
+       switch (model) {
+       case 0x57:      /* KNL */
+               return 1;
+       }
+       return 0;
+}
+
 #define SLM_BCLK_FREQS 5
 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
 
@@ -2340,7 +2476,7 @@ double slm_bclk(void)
        unsigned int i;
        double freq;
 
-       if (get_msr(0, MSR_FSB_FREQ, &msr))
+       if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
                fprintf(stderr, "SLM BCLK: unknown\n");
 
        i = msr & 0xf;
@@ -2408,7 +2544,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        if (!do_nhm_platform_info)
                goto guess;
 
-       if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
+       if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
                goto guess;
 
        target_c_local = (msr >> 16) & 0xFF;
@@ -2541,6 +2677,7 @@ void process_cpuid()
        do_c8_c9_c10 = has_hsw_msrs(family, model);
        do_skl_residency = has_skl_msrs(family, model);
        do_slm_cstates = is_slm(family, model);
+       do_knl_cstates  = is_knl(family, model);
        bclk = discover_bclk(family, model);
 
        rapl_probe(family, model);
@@ -2755,13 +2892,9 @@ int initialize_counters(int cpu_id)
 
        my_package_id = get_physical_package_id(cpu_id);
        my_core_id = get_core_id(cpu_id);
-
-       if (cpu_is_first_sibling_in_core(cpu_id)) {
-               my_thread_id = 0;
+       my_thread_id = get_cpu_position_in_core(cpu_id);
+       if (!my_thread_id)
                topo.num_cores++;
-       } else {
-               my_thread_id = 1;
-       }
 
        init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
        init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
@@ -2785,13 +2918,24 @@ void setup_all_buffers(void)
        for_all_proc_cpus(initialize_counters);
 }
 
+void set_base_cpu(void)
+{
+       base_cpu = sched_getcpu();
+       if (base_cpu < 0)
+               err(-ENODEV, "No valid cpus found");
+
+       if (debug > 1)
+               fprintf(stderr, "base_cpu = %d\n", base_cpu);
+}
+
 void turbostat_init()
 {
+       setup_all_buffers();
+       set_base_cpu();
        check_dev_msr();
        check_permissions();
        process_cpuid();
 
-       setup_all_buffers();
 
        if (debug)
                for_all_cpus(print_epb, ODD_COUNTERS);
@@ -2870,7 +3014,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-       fprintf(stderr, "turbostat version 4.5 2 Apr, 2015"
+       fprintf(stderr, "turbostat version 4.7 27-May, 2015"
                " - Len Brown <lenb@kernel.org>\n");
 }
 
index 15f1a17..3f81a10 100755 (executable)
@@ -66,7 +66,7 @@ make $buildloc $TORTURE_DEFCONFIG > $builddir/Make.defconfig.out 2>&1
 mv $builddir/.config $builddir/.config.sav
 sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
 cp $builddir/.config $builddir/.config.new
-yes '' | make $buildloc oldconfig > $builddir/Make.modconfig.out 2>&1
+yes '' | make $buildloc oldconfig > $builddir/Make.oldconfig.out 2> $builddir/Make.oldconfig.err
 
 # verify new config matches specification.
 configcheck.sh $builddir/.config $c
index 4f5b20f..d86bdd6 100755 (executable)
@@ -43,6 +43,10 @@ do
                if test -f "$i/console.log"
                then
                        configcheck.sh $i/.config $i/ConfigFragment
+                       if test -r $i/Make.oldconfig.err
+                       then
+                               cat $i/Make.oldconfig.err
+                       fi
                        parse-build.sh $i/Make.out $configfile
                        parse-torture.sh $i/console.log $configfile
                        parse-console.sh $i/console.log $configfile
index dd2812c..fbe2dbf 100755 (executable)
@@ -55,7 +55,7 @@ usage () {
        echo "       --bootargs kernel-boot-arguments"
        echo "       --bootimage relative-path-to-kernel-boot-image"
        echo "       --buildonly"
-       echo "       --configs \"config-file list\""
+       echo "       --configs \"config-file list w/ repeat factor (3*TINY01)\""
        echo "       --cpus N"
        echo "       --datestamp string"
        echo "       --defconfig string"
@@ -178,13 +178,26 @@ fi
 touch $T/cfgcpu
 for CF in $configs
 do
-       if test -f "$CONFIGFRAG/$CF"
+       case $CF in
+       [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+               config_reps=`echo $CF | sed -e 's/\*.*$//'`
+               CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
+               ;;
+       *)
+               config_reps=1
+               CF1=$CF
+               ;;
+       esac
+       if test -f "$CONFIGFRAG/$CF1"
        then
-               cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF`
-               cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF" "$cpu_count"`
-               echo $CF $cpu_count >> $T/cfgcpu
+               cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
+               cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+               for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
+               do
+                       echo $CF1 $cpu_count >> $T/cfgcpu
+               done
        else
-               echo "The --configs file $CF does not exist, terminating."
+               echo "The --configs file $CF1 does not exist, terminating."
                exit 1
        fi
 done
index 4970121..f824b4c 100644 (file)
@@ -1,3 +1,5 @@
 CONFIG_RCU_TORTURE_TEST=y
 CONFIG_PRINTK_TIME=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
 CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
index 9fbb41b..1a087c3 100644 (file)
@@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_RCU_EXPERT=y
index 4b6f272..4837430 100644 (file)
@@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
+#CHECK#CONFIG_RCU_EXPERT=n
index 97f0a0b..2cc0e60 100644 (file)
@@ -5,5 +5,6 @@ CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
 CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_RCU=y
-CONFIG_TASKS_RCU=y
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_RCU_EXPERT=y
index 696d2ea..ad2be91 100644 (file)
@@ -2,4 +2,3 @@ CONFIG_SMP=n
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
-CONFIG_TASKS_RCU=y
index 9c60da5..c70c51d 100644 (file)
@@ -6,8 +6,8 @@ CONFIG_HIBERNATION=n
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
-CONFIG_TASKS_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 CONFIG_NO_HZ_FULL_ALL=y
+#CHECK#CONFIG_RCU_EXPERT=n
index 36e41df..f1892e0 100644 (file)
@@ -8,7 +8,7 @@ CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=n
 CONFIG_RCU_TRACE=y
 CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
+#CHECK#CONFIG_PROVE_RCU=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_PREEMPT_COUNT=y
index f8a10a7..8e9137f 100644 (file)
@@ -16,3 +16,4 @@ CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_CPU_STALL_INFO=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
index 629122f..aeea6a2 100644 (file)
@@ -14,10 +14,10 @@ CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=3
 CONFIG_RCU_FANOUT_LEAF=3
-CONFIG_RCU_FANOUT_EXACT=n
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_CPU_STALL_INFO=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
index a25de47..2ac9e68 100644 (file)
@@ -14,7 +14,6 @@ CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=3
 CONFIG_RCU_FANOUT_LEAF=3
-CONFIG_RCU_FANOUT_EXACT=n
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=n
index 53f24e0..72aa7d8 100644 (file)
@@ -1,5 +1,5 @@
 CONFIG_SMP=y
-CONFIG_NR_CPUS=8
+CONFIG_NR_CPUS=16
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
@@ -9,12 +9,12 @@ CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=n
 CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=y
-CONFIG_RCU_FANOUT=4
-CONFIG_RCU_FANOUT_LEAF=4
-CONFIG_RCU_FANOUT_EXACT=n
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_CPU_STALL_INFO=n
 CONFIG_RCU_BOOST=y
 CONFIG_RCU_KTHREAD_PRIO=2
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
new file mode 100644 (file)
index 0000000..120c0c8
--- /dev/null
@@ -0,0 +1 @@
+rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30
index 0f84db3..3f51127 100644 (file)
@@ -13,10 +13,10 @@ CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
-CONFIG_RCU_FANOUT=2
-CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_FANOUT_EXACT=n
+CONFIG_RCU_FANOUT=4
+CONFIG_RCU_FANOUT_LEAF=4
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=y
+CONFIG_RCU_CPU_STALL_INFO=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
index 212e3bf..c04dfea 100644 (file)
@@ -12,11 +12,11 @@ CONFIG_RCU_TRACE=n
 CONFIG_HOTPLUG_CPU=y
 CONFIG_RCU_FANOUT=6
 CONFIG_RCU_FANOUT_LEAF=6
-CONFIG_RCU_FANOUT_EXACT=n
 CONFIG_RCU_NOCB_CPU=y
 CONFIG_RCU_NOCB_CPU_NONE=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
+#CHECK#CONFIG_PROVE_RCU=y
 CONFIG_RCU_CPU_STALL_INFO=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
index 7eee63b..f51d2c7 100644 (file)
@@ -14,10 +14,10 @@ CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=6
 CONFIG_RCU_FANOUT_LEAF=6
-CONFIG_RCU_FANOUT_EXACT=y
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
+#CHECK#CONFIG_PROVE_RCU=y
 CONFIG_RCU_CPU_STALL_INFO=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_RCU_EXPERT=y
index da9a03a..dd90f28 100644 (file)
@@ -1,3 +1,4 @@
 rcupdate.rcu_self_test=1
 rcupdate.rcu_self_test_bh=1
 rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
index 92a97fa..f422af4 100644 (file)
@@ -15,8 +15,8 @@ CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_RCU_FANOUT=2
 CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_FANOUT_EXACT=n
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_CPU_STALL_INFO=y
+CONFIG_RCU_CPU_STALL_INFO=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
index 5812027..a24d2ca 100644 (file)
@@ -1,5 +1,5 @@
 CONFIG_SMP=y
-CONFIG_NR_CPUS=16
+CONFIG_NR_CPUS=8
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
@@ -13,13 +13,13 @@ CONFIG_HOTPLUG_CPU=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_EXACT=y
 CONFIG_RCU_FANOUT_LEAF=2
 CONFIG_RCU_NOCB_CPU=y
 CONFIG_RCU_NOCB_CPU_ALL=y
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU=y
+#CHECK#CONFIG_PROVE_RCU=y
 CONFIG_RCU_CPU_STALL_INFO=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
index 3eaecca..b2b8cea 100644 (file)
@@ -13,7 +13,6 @@ CONFIG_HOTPLUG_CPU=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_EXACT=y
 CONFIG_RCU_FANOUT_LEAF=2
 CONFIG_RCU_NOCB_CPU=y
 CONFIG_RCU_NOCB_CPU_ALL=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot
new file mode 100644 (file)
index 0000000..883149b
--- /dev/null
@@ -0,0 +1 @@
+rcutree.rcu_fanout_exact=1
index 2561daf..fb066dc 100644 (file)
@@ -1,3 +1,4 @@
 rcutorture.torture_type=sched
 rcupdate.rcu_self_test=1
 rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
index 6076b36..aa4ed08 100644 (file)
@@ -16,3 +16,4 @@ CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_CPU_STALL_INFO=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+#CHECK#CONFIG_RCU_EXPERT=n
index ec03c88..b24c000 100644 (file)
@@ -12,13 +12,12 @@ CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
 CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE.
 CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
 CONFIG_PREEMPT -- Do half.  (First three and #8.)
-CONFIG_PROVE_LOCKING -- Do all but two, covering CONFIG_PROVE_RCU and not.
-CONFIG_PROVE_RCU -- Do all but one under CONFIG_PROVE_LOCKING.
+CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
+CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
 CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
 CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
-CONFIG_RCU_CPU_STALL_INFO -- Do one.
-CONFIG_RCU_FANOUT -- Cover hierarchy as currently, but overlap with others.
-CONFIG_RCU_FANOUT_EXACT -- Do one.
+CONFIG_RCU_CPU_STALL_INFO -- Now default, avoid at least twice.
+CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
 CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
 CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL.
 CONFIG_RCU_NOCB_CPU -- Do three, see below.
@@ -27,28 +26,19 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one.
 CONFIG_RCU_NOCB_CPU_ZERO -- Do one.
 CONFIG_RCU_TRACE -- Do half.
 CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
+!RCU_EXPERT -- Do a few, but these have to be vanilla configurations.
 RCU-bh: Do one with PREEMPT and one with !PREEMPT.
 RCU-sched: Do one with PREEMPT but not BOOST.
 
 
-Hierarchy:
-
-TREE01.        CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=8, CONFIG_RCU_FANOUT_EXACT=n.
-TREE02.        CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=n,
-       CONFIG_RCU_FANOUT_LEAF=3.
-TREE03.        CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=4, CONFIG_RCU_FANOUT_EXACT=n,
-       CONFIG_RCU_FANOUT_LEAF=4.
-TREE04.        CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n,
-       CONFIG_RCU_FANOUT_LEAF=2.
-TREE05.        CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=n
-       CONFIG_RCU_FANOUT_LEAF=6.
-TREE06.        CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=y
-       CONFIG_RCU_FANOUT_LEAF=6.
-TREE07.        CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n,
-       CONFIG_RCU_FANOUT_LEAF=2.
-TREE08.        CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=y,
-       CONFIG_RCU_FANOUT_LEAF=2.
-TREE09.        CONFIG_NR_CPUS=1.
+Boot parameters:
+
+nohz_full - do at least one.
+maxcpu -- do at least one.
+rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test -- Do at least one each, offloaded and not.
+rcutree.rcu_fanout_exact -- Do at least one.
 
 
 Kconfig Parameters Ignored:
index ddf6356..9b0d8ba 100644 (file)
@@ -1,48 +1,59 @@
-.PHONY: all all_32 all_64 check_build32 clean run_tests
+all:
+
+include ../lib.mk
+
+.PHONY: all all_32 all_64 warn_32bit_failure clean
 
 TARGETS_C_BOTHBITS := sigreturn single_step_syscall
+TARGETS_C_32BIT_ONLY := entry_from_vm86
 
-BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32)
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
 BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
 
 CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
 
-UNAME_P := $(shell uname -p)
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
 
-# Always build 32-bit tests
+ifeq ($(CAN_BUILD_I386),1)
 all: all_32
+TEST_PROGS += $(BINARIES_32)
+endif
 
-# If we're on a 64-bit host, build 64-bit tests as well
-ifeq ($(shell uname -p),x86_64)
+ifeq ($(CAN_BUILD_X86_64),1)
 all: all_64
+TEST_PROGS += $(BINARIES_64)
 endif
 
-all_32: check_build32 $(BINARIES_32)
+all_32: $(BINARIES_32)
 
 all_64: $(BINARIES_64)
 
 clean:
        $(RM) $(BINARIES_32) $(BINARIES_64)
 
-run_tests:
-       ./run_x86_tests.sh
-
-$(TARGETS_C_BOTHBITS:%=%_32): %_32: %.c
+$(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c
        $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
 
 $(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
        $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
 
-check_build32:
-       @if ! $(CC) -m32 -o /dev/null trivial_32bit_program.c; then     \
-         echo "Warning: you seem to have a broken 32-bit build" 2>&1;  \
-         echo "environment.  If you are using a Debian-like";          \
-         echo " distribution, try:";                                   \
-         echo "";                                                      \
-         echo "  apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
-         echo "";                                                      \
-         echo "If you are using a Fedora-like distribution, try:";     \
-         echo "";                                                      \
-         echo "  yum install glibc-devel.*i686";                       \
-         exit 1;                                                       \
-       fi
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+       @echo "Warning: you seem to have a broken 32-bit build" 2>&1;   \
+       echo "environment.  This will reduce test coverage of 64-bit" 2>&1; \
+       echo "kernels.  If you are using a Debian-like distribution," 2>&1; \
+       echo "try:"; 2>&1; \
+       echo "";                                                        \
+       echo "  apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+       echo "";                                                        \
+       echo "If you are using a Fedora-like distribution, try:";       \
+       echo "";                                                        \
+       echo "  yum install glibc-devel.*i686";                         \
+       exit 0;
+endif
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
new file mode 100755 (executable)
index 0000000..172d329
--- /dev/null
@@ -0,0 +1,16 @@
+#!/bin/sh
+# check_cc.sh - Helper to test userspace compilation support
+# Copyright (c) 2015 Andrew Lutomirski
+# GPL v2
+
+CC="$1"
+TESTPROG="$2"
+shift 2
+
+if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+    echo 1
+else
+    echo 0
+fi
+
+exit 0
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
new file mode 100644 (file)
index 0000000..5c38a18
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * entry_from_vm86.c - tests kernel entries from vm86 mode
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This exercises a few paths that need to special-case vm86 mode.
+ *
+ * GPL v2.
+ */
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/vm86.h>
+
+static unsigned long load_addr = 0x10000;
+static int nerrs = 0;
+
+asm (
+       ".pushsection .rodata\n\t"
+       ".type vmcode_bound, @object\n\t"
+       "vmcode:\n\t"
+       "vmcode_bound:\n\t"
+       ".code16\n\t"
+       "bound %ax, (2048)\n\t"
+       "int3\n\t"
+       "vmcode_sysenter:\n\t"
+       "sysenter\n\t"
+       ".size vmcode, . - vmcode\n\t"
+       "end_vmcode:\n\t"
+       ".code32\n\t"
+       ".popsection"
+       );
+
+extern unsigned char vmcode[], end_vmcode[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[];
+
+static void do_test(struct vm86plus_struct *v86, unsigned long eip,
+                   const char *text)
+{
+       long ret;
+
+       printf("[RUN]\t%s from vm86 mode\n", text);
+       v86->regs.eip = eip;
+       ret = vm86(VM86_ENTER, v86);
+
+       if (ret == -1 && errno == ENOSYS) {
+               printf("[SKIP]\tvm86 not supported\n");
+               return;
+       }
+
+       if (VM86_TYPE(ret) == VM86_INTx) {
+               char trapname[32];
+               int trapno = VM86_ARG(ret);
+               if (trapno == 13)
+                       strcpy(trapname, "GP");
+               else if (trapno == 5)
+                       strcpy(trapname, "BR");
+               else if (trapno == 14)
+                       strcpy(trapname, "PF");
+               else
+                       sprintf(trapname, "%d", trapno);
+
+               printf("[OK]\tExited vm86 mode due to #%s\n", trapname);
+       } else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
+               printf("[OK]\tExited vm86 mode due to unhandled GP fault\n");
+       } else {
+               printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n",
+                      VM86_TYPE(ret), VM86_ARG(ret));
+       }
+}
+
+int main(void)
+{
+       struct vm86plus_struct v86;
+       unsigned char *addr = mmap((void *)load_addr, 4096,
+                                  PROT_READ | PROT_WRITE | PROT_EXEC,
+                                  MAP_ANONYMOUS | MAP_PRIVATE, -1,0);
+       if (addr != (unsigned char *)load_addr)
+               err(1, "mmap");
+
+       memcpy(addr, vmcode, end_vmcode - vmcode);
+       addr[2048] = 2;
+       addr[2050] = 3;
+
+       memset(&v86, 0, sizeof(v86));
+
+       v86.regs.cs = load_addr / 16;
+       v86.regs.ss = load_addr / 16;
+       v86.regs.ds = load_addr / 16;
+       v86.regs.es = load_addr / 16;
+
+       assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
+
+       /* #BR -- should deliver SIG??? */
+       do_test(&v86, vmcode_bound - vmcode, "#BR");
+
+       /* SYSENTER -- should cause #GP or #UD depending on CPU */
+       do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER");
+
+       return (nerrs == 0 ? 0 : 1);
+}
diff --git a/tools/testing/selftests/x86/run_x86_tests.sh b/tools/testing/selftests/x86/run_x86_tests.sh
deleted file mode 100644 (file)
index 3fc19b3..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-# This is deliberately minimal.  IMO kselftests should provide a standard
-# script here.
-./sigreturn_32 || exit 1
-./single_step_syscall_32 || exit 1
-
-if [[ "$uname -p" -eq "x86_64" ]]; then
-    ./sigreturn_64 || exit 1
-    ./single_step_syscall_64 || exit 1
-fi
-
-exit 0
index 2e231be..fabdf0f 100644 (file)
@@ -4,6 +4,10 @@
  * GPL v2
  */
 
+#ifndef __i386__
+# error wrong architecture
+#endif
+
 #include <stdio.h>
 
 int main()
diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c
new file mode 100644 (file)
index 0000000..b994946
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 32-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __x86_64__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+       printf("\n");
+
+       return 0;
+}
index 0788621..2e83dd3 100644 (file)
@@ -12,10 +12,6 @@ TARGET=tmon
 INSTALL_PROGRAM=install -m 755 -p
 DEL_FILE=rm -f
 
-INSTALL_CONFIGFILE=install -m 644 -p
-CONFIG_FILE=
-CONFIG_PATH=
-
 # Static builds might require -ltinfo, for instance
 ifneq ($(findstring -static, $(LDFLAGS)),)
 STATIC := --static
@@ -38,13 +34,9 @@ valgrind: tmon
 install:
        - mkdir -p $(INSTALL_ROOT)/$(BINDIR)
        - $(INSTALL_PROGRAM) "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
-       - mkdir -p $(INSTALL_ROOT)/$(CONFIG_PATH)
-       - $(INSTALL_CONFIGFILE) "$(CONFIG_FILE)" "$(INSTALL_ROOT)/$(CONFIG_PATH)"
 
 uninstall:
        $(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
-       $(CONFIG_FILE) "$(CONFIG_PATH)"
-
 
 clean:
        find . -name "*.o" | xargs $(DEL_FILE)
index ac884b6..93aadaf 100644 (file)
@@ -3,7 +3,7 @@
 TARGETS=page-types slabinfo page_owner_sort
 
 LIB_DIR = ../lib/api
-LIBS = $(LIB_DIR)/libapikfs.a
+LIBS = $(LIB_DIR)/libapi.a
 
 CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -Wextra -I../lib/