Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author David S. Miller <davem@davemloft.net>
Sun, 17 Feb 2019 06:56:34 +0000 (22:56 -0800)
committer David S. Miller <davem@davemloft.net>
Sun, 17 Feb 2019 06:56:34 +0000 (22:56 -0800)
Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-02-16

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) numerous libbpf API improvements, from Andrii, Andrey, Yonghong.

2) test all bpf progs in alu32 mode, from Jiong.

3) skb->sk access and bpf_sk_fullsock(), bpf_tcp_sock() helpers, from Martin.

4) support for IP encap in lwt bpf progs, from Peter.

5) remove XDP_QUERY_XSK_UMEM dead code, from Jan.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
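
For item (3) above, a minimal sketch of the intended usage, assuming a
cgroup skb egress attach point and the selftests-style "bpf_helpers.h"
include; section name, includes and the printout are illustrative, not
taken from these patches:

    /* Sketch: follow skb->sk and read tcp_sock fields via the new helpers. */
    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    SEC("cgroup_skb/egress")
    int log_snd_cwnd(struct __sk_buff *skb)
    {
            struct bpf_sock *sk = skb->sk;  /* new: direct skb->sk access */
            struct bpf_tcp_sock *tp;
            char fmt[] = "snd_cwnd=%u\n";

            if (!sk)
                    return 1;               /* no socket attached; allow */
            sk = bpf_sk_fullsock(sk);       /* NULL for request/timewait socks */
            if (!sk)
                    return 1;
            tp = bpf_tcp_sock(sk);          /* NULL unless a full TCP socket */
            if (!tp)
                    return 1;
            bpf_trace_printk(fmt, sizeof(fmt), tp->snd_cwnd);
            return 1;                       /* keep the packet */
    }

    char _license[] SEC("license") = "GPL";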
932 files changed:
Documentation/ABI/stable/sysfs-driver-mlxreg-io
Documentation/admin-guide/kernel-parameters.txt
Documentation/devicetree/bindings/Makefile
Documentation/devicetree/bindings/net/macb.txt
Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
Documentation/devicetree/bindings/serio/olpc,ap-sp.txt
Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst
Documentation/networking/devlink-info-versions.rst
Documentation/networking/devlink-params-mlxsw.txt
Documentation/networking/operstates.txt
Documentation/networking/snmp_counter.rst
Documentation/sysctl/fs.txt
Documentation/x86/resctrl_ui.txt
MAINTAINERS
Makefile
arch/alpha/include/asm/irq.h
arch/alpha/mm/fault.c
arch/arm/boot/dts/am335x-shc.dts
arch/arm/boot/dts/da850.dtsi
arch/arm/boot/dts/imx6q-pistachio.dts
arch/arm/boot/dts/imx6sll-evk.dts
arch/arm/boot/dts/imx6sx.dtsi
arch/arm/boot/dts/meson.dtsi
arch/arm/boot/dts/meson8b-ec100.dts
arch/arm/boot/dts/meson8b-odroidc1.dts
arch/arm/boot/dts/meson8m2-mxiii-plus.dts
arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
arch/arm/boot/dts/omap3-gta04.dtsi
arch/arm/boot/dts/omap3-n900.dts
arch/arm/boot/dts/omap3-n950-n9.dtsi
arch/arm/boot/dts/omap5-l4.dtsi
arch/arm/boot/dts/r8a7743.dtsi
arch/arm/boot/dts/sun6i-a31.dtsi
arch/arm/boot/dts/sun8i-h3-beelink-x2.dts
arch/arm/boot/dts/vf610-bk4.dts
arch/arm/mach-cns3xxx/pcie.c
arch/arm/mach-iop32x/n2100.c
arch/arm/mach-tango/pm.c
arch/arm/mach-tango/pm.h [new file with mode: 0644]
arch/arm/mach-tango/setup.c
arch/arm/plat-pxa/ssp.c
arch/arm/xen/mm.c
arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts
arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts
arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi
arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi
arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts
arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts
arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts
arch/arm64/boot/dts/qcom/msm8996.dtsi
arch/arm64/boot/dts/renesas/r8a774a1.dtsi
arch/arm64/boot/dts/renesas/r8a7796.dtsi
arch/arm64/boot/dts/renesas/r8a77965.dtsi
arch/arm64/kernel/hibernate.c
arch/arm64/kernel/hyp-stub.S
arch/arm64/kernel/kaslr.c
arch/arm64/kernel/machine_kexec_file.c
arch/arm64/kernel/probes/kprobes.c
arch/arm64/mm/dump.c
arch/arm64/mm/flush.c
arch/c6x/include/asm/Kbuild
arch/c6x/include/uapi/asm/Kbuild
arch/h8300/include/asm/Kbuild
arch/h8300/include/uapi/asm/Kbuild
arch/hexagon/include/asm/Kbuild
arch/hexagon/include/uapi/asm/Kbuild
arch/m68k/emu/nfblock.c
arch/m68k/include/asm/Kbuild
arch/m68k/include/uapi/asm/Kbuild
arch/microblaze/include/asm/Kbuild
arch/microblaze/include/uapi/asm/Kbuild
arch/mips/Kconfig
arch/mips/boot/dts/ingenic/ci20.dts
arch/mips/boot/dts/ingenic/jz4740.dtsi
arch/mips/boot/dts/xilfpga/nexys4ddr.dts
arch/mips/include/asm/atomic.h
arch/mips/include/asm/barrier.h
arch/mips/include/asm/bitops.h
arch/mips/include/asm/futex.h
arch/mips/include/asm/pgtable.h
arch/mips/kernel/mips-cm.c
arch/mips/kernel/process.c
arch/mips/loongson64/Platform
arch/mips/loongson64/common/reset.c
arch/mips/mm/tlbex.c
arch/mips/pci/pci-octeon.c
arch/mips/vdso/Makefile
arch/openrisc/include/asm/Kbuild
arch/openrisc/include/uapi/asm/Kbuild
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/mm/pgtable-book3s64.c
arch/powerpc/platforms/pseries/papr_scm.c
arch/riscv/Kconfig
arch/riscv/configs/defconfig
arch/riscv/include/asm/page.h
arch/riscv/include/asm/pgtable-bits.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/processor.h
arch/riscv/kernel/asm-offsets.c
arch/riscv/kernel/entry.S
arch/riscv/kernel/setup.c
arch/riscv/kernel/smpboot.c
arch/riscv/kernel/vmlinux.lds.S
arch/riscv/mm/init.c
arch/s390/kernel/swsusp.S
arch/s390/pci/pci.c
arch/unicore32/include/asm/Kbuild
arch/unicore32/include/uapi/asm/Kbuild
arch/x86/Kconfig
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/pgtable.h
arch/x86/events/intel/core.c
arch/x86/events/intel/uncore_snbep.c
arch/x86/include/asm/intel-family.h
arch/x86/include/asm/page_64_types.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/resctrl_sched.h
arch/x86/kernel/cpu/Makefile
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/mce/core.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/resctrl/Makefile
arch/x86/kernel/kexec-bzimage64.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/iomem.c
arch/x86/mm/fault.c
arch/x86/mm/pageattr.c
arch/xtensa/Kconfig
arch/xtensa/boot/dts/Makefile
arch/xtensa/configs/audio_kc705_defconfig
arch/xtensa/configs/cadence_csp_defconfig
arch/xtensa/configs/generic_kc705_defconfig
arch/xtensa/configs/nommu_kc705_defconfig
arch/xtensa/configs/smp_lx200_defconfig
arch/xtensa/kernel/head.S
arch/xtensa/kernel/smp.c
arch/xtensa/kernel/time.c
block/blk-core.c
block/blk-flush.c
block/blk-iolatency.c
block/blk-mq-debugfs.c
block/blk-mq.c
block/blk-mq.h
drivers/acpi/bus.c
drivers/android/binder.c
drivers/android/binder_internal.h
drivers/android/binderfs.c
drivers/ata/libata-core.c
drivers/base/cacheinfo.c
drivers/base/power/runtime.c
drivers/block/floppy.c
drivers/clk/clk.c
drivers/clk/imx/clk-frac-pll.c
drivers/clk/mmp/clk-of-mmp2.c
drivers/clk/qcom/gcc-sdm845.c
drivers/clk/ti/divider.c
drivers/cpuidle/poll_state.c
drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
drivers/crypto/ccree/cc_driver.c
drivers/crypto/ccree/cc_pm.c
drivers/crypto/ccree/cc_pm.h
drivers/dma/at_xdmac.c
drivers/dma/bcm2835-dma.c
drivers/dma/dmatest.c
drivers/dma/imx-dma.c
drivers/firmware/arm_scmi/bus.c
drivers/firmware/efi/arm-runtime.c
drivers/fpga/stratix10-soc.c
drivers/gpio/gpio-altera-a10sr.c
drivers/gpio/gpio-eic-sprd.c
drivers/gpio/gpio-pcf857x.c
drivers/gpio/gpio-vf610.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
drivers/gpu/drm/drm_lease.c
drivers/gpu/drm/drm_modes.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_pmu.c
drivers/gpu/drm/i915/i915_pmu.h
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_fbdev.c
drivers/gpu/drm/i915/intel_opregion.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/i915/intel_sprite.c
drivers/gpu/drm/imx/imx-ldb.c
drivers/gpu/drm/imx/ipuv3-plane.c
drivers/gpu/drm/omapdrm/dss/dsi.c
drivers/gpu/drm/radeon/ci_dpm.c
drivers/gpu/drm/radeon/si_dpm.c
drivers/gpu/drm/rockchip/rockchip_rgb.c
drivers/gpu/drm/rockchip/rockchip_rgb.h
drivers/gpu/drm/scheduler/sched_entity.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/drm/vkms/vkms_crc.c
drivers/gpu/drm/vkms/vkms_crtc.c
drivers/gpu/drm/vkms/vkms_drv.c
drivers/gpu/drm/vkms/vkms_drv.h
drivers/gpu/drm/vkms/vkms_gem.c
drivers/gpu/drm/vkms/vkms_output.c
drivers/gpu/drm/vkms/vkms_plane.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/ipu-v3/ipu-common.c
drivers/gpu/ipu-v3/ipu-pre.c
drivers/hid/hid-debug.c
drivers/hwmon/nct6775.c
drivers/i2c/busses/i2c-omap.c
drivers/i3c/master.c
drivers/i3c/master/dw-i3c-master.c
drivers/ide/ide-atapi.c
drivers/ide/ide-io.c
drivers/ide/ide-park.c
drivers/ide/ide-probe.c
drivers/iio/adc/axp288_adc.c
drivers/iio/adc/ti-ads8688.c
drivers/iio/chemical/atlas-ph-sensor.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_std_types_device.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/ud.c
drivers/infiniband/hw/hns/hns_roce_srq.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx5/cmd.c
drivers/infiniband/hw/mlx5/cmd.h
drivers/infiniband/hw/mlx5/flow.c
drivers/infiniband/hw/mlx5/ib_rep.c
drivers/infiniband/hw/mlx5/ib_rep.h
drivers/infiniband/hw/mlx5/mad.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/qib/qib_ud.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/input/serio/olpc_apsp.c
drivers/iommu/amd_iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/mtk_iommu_v1.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-mmp.c
drivers/irqchip/irq-xtensa-mx.c
drivers/irqchip/irq-xtensa-pic.c
drivers/isdn/gigaset/ser-gigaset.c
drivers/isdn/hisax/hfc_pci.c
drivers/isdn/i4l/isdn_tty.c
drivers/isdn/i4l/isdn_v110.c
drivers/isdn/mISDN/timerdev.c
drivers/md/dm-crypt.c
drivers/md/dm-rq.c
drivers/md/dm-thin.c
drivers/md/dm.c
drivers/md/raid1.c
drivers/md/raid5-cache.c
drivers/md/raid5.c
drivers/mfd/Kconfig
drivers/misc/mei/client.c
drivers/misc/mei/hw-me-regs.h
drivers/misc/mei/pci-me.c
drivers/misc/mic/vop/vop_main.c
drivers/mmc/core/block.c
drivers/mmc/host/bcm2835.c
drivers/mmc/host/meson-gx-mmc.c
drivers/mmc/host/mtk-sd.c
drivers/mmc/host/sunxi-mmc.c
drivers/mtd/mtdpart.c
drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c
drivers/mtd/nand/raw/nand_base.c
drivers/mtd/nand/raw/nand_bbt.c
drivers/mtd/nand/spi/core.c
drivers/net/Kconfig
drivers/net/appletalk/cops.c
drivers/net/bonding/bond_options.c
drivers/net/caif/caif_spi.c
drivers/net/dsa/b53/b53_srab.c
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/bcm_sf2.h
drivers/net/dsa/bcm_sf2_cfp.c
drivers/net/dsa/bcm_sf2_regs.h
drivers/net/dsa/microchip/ksz9477.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/global1_atu.c
drivers/net/ethernet/3com/3c515.c
drivers/net/ethernet/3com/3c59x.c
drivers/net/ethernet/adaptec/starfire.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/amazon/ena/ena_netdev.h
drivers/net/ethernet/amd/au1000_eth.c
drivers/net/ethernet/amd/lance.c
drivers/net/ethernet/amd/ni65.c
drivers/net/ethernet/apple/mace.c
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/atheros/atl1e/atl1e_main.c
drivers/net/ethernet/atheros/atlx/atl1.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bcmsysport.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
drivers/net/ethernet/broadcom/sb1250-mac.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cavium/Kconfig
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
drivers/net/ethernet/chelsio/cxgb4/t4_values.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
drivers/net/ethernet/chelsio/cxgb4vf/sge.c
drivers/net/ethernet/cisco/enic/enic_main.c
drivers/net/ethernet/dec/tulip/de2104x.c
drivers/net/ethernet/dlink/dl2k.c
drivers/net/ethernet/dlink/sundance.c
drivers/net/ethernet/fealnx.c
drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
drivers/net/ethernet/freescale/enetc/Kconfig
drivers/net/ethernet/freescale/enetc/Makefile
drivers/net/ethernet/freescale/enetc/enetc_hw.h
drivers/net/ethernet/freescale/enetc/enetc_ptp.c [new file with mode: 0644]
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fec_mpc52xx.c
drivers/net/ethernet/freescale/gianfar_ethtool.c
drivers/net/ethernet/freescale/ucc_geth.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/i825xx/lib82596.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
drivers/net/ethernet/lantiq_etop.c
drivers/net/ethernet/marvell/mvpp2/mvpp2.h
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/pxa168_eth.c
drivers/net/ethernet/marvell/skge.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/ecpf.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/ecpf.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/port.c
drivers/net/ethernet/mellanox/mlx5/core/en/port.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/events.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/lag.c
drivers/net/ethernet/mellanox/mlx5/core/mad.c [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
drivers/net/ethernet/mellanox/mlx5/core/port.c
drivers/net/ethernet/mellanox/mlx5/core/sriov.c
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlxsw/Makefile
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/core.h
drivers/net/ethernet/mellanox/mlxsw/core_env.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/core_env.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
drivers/net/ethernet/mellanox/mlxsw/i2c.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/micrel/ks8695net.c
drivers/net/ethernet/moxa/moxart_ether.c
drivers/net/ethernet/moxa/moxart_ether.h
drivers/net/ethernet/myricom/myri10ge/myri10ge.c
drivers/net/ethernet/natsemi/natsemi.c
drivers/net/ethernet/natsemi/ns83820.c
drivers/net/ethernet/natsemi/sonic.c
drivers/net/ethernet/neterion/s2io.c
drivers/net/ethernet/neterion/vxge/vxge-main.c
drivers/net/ethernet/netronome/nfp/flower/action.c
drivers/net/ethernet/netronome/nfp/flower/cmsg.c
drivers/net/ethernet/netronome/nfp/flower/offload.c
drivers/net/ethernet/netronome/nfp/nfp_devlink.c
drivers/net/ethernet/nuvoton/w90p910_ether.c
drivers/net/ethernet/packetengines/hamachi.c
drivers/net/ethernet/packetengines/yellowfin.c
drivers/net/ethernet/qlogic/qed/qed.h
drivers/net/ethernet/qlogic/qed/qed_cxt.c
drivers/net/ethernet/qlogic/qed/qed_hsi.h
drivers/net/ethernet/qlogic/qed/qed_l2.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_mcp.h
drivers/net/ethernet/qlogic/qed/qed_sp.h
drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
drivers/net/ethernet/qlogic/qed/qed_spq.c
drivers/net/ethernet/qlogic/qede/qede.h
drivers/net/ethernet/qlogic/qede/qede_ethtool.c
drivers/net/ethernet/qlogic/qede/qede_fp.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
drivers/net/ethernet/qlogic/qlge/qlge_main.c
drivers/net/ethernet/qualcomm/emac/emac-mac.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/rocker/rocker.h
drivers/net/ethernet/rocker/rocker_main.c
drivers/net/ethernet/rocker/rocker_ofdpa.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/mcdi_pcol.h
drivers/net/ethernet/sfc/rx.c
drivers/net/ethernet/sfc/tx.c
drivers/net/ethernet/sgi/ioc3-eth.c
drivers/net/ethernet/sgi/meth.c
drivers/net/ethernet/sis/sis190.c
drivers/net/ethernet/sis/sis900.c
drivers/net/ethernet/smsc/epic100.c
drivers/net/ethernet/smsc/smc911x.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/sun/cassini.c
drivers/net/ethernet/sun/sunbmac.c
drivers/net/ethernet/sun/sunhme.c
drivers/net/ethernet/tehuti/tehuti.c
drivers/net/ethernet/via/via-velocity.c
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/ethernet/xilinx/xilinx_emaclite.c
drivers/net/ethernet/xscale/ixp4xx_eth.c
drivers/net/fddi/defxx.c
drivers/net/fddi/skfp/pcmplc.c
drivers/net/geneve.c
drivers/net/ieee802154/mcr20a.c
drivers/net/ipvlan/Makefile
drivers/net/ipvlan/ipvlan.h
drivers/net/ipvlan/ipvlan_core.c
drivers/net/ipvlan/ipvlan_l3s.c [new file with mode: 0644]
drivers/net/ipvlan/ipvlan_main.c
drivers/net/phy/aquantia.c
drivers/net/phy/dp83640.c
drivers/net/phy/marvell.c
drivers/net/phy/marvell10g.c
drivers/net/phy/phy-c45.c
drivers/net/phy/phy-core.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/phy/sfp-bus.c
drivers/net/phy/sfp.c
drivers/net/phy/sfp.h
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/usb/cdc-phonet.c
drivers/net/usb/pegasus.c
drivers/net/usb/rtl8150.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/vxlan.c
drivers/net/wan/dscc4.c
drivers/net/wan/fsl_ucc_hdlc.c
drivers/net/wan/wanxl.c
drivers/net/wimax/i2400m/rx.c
drivers/net/wimax/i2400m/usb.c
drivers/net/wireless/ath/ath10k/core.c
drivers/net/wireless/intel/iwlwifi/Kconfig
drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h
drivers/net/wireless/mediatek/mt76/mt76x0/phy.c
drivers/net/wireless/ti/wlcore/sdio.c
drivers/net/xen-netback/xenbus.c
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/pci/controller/dwc/pci-imx6.c
drivers/pci/controller/dwc/pcie-armada8k.c
drivers/pci/quirks.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/mediatek/Kconfig
drivers/pinctrl/pinctrl-mcp23s08.c
drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c
drivers/pinctrl/sunxi/pinctrl-sunxi.c
drivers/pinctrl/sunxi/pinctrl-sunxi.h
drivers/platform/x86/Kconfig
drivers/ptp/Kconfig
drivers/ptp/ptp_qoriq.c
drivers/ptp/ptp_qoriq_debugfs.c
drivers/s390/block/dasd_eckd.c
drivers/s390/crypto/ap_bus.c
drivers/s390/net/Makefile
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_core_mpc.c
drivers/s390/net/qeth_core_mpc.h
drivers/s390/net/qeth_core_sys.c
drivers/s390/net/qeth_ethtool.c [new file with mode: 0644]
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3_main.c
drivers/s390/scsi/zfcp_aux.c
drivers/s390/scsi/zfcp_scsi.c
drivers/scsi/53c700.c
drivers/scsi/aic94xx/aic94xx_init.c
drivers/scsi/bnx2fc/bnx2fc_io.c
drivers/scsi/cxlflash/main.c
drivers/scsi/libfc/fc_lport.c
drivers/scsi/libfc/fc_rport.c
drivers/scsi/scsi_debug.c
drivers/scsi/sd_zbc.c
drivers/soc/fsl/qbman/qman.c
drivers/staging/fsl-dpaa2/ethsw/ethsw.c
drivers/staging/octeon/ethernet-mdio.c
drivers/staging/speakup/spk_ttyio.c
drivers/target/target_core_configfs.c
drivers/thermal/cpu_cooling.c
drivers/thermal/of-thermal.c
drivers/tty/serial/8250/8250_mtk.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/serial/earlycon-riscv-sbi.c
drivers/tty/serial/serial_core.c
drivers/tty/serial/sh-sci.c
drivers/usb/dwc3/dwc3-exynos.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/udc/net2272.c
drivers/usb/musb/musb_gadget.c
drivers/usb/musb/musbhsdma.c
drivers/usb/phy/Kconfig
drivers/usb/phy/phy-am335x.c
drivers/usb/typec/tcpm/tcpm.c
drivers/virtio/virtio_ring.c
fs/aio.c
fs/autofs/expire.c
fs/autofs/inode.c
fs/binfmt_script.c
fs/btrfs/ctree.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/volumes.c
fs/buffer.c
fs/cifs/cifsfs.h
fs/cifs/file.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/dcache.c
fs/debugfs/inode.c
fs/drop_caches.c
fs/ext4/fsync.c
fs/fuse/dev.c
fs/fuse/file.c
fs/fuse/inode.c
fs/gfs2/glops.c
fs/gfs2/log.c
fs/gfs2/lops.c
fs/gfs2/lops.h
fs/gfs2/ops_fstype.c
fs/gfs2/recovery.c
fs/gfs2/recovery.h
fs/gfs2/rgrp.c
fs/gfs2/super.c
fs/inode.c
fs/iomap.c
fs/nfs/super.c
fs/nfs/write.c
fs/nfsd/vfs.c
fs/proc/generic.c
fs/proc/internal.h
fs/proc/proc_net.c
fs/proc/task_mmu.c
fs/xfs/scrub/repair.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_buf.c
include/asm-generic/shmparam.h [moved from include/uapi/asm-generic/shmparam.h with 100% similarity]
include/dt-bindings/clock/imx8mq-clock.h
include/dt-bindings/clock/marvell,mmp2.h
include/linux/blktrace_api.h
include/linux/cpu.h
include/linux/dcache.h
include/linux/filter.h
include/linux/fs.h
include/linux/fsl/ptp_qoriq.h
include/linux/hid-debug.h
include/linux/ide.h
include/linux/irqchip/arm-gic-v3.h
include/linux/memory_hotplug.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/eswitch.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/port.h
include/linux/mlx5/vport.h
include/linux/mm_types.h
include/linux/mmc/card.h
include/linux/netdevice.h
include/linux/objagg.h
include/linux/phy.h
include/linux/phylink.h
include/linux/pm_runtime.h
include/linux/qed/qed_if.h
include/linux/sched.h
include/linux/sched/coredump.h
include/linux/signal.h
include/linux/skbuff.h
include/linux/stmmac.h
include/net/act_api.h
include/net/devlink.h
include/net/flow_offload.h
include/net/inetpeer.h
include/net/l3mdev.h
include/net/netfilter/nf_tables.h
include/net/pkt_cls.h
include/net/sch_generic.h
include/net/sock.h
include/net/tc_act/tc_csum.h
include/net/tc_act/tc_gact.h
include/net/tc_act/tc_mirred.h
include/net/tc_act/tc_pedit.h
include/net/tc_act/tc_sample.h
include/net/tc_act/tc_skbedit.h
include/net/tc_act/tc_tunnel_key.h
include/net/tc_act/tc_vlan.h
include/rdma/ib_verbs.h
include/sound/compress_driver.h
include/sound/hda_codec.h
include/trace/events/mlxsw.h
include/uapi/linux/batadv_packet.h
include/uapi/linux/batman_adv.h
include/uapi/linux/errqueue.h
include/uapi/linux/inet_diag.h
include/uapi/linux/mdio.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/tc_act/tc_bpf.h
include/uapi/linux/tc_act/tc_connmark.h
include/uapi/linux/tc_act/tc_csum.h
include/uapi/linux/tc_act/tc_gact.h
include/uapi/linux/tc_act/tc_ife.h
include/uapi/linux/tc_act/tc_ipt.h
include/uapi/linux/tc_act/tc_mirred.h
include/uapi/linux/tc_act/tc_nat.h
include/uapi/linux/tc_act/tc_pedit.h
include/uapi/linux/tc_act/tc_sample.h
include/uapi/linux/tc_act/tc_skbedit.h
include/uapi/linux/tc_act/tc_skbmod.h
include/uapi/linux/tc_act/tc_tunnel_key.h
include/uapi/linux/tc_act/tc_vlan.h
include/uapi/linux/time.h
include/uapi/linux/time_types.h [new file with mode: 0644]
include/uapi/linux/virtio_config.h
include/uapi/linux/virtio_ring.h
include/uapi/rdma/hns-abi.h
init/Kconfig
init/main.c
kernel/bpf/btf.c
kernel/bpf/cgroup.c
kernel/bpf/hashtab.c
kernel/bpf/percpu_freelist.c
kernel/bpf/percpu_freelist.h
kernel/bpf/syscall.c
kernel/cpu.c
kernel/events/core.c
kernel/events/ring_buffer.c
kernel/exit.c
kernel/futex.c
kernel/locking/rtmutex.c
kernel/relay.c
kernel/sched/fair.c
kernel/sched/psi.c
kernel/signal.c
kernel/smp.c
kernel/trace/bpf_trace.c
kernel/trace/trace_probe_tmpl.h
kernel/trace/trace_uprobe.c
kernel/workqueue.c
kernel/workqueue_internal.h
lib/objagg.c
lib/test_kmod.c
lib/test_objagg.c
lib/test_rhashtable.c
mm/gup.c
mm/hugetlb.c
mm/kasan/Makefile
mm/memory-failure.c
mm/memory_hotplug.c
mm/migrate.c
mm/oom_kill.c
mm/page_alloc.c
mm/page_ext.c
mm/vmscan.c
net/atm/proc.c
net/batman-adv/bat_v_elp.c
net/batman-adv/distributed-arp-table.c
net/batman-adv/gateway_client.c
net/batman-adv/gateway_common.c
net/batman-adv/gateway_common.h
net/batman-adv/hard-interface.c
net/batman-adv/netlink.c
net/batman-adv/netlink.h
net/batman-adv/soft-interface.c
net/batman-adv/sysfs.c
net/bridge/br_multicast.c
net/core/devlink.c
net/core/ethtool.c
net/core/filter.c
net/core/page_pool.c
net/core/skmsg.c
net/core/sock.c
net/dccp/ccid.h
net/dsa/dsa2.c
net/dsa/master.c
net/dsa/slave.c
net/dsa/tag_ksz.c
net/ipv4/inet_diag.c
net/ipv4/inetpeer.c
net/ipv4/ip_gre.c
net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
net/ipv4/netfilter/nf_nat_snmp_basic_main.c
net/ipv4/route.c
net/ipv6/addrconf.c
net/ipv6/ip6_gre.c
net/ipv6/netfilter.c
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
net/ipv6/route.c
net/ipv6/seg6.c
net/ipv6/seg6_iptunnel.c
net/ipv6/sit.c
net/l2tp/l2tp_core.c
net/l2tp/l2tp_core.h
net/l2tp/l2tp_ip.c
net/l2tp/l2tp_ip6.c
net/mac80211/agg-tx.c
net/mac80211/tx.c
net/mac80211/util.c
net/mpls/mpls_iptunnel.c
net/netfilter/ipvs/Kconfig
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_compat.c
net/netfilter/nft_dynset.c
net/netfilter/nft_immediate.c
net/netfilter/nft_lookup.c
net/netfilter/nft_objref.c
net/netfilter/x_tables.c
net/packet/af_packet.c
net/rds/bind.c
net/rxrpc/recvmsg.c
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_gact.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/cls_basic.c
net/sched/cls_bpf.c
net/sched/cls_cgroup.c
net/sched/cls_flow.c
net/sched/cls_flower.c
net/sched/cls_fw.c
net/sched/cls_matchall.c
net/sched/cls_route.c
net/sched/cls_rsvp.h
net/sched/cls_tcindex.c
net/sched/cls_u32.c
net/sched/sch_api.c
net/sched/sch_generic.c
net/sctp/diag.c
net/sctp/offload.c
net/sctp/socket.c
net/sctp/stream.c
net/smc/af_smc.c
net/smc/smc_cdc.c
net/smc/smc_cdc.h
net/smc/smc_clc.c
net/smc/smc_close.c
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_ib.c
net/smc/smc_llc.c
net/smc/smc_pnet.c
net/smc/smc_tx.c
net/smc/smc_wr.c
net/smc/smc_wr.h
net/socket.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/tipc/link.c
net/tipc/msg.h
net/tipc/node.c
net/tls/tls_sw.c
net/vmw_vsock/virtio_transport.c
net/vmw_vsock/vmci_transport.c
net/wireless/ap.c
net/wireless/core.c
net/wireless/core.h
net/wireless/nl80211.c
net/wireless/pmsr.c
net/wireless/sme.c
net/wireless/util.c
net/x25/af_x25.c
samples/mei/mei-amt-version.c
security/apparmor/domain.c
security/apparmor/lsm.c
sound/core/pcm_lib.c
sound/pci/hda/hda_bind.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_ca0132.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_realtek.c
sound/soc/codecs/hdmi-codec.c
sound/soc/codecs/rt5682.c
sound/soc/samsung/i2s.c
sound/soc/sh/rcar/core.c
sound/soc/sh/rcar/ssi.c
sound/soc/sh/rcar/ssiu.c
sound/soc/soc-core.c
sound/soc/soc-dapm.c
sound/soc/soc-topology.c
sound/usb/pcm.c
sound/usb/quirks.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/map.c
tools/bpf/bpftool/prog.c
tools/iio/iio_generic_buffer.c
tools/include/uapi/asm/bitsperlong.h
tools/include/uapi/linux/in.h
tools/include/uapi/linux/tc_act/tc_bpf.h
tools/perf/Documentation/perf-c2c.txt
tools/perf/Documentation/perf-mem.txt
tools/perf/arch/powerpc/util/Build
tools/perf/arch/powerpc/util/mem-events.c [new file with mode: 0644]
tools/perf/builtin-script.c
tools/perf/builtin-trace.c
tools/perf/tests/attr.py
tools/perf/tests/evsel-tp-sched.c
tools/perf/ui/browsers/annotate.c
tools/perf/util/c++/clang.cpp
tools/perf/util/cpumap.c
tools/perf/util/mem-events.c
tools/perf/util/ordered-events.c
tools/perf/util/setup.py
tools/perf/util/symbol-elf.c
tools/testing/selftests/Makefile
tools/testing/selftests/bpf/bpf_util.h
tools/testing/selftests/bpf/test_btf.c
tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
tools/testing/selftests/filesystems/binderfs/.gitignore [new file with mode: 0644]
tools/testing/selftests/filesystems/binderfs/Makefile [new file with mode: 0644]
tools/testing/selftests/filesystems/binderfs/binderfs_test.c [new file with mode: 0644]
tools/testing/selftests/filesystems/binderfs/config [new file with mode: 0644]
tools/testing/selftests/ir/Makefile
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/forwarding/config
tools/testing/selftests/netfilter/Makefile
tools/testing/selftests/netfilter/config
tools/testing/selftests/netfilter/nft_nat.sh [new file with mode: 0755]
tools/testing/selftests/networking/timestamping/Makefile
tools/testing/selftests/proc/.gitignore
tools/testing/selftests/proc/Makefile
tools/testing/selftests/proc/setns-dcache.c [new file with mode: 0644]
tools/testing/selftests/seccomp/seccomp_bpf.c
tools/testing/selftests/timers/Makefile
virt/kvm/kvm_main.c

index 9b64266..169fe08 100644 (file)
@@ -24,7 +24,7 @@ What:         /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
                                                        cpld3_version
 
 Date:          November 2018
-KernelVersion: 4.21
+KernelVersion: 5.0
 Contact:       Vadim Pasternak <vadimp@mellanox.com>
 Description:   These files show which CPLD versions have been burned
                on the LED board.
@@ -35,7 +35,7 @@ What:         /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
                                                        jtag_enable
 
 Date:          November 2018
-KernelVersion: 4.21
+KernelVersion: 5.0
 Contact:       Vadim Pasternak <vadimp@mellanox.com>
 Description:   These files enable and disable access to the JTAG domain.
                By default access to the JTAG domain is disabled.
@@ -105,7 +105,7 @@ What:               /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/
                                                reset_voltmon_upgrade_fail
 
 Date:          November 2018
-KernelVersion: 4.21
+KernelVersion: 5.0
 Contact:       Vadim Pasternak <vadimp@mellanox.com>
 Description:   These files show the system reset cause, as follows: ComEx
                power fail, reset from ComEx, system platform reset, reset
index 7afb2fe..bcf2cd5 100644 (file)
                        By default, super page will be supported if Intel IOMMU
                        has the capability. With this option, super page will
                        not be supported.
-               sm_off [Default Off]
-                       By default, scalable mode will be supported if the
+               sm_on [Default Off]
+                       By default, scalable mode will be disabled even if the
                        hardware advertises that it has support for the scalable
                        mode translation. With this option set, scalable mode
-                       will not be used even on hardware which claims to support
-                       it.
+                       will be used on hardware which claims to support it.
                tboot_noforce [Default Off]
                        Do not force the Intel IOMMU enabled under tboot.
                        By default, tboot will force Intel IOMMU on, which
index 6e5cef0..50daa0b 100644 (file)
@@ -17,7 +17,11 @@ extra-y += $(DT_TMP_SCHEMA)
 quiet_cmd_mk_schema = SCHEMA  $@
       cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(filter-out FORCE, $^)
 
-DT_DOCS = $(shell cd $(srctree)/$(src) && find * -name '*.yaml')
+DT_DOCS = $(shell \
+       cd $(srctree)/$(src) && \
+       find * \( -name '*.yaml' ! -name $(DT_TMP_SCHEMA) \) \
+       )
+
 DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS))
 
 extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
index 3e17ac1..174f292 100644 (file)
@@ -3,8 +3,8 @@
 Required properties:
 - compatible: Should be "cdns,[<chip>-]{macb|gem}"
   Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC.
-  Use "cdns,at91sam9260-macb" for Atmel at91sam9 SoCs or the 10/100Mbit IP
-  available on sama5d3 SoCs.
+  Use "cdns,at91sam9260-macb" for Atmel at91sam9 SoCs.
+  Use "cdns,sam9x60-macb" for Microchip sam9x60 SoC.
   Use "cdns,np4-macb" for NP4 SoC devices.
   Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb".
   Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on
index 8e7f855..454c937 100644 (file)
@@ -19,6 +19,9 @@ Clock Properties:
   - fsl,max-adj      Maximum frequency adjustment in parts per billion.
   - fsl,extts-fifo   The presence of this property indicates hardware
                     support for the external trigger stamp FIFO.
+  - little-endian    The presence of this property indicates the 1588 timer
+                    IP block is in little-endian mode. The default endian mode
+                    is big-endian.
 
   These properties set the operational parameters for the PTP
   clock. You must choose these carefully for the clock to work right.
index 3660341..0e72183 100644 (file)
@@ -4,14 +4,10 @@ Required properties:
 - compatible : "olpc,ap-sp"
 - reg : base address and length of SoC's WTM registers
 - interrupts : SP-AP interrupt
-- clocks : phandle + clock-specifier for the clock that drives the WTM
-- clock-names:  should be "sp"
 
 Example:
        ap-sp@d4290000 {
                compatible = "olpc,ap-sp";
                reg = <0xd4290000 0x1000>;
                interrupts = <40>;
-               clocks = <&soc_clocks MMP2_CLK_SP>;
-               clock-names = "sp";
        }
index a188466..5045df9 100644 (file)
@@ -27,11 +27,12 @@ Driver Overview
 
 The DPIO driver is bound to DPIO objects discovered on the fsl-mc bus and
 provides services that:
-  A) allow other drivers, such as the Ethernet driver, to enqueue and dequeue
+
+  A. allow other drivers, such as the Ethernet driver, to enqueue and dequeue
      frames for their respective objects
-  B) allow drivers to register callbacks for data availability notifications
+  B. allow drivers to register callbacks for data availability notifications
      when data becomes available on a queue or channel
-  C) allow drivers to manage hardware buffer pools
+  C. allow drivers to manage hardware buffer pools
 
 The Linux DPIO driver consists of 3 primary components--
    DPIO object driver-- fsl-mc driver that manages the DPIO object
@@ -140,11 +141,10 @@ QBman portal interface (qbman-portal.c)
 
    The qbman-portal component provides APIs to do the low level hardware
    bit twiddling for operations such as:
-      -initializing Qman software portals
-
-      -building and sending portal commands
 
-      -portal interrupt configuration and processing
+      - initializing Qman software portals
+      - building and sending portal commands
+      - portal interrupt configuration and processing
 
    The qbman-portal APIs are not public to other drivers, and are
    only used by dpio-service.
index 7d4ecf6..c79ad85 100644 (file)
@@ -14,6 +14,11 @@ board.rev
 
 Board design revision.
 
+board.manufacture
+=================
+
+An identifier of the company or the facility which produced the part.
+
 fw.mgmt
 =======
 
index 2c5c67a..c63ea9f 100644 (file)
@@ -1,2 +1,10 @@
 fw_load_policy         [DEVICE, GENERIC]
                        Configuration mode: driverinit
+
+acl_region_rehash_interval     [DEVICE, DRIVER-SPECIFIC]
+                       Sets an interval for periodic ACL region rehashes.
+                       The value is in milliseconds; the minimal value is "3000".
+                       Value "0" disables the periodic work.
+                       The first rehash will be run right after the value is set.
+                       Type: u32
+                       Configuration mode: runtime
index 355c6d8..b203d13 100644 (file)
@@ -22,8 +22,9 @@ and changeable from userspace under certain rules.
 2. Querying from userspace
 
 Both admin and operational state can be queried via the netlink
-operation RTM_GETLINK. It is also possible to subscribe to RTMGRP_LINK
-to be notified of updates. This is important for setting from userspace.
+operation RTM_GETLINK. It is also possible to subscribe to RTNLGRP_LINK
+to be notified of updates while the interface is admin up. This is
+important for setting from userspace.
 
 These values contain interface state:
 
@@ -101,8 +102,9 @@ because some driver controlled protocol establishment has to
 complete. Corresponding functions are netif_dormant_on() to set the
 flag, netif_dormant_off() to clear it and netif_dormant() to query.
 
-On device allocation, networking core sets the flags equivalent to
-netif_carrier_ok() and !netif_dormant().
+On device allocation, both flags __LINK_STATE_NOCARRIER and
+__LINK_STATE_DORMANT are cleared, so the effective state is equivalent
+to netif_carrier_ok() and !netif_dormant().
 
 
 Whenever the driver CHANGES one of these flags, a workqueue event is
@@ -133,11 +135,11 @@ netif_carrier_ok() && !netif_dormant() is set by the
 driver. Afterwards, the userspace application can set IFLA_OPERSTATE
 to IF_OPER_DORMANT or IF_OPER_UP as long as the driver does not set
 netif_carrier_off() or netif_dormant_on(). Changes made by userspace
-are multicasted on the netlink group RTMGRP_LINK.
+are multicast on the netlink group RTNLGRP_LINK.
 
 So basically an 802.1X supplicant interacts with the kernel like this:
 
--subscribe to RTMGRP_LINK
+-subscribe to RTNLGRP_LINK
 -set IFLA_LINKMODE to 1 via RTM_SETLINK
 -query RTM_GETLINK once to get initial state
 -if initial flags are not (IFF_LOWER_UP && !IFF_DORMANT), wait until
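
A minimal userspace sketch of the subscription step above (not part of
this patch; error handling is omitted):

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>

    int main(void)
    {
            /* RTMGRP_LINK is the legacy bind-time bitmask matching the
             * RTNLGRP_LINK multicast group named in the text above. */
            struct sockaddr_nl sa = {
                    .nl_family = AF_NETLINK,
                    .nl_groups = RTMGRP_LINK,
            };
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
            char buf[8192];

            bind(fd, (struct sockaddr *)&sa, sizeof(sa));
            for (;;) {
                    ssize_t len = recv(fd, buf, sizeof(buf), 0);
                    struct nlmsghdr *nh = (struct nlmsghdr *)buf;

                    for (; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len))
                            if (nh->nlmsg_type == RTM_NEWLINK)
                                    printf("link change notification\n");
            }
    }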
index c5642f4..52b026b 100644 (file)
@@ -367,16 +367,19 @@ to the accept queue.
 TCP Fast Open
 =============
 * TcpEstabResets
+
 Defined in `RFC1213 tcpEstabResets`_.
 
 .. _RFC1213 tcpEstabResets: https://tools.ietf.org/html/rfc1213#page-48
 
 * TcpAttemptFails
+
 Defined in `RFC1213 tcpAttemptFails`_.
 
 .. _RFC1213 tcpAttemptFails: https://tools.ietf.org/html/rfc1213#page-48
 
 * TcpOutRsts
+
 Defined in `RFC1213 tcpOutRsts`_. The RFC says this counter indicates
 the 'segments sent containing the RST flag', but in the Linux kernel, this
 counter indicates the segments the kernel tried to send. The sending
@@ -384,6 +387,30 @@ process might be failed due to some errors (e.g. memory alloc failed).
 
 .. _RFC1213 tcpOutRsts: https://tools.ietf.org/html/rfc1213#page-52
 
+* TcpExtTCPSpuriousRtxHostQueues
+
+When the TCP stack wants to retransmit a packet and finds that the
+packet has not been lost in the network but has not actually been
+sent yet, it gives up the retransmission and updates this counter.
+This might happen if a packet stays too long in a qdisc or driver
+queue.
+
+* TcpEstabResets
+
+The socket receives a RST packet in the Established or CloseWait state.
+
+* TcpExtTCPKeepAlive
+
+This counter indicates how many keepalive packets were sent. Keepalive
+is not enabled by default; a userspace program can enable it by
+setting the SO_KEEPALIVE socket option.
+
+* TcpExtTCPSpuriousRTOs
+
+The spurious retransmission timeout detected by the `F-RTO`_
+algorithm.
+
+.. _F-RTO: https://tools.ietf.org/html/rfc5682
 
 TCP Fast Path
 =============
@@ -609,6 +636,29 @@ packet yet, the sender would know packet 4 is out of order. The TCP
 stack of kernel will increase TcpExtTCPSACKReorder for both of the
 above scenarios.
 
+* TcpExtTCPSlowStartRetrans
+
+The TCP stack wants to retransmit a packet and the congestion control
+state is 'Loss'.
+
+* TcpExtTCPFastRetrans
+
+The TCP stack wants to retransmit a packet and the congestion control
+state is not 'Loss'.
+
+* TcpExtTCPLostRetransmit
+
+A SACK points out that a retransmission packet is lost again.
+
+* TcpExtTCPRetransFail
+
+The TCP stack tries to deliver a retransmission packet to lower layers
+but the lower layers return an error.
+
+* TcpExtTCPSynRetrans
+
+The TCP stack retransmits a SYN packet.
+
 DSACK
 =====
 The DSACK is defined in `RFC2883`_. The receiver uses DSACK to report
@@ -790,8 +840,9 @@ unacknowledged number (more strict than `RFC 5961 section 5.2`_).
 .. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11
 
 TCP receive window
-=================
+==================
 * TcpExtTCPWantZeroWindowAdv
+
 Depending on current memory usage, the TCP stack tries to set receive
 window to zero. But the receive window might still be a nonzero
 value. For example, if the previous window size is 10, and the TCP
@@ -799,14 +850,16 @@ stack receives 3 bytes, the current window size would be 7 even if the
 window size calculated by the memory usage is zero.
 
 * TcpExtTCPToZeroWindowAdv
+
 The TCP receive window is set to zero from a nonzero value.
 
 * TcpExtTCPFromZeroWindowAdv
+
 The TCP receive window is set to a nonzero value from zero.
 
 
 Delayed ACK
-==========
+===========
 The TCP Delayed ACK is a technique which is used for reducing the
 packet count in the network. For more details, please refer to the
 `Delayed ACK wiki`_
@@ -814,10 +867,12 @@ packet count in the network. For more details, please refer the
 .. _Delayed ACK wiki: https://en.wikipedia.org/wiki/TCP_delayed_acknowledgment
 
 * TcpExtDelayedACKs
+
 A delayed ACK timer expires. The TCP stack will send a pure ACK packet
 and exit the delayed ACK mode.
 
 * TcpExtDelayedACKLocked
+
 A delayed ACK timer expires, but the TCP stack can't send an ACK
 immediately because the socket is locked by a userspace program. The
 TCP stack will send a pure ACK later (after the userspace program
@@ -826,24 +881,147 @@ TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK
 mode.
 
 * TcpExtDelayedACKLost
+
 It will be updated when the TCP stack receives a packet which has been
 ACKed. A Delayed ACK loss might cause this issue, but it would also be
 triggered by other reasons, such as a packet being duplicated in the
 network.
 
 Tail Loss Probe (TLP)
-===================
+=====================
 TLP is an algorithm which is used to detect TCP packet loss. For more
 details, please refer to the `TLP paper`_.
 
 .. _TLP paper: https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01
 
 * TcpExtTCPLossProbes
+
 A TLP probe packet is sent.
 
 * TcpExtTCPLossProbeRecovery
+
 A packet loss is detected and recovered by TLP.
 
+TCP Fast Open
+=============
+TCP Fast Open is a technology which allows data transfer before the
+3-way handshake completes. Please refer to the `TCP Fast Open wiki`_ for a
+general description.
+
+.. _TCP Fast Open wiki: https://en.wikipedia.org/wiki/TCP_Fast_Open
+
+* TcpExtTCPFastOpenActive
+
+When the TCP stack receives an ACK packet in the SYN-SENT status, and
+the ACK packet acknowledges the data in the SYN packet, the TCP stack
+understands that the TFO cookie was accepted by the other side, and
+then it updates this counter.
+
+* TcpExtTCPFastOpenActiveFail
+
+This counter indicates that the TCP stack initiated a TCP Fast Open,
+but it failed. This counter would be updated in three scenarios: (1)
+the other side doesn't acknowledge the data in the SYN packet. (2) The
+SYN packet which has the TFO cookie times out at least once. (3)
+after the 3-way handshake, the retransmission timeout happens
+net.ipv4.tcp_retries1 times, because some middle-boxes may black-hole
+fast open after the handshake.
+
+* TcpExtTCPFastOpenPassive
+
+This counter indicates how many times the TCP stack accepts the fast
+open request.
+
+* TcpExtTCPFastOpenPassiveFail
+
+This counter indicates how many times the TCP stack rejects the fast
+open request, either because the TFO cookie is invalid or because the
+TCP stack finds an error during the socket creation process.
+
+* TcpExtTCPFastOpenListenOverflow
+
+When the number of pending fast open requests is larger than
+fastopenq->max_qlen, the TCP stack will reject the fast open request
+and update this counter. When this counter is updated, the TCP stack
+won't update TcpExtTCPFastOpenPassive or
+TcpExtTCPFastOpenPassiveFail. The fastopenq->max_qlen is set by the
+TCP_FASTOPEN socket operation and it cannot be larger than
+net.core.somaxconn. For example:
+
+setsockopt(sfd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
+
+* TcpExtTCPFastOpenCookieReqd
+
+This counter indicates how many times a client wants to request a TFO
+cookie.
+
+SYN cookies
+===========
+SYN cookies are used to mitigate SYN flood attacks; for details,
+please refer to the `SYN cookies wiki`_.
+
+.. _SYN cookies wiki: https://en.wikipedia.org/wiki/SYN_cookies
+
+* TcpExtSyncookiesSent
+
+It indicates how many SYN cookies are sent.
+
+* TcpExtSyncookiesRecv
+
+How many SYN cookie reply packets the TCP stack receives.
+
+* TcpExtSyncookiesFailed
+
+The MSS decoded from the SYN cookie is invalid. When this counter is
+updated, the received packet won't be treated as a SYN cookie and the
+TcpExtSyncookiesRecv counter won't be updated.
+
+Challenge ACK
+=============
+For details of challenge ACKs, please refer to the explanation of
+TcpExtTCPACKSkippedChallenge.
+
+* TcpExtTCPChallengeACK
+
+The number of challenge acks sent.
+
+* TcpExtTCPSYNChallenge
+
+The number of challenge acks sent in response to SYN packets. After
+updating this counter, the TCP stack might send a challenge ACK and
+update the TcpExtTCPChallengeACK counter, or it might instead skip
+the challenge and update TcpExtTCPACKSkippedChallenge.
+
+prune
+=====
+When a socket is under memory pressure, the TCP stack will try to
+reclaim memory from the receiving queue and out of order queue. One of
+the reclaiming methods is 'collapse', which means allocating a big skb,
+copying the contiguous skbs into the single big skb, and freeing those
+contiguous skbs.
+
+* TcpExtPruneCalled
+
+The TCP stack tries to reclaim memory for a socket. After updating this
+counter, the TCP stack will try to collapse the out of order queue and
+the receiving queue. If the memory is still not enough, the TCP stack
+will try to discard packets from the out of order queue (and update the
+TcpExtOfoPruned counter).
+
+* TcpExtOfoPruned
+
+The TCP stack tries to discard packets on the out of order queue.
+
+* TcpExtRcvPruned
+
+After collapsing and discarding packets from the out of order queue,
+if the memory actually used is still larger than the max allowed
+memory, this counter will be updated. It means the 'prune' failed.
+
+* TcpExtTCPRcvCollapsed
+
+This counter indicates how many skbs are freed during 'collapse'.
+
 examples
 ========
 
index 819caf8..ebc679b 100644 (file)
@@ -56,26 +56,34 @@ of any kernel data structures.
 
 dentry-state:
 
-From linux/fs/dentry.c:
+From linux/include/linux/dcache.h:
 --------------------------------------------------------------
-struct {
+struct dentry_stat_t dentry_stat {
         int nr_dentry;
         int nr_unused;
         int age_limit;         /* age in seconds */
         int want_pages;        /* pages requested by system */
-        int dummy[2];
-} dentry_stat = {0, 0, 45, 0,};
--------------------------------------------------------------- 
-
-Dentries are dynamically allocated and deallocated, and
-nr_dentry seems to be 0 all the time. Hence it's safe to
-assume that only nr_unused, age_limit and want_pages are
-used. Nr_unused seems to be exactly what its name says.
+        int nr_negative;       /* # of unused negative dentries */
+        int dummy;             /* Reserved for future use */
+};
+--------------------------------------------------------------
+
+Dentries are dynamically allocated and deallocated.
+
+nr_dentry shows the total number of dentries allocated (active
++ unused). nr_unused shows the number of dentries that are not
+actively used, but are saved in the LRU list for future reuse.
+
 Age_limit is the age in seconds after which dcache entries
 can be reclaimed when memory is short and want_pages is
 nonzero when shrink_dcache_pages() has been called and the
 dcache isn't pruned yet.
 
+nr_negative shows the number of unused dentries that are also
+negative dentries which do not map to any files. Instead,
+they help speed up rejection of non-existing files provided
+by users.
+
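
Not part of the patch: an illustrative reader for the six dentry-state
fields described above:

    #include <stdio.h>

    int main(void)
    {
            long nr_dentry, nr_unused, age_limit, want_pages;
            long nr_negative, dummy;
            FILE *f = fopen("/proc/sys/fs/dentry-state", "r");

            if (!f)
                    return 1;
            if (fscanf(f, "%ld %ld %ld %ld %ld %ld",
                       &nr_dentry, &nr_unused, &age_limit,
                       &want_pages, &nr_negative, &dummy) == 6)
                    printf("dentries: %ld total, %ld unused, %ld negative\n",
                           nr_dentry, nr_unused, nr_negative);
            fclose(f);
            return 0;
    }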
 ==============================================================
 
 dquot-max & dquot-nr:
index e8e8d14..c1f95b5 100644 (file)
@@ -9,7 +9,7 @@ Fenghua Yu <fenghua.yu@intel.com>
 Tony Luck <tony.luck@intel.com>
 Vikas Shivappa <vikas.shivappa@intel.com>
 
-This feature is enabled by the CONFIG_X86_RESCTRL and the x86 /proc/cpuinfo
+This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo
 flag bits:
 RDT (Resource Director Technology) Allocation - "rdt_a"
 CAT (Cache Allocation Technology) - "cat_l3", "cat_l2"
index b449113..8c8f509 100644 (file)
@@ -2848,6 +2848,9 @@ F:        include/uapi/linux/if_bonding.h
 BPF (Safe dynamic programs and tools)
 M:     Alexei Starovoitov <ast@kernel.org>
 M:     Daniel Borkmann <daniel@iogearbox.net>
+R:     Martin KaFai Lau <kafai@fb.com>
+R:     Song Liu <songliubraving@fb.com>
+R:     Yonghong Song <yhs@fb.com>
 L:     netdev@vger.kernel.org
 L:     linux-kernel@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
@@ -2873,6 +2876,8 @@ F:        samples/bpf/
 F:     tools/bpf/
 F:     tools/lib/bpf/
 F:     tools/testing/selftests/bpf/
+K:     bpf
+N:     bpf
 
 BPF JIT for ARM
 M:     Shubham Bansal <illusionist.neo@gmail.com>
@@ -5187,7 +5192,7 @@ DRM DRIVERS FOR XEN
 M:     Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
 T:     git git://anongit.freedesktop.org/drm/drm-misc
 L:     dri-devel@lists.freedesktop.org
-L:     xen-devel@lists.xen.org
+L:     xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:     Supported
 F:     drivers/gpu/drm/xen/
 F:     Documentation/gpu/xen-front.rst
@@ -6099,6 +6104,7 @@ FREESCALE QORIQ PTP CLOCK DRIVER
 M:     Yangbo Lu <yangbo.lu@nxp.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
+F:     drivers/net/ethernet/freescale/enetc/enetc_ptp.c
 F:     drivers/ptp/ptp_qoriq.c
 F:     drivers/ptp/ptp_qoriq_debugfs.c
 F:     include/linux/fsl/ptp_qoriq.h
@@ -6159,7 +6165,7 @@ FREESCALE SOC SOUND DRIVERS
 M:     Timur Tabi <timur@kernel.org>
 M:     Nicolin Chen <nicoleotsuka@gmail.com>
 M:     Xiubo Li <Xiubo.Lee@gmail.com>
-R:     Fabio Estevam <fabio.estevam@nxp.com>
+R:     Fabio Estevam <festevam@gmail.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 L:     linuxppc-dev@lists.ozlabs.org
 S:     Maintained
@@ -10907,7 +10913,7 @@ F:      include/linux/nvmem-consumer.h
 F:     include/linux/nvmem-provider.h
 
 NXP SGTL5000 DRIVER
-M:     Fabio Estevam <fabio.estevam@nxp.com>
+M:     Fabio Estevam <festevam@gmail.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/sound/sgtl5000.txt
@@ -11321,10 +11327,12 @@ F:    include/dt-bindings/
 
 OPENCORES I2C BUS DRIVER
 M:     Peter Korsgaard <peter@korsgaard.com>
+M:     Andrew Lunn <andrew@lunn.ch>
 L:     linux-i2c@vger.kernel.org
 S:     Maintained
 F:     Documentation/i2c/busses/i2c-ocores
 F:     drivers/i2c/busses/i2c-ocores.c
+F:     include/linux/platform_data/i2c-ocores.h
 
 OPENRISC ARCHITECTURE
 M:     Jonas Bonn <jonas@southpole.se>
@@ -12890,6 +12898,13 @@ F:     Documentation/devicetree/bindings/net/dsa/realtek-smi.txt
 F:     drivers/net/dsa/realtek-smi*
 F:     drivers/net/dsa/rtl83*
 
+REDPINE WIRELESS DRIVER
+M:     Amitkumar Karwar <amitkarwar@gmail.com>
+M:     Siva Rebbagondla <siva8118@gmail.com>
+L:     linux-wireless@vger.kernel.org
+S:     Maintained
+F:     drivers/net/wireless/rsi/
+
 REGISTER MAP ABSTRACTION
 M:     Mark Brown <broonie@kernel.org>
 L:     linux-kernel@vger.kernel.org
@@ -13718,6 +13733,15 @@ L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/sfc/
 
+SFF/SFP/SFP+ MODULE SUPPORT
+M:     Russell King <linux@armlinux.org.uk>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     drivers/net/phy/phylink.c
+F:     drivers/net/phy/sfp*
+F:     include/linux/phylink.h
+F:     include/linux/sfp.h
+
 SGI GRU DRIVER
 M:     Dimitri Sivanich <sivanich@sgi.com>
 S:     Maintained
@@ -13739,6 +13763,7 @@ F:      drivers/misc/sgi-xp/
 
 SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
 M:     Ursula Braun <ubraun@linux.ibm.com>
+M:     Karsten Graul <kgraul@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 W:     http://www.ibm.com/developerworks/linux/linux390/
 S:     Supported
@@ -16663,6 +16688,15 @@ S:     Maintained
 F:     drivers/platform/x86/
 F:     drivers/platform/olpc/
 
+X86 PLATFORM DRIVERS - ARCH
+R:     Darren Hart <dvhart@infradead.org>
+R:     Andy Shevchenko <andy@infradead.org>
+L:     platform-driver-x86@vger.kernel.org
+L:     x86@kernel.org
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
+S:     Maintained
+F:     arch/x86/platform
+
 X86 VDSO
 M:     Andy Lutomirski <luto@kernel.org>
 L:     linux-kernel@vger.kernel.org
index 1416532..86cf35d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 0
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc6
 NAME = Shy Crocodile
 
 # *DOCUMENTATION*
index 4d17cac..432402c 100644 (file)
 
 #elif defined(CONFIG_ALPHA_DP264) || \
       defined(CONFIG_ALPHA_LYNX)  || \
-      defined(CONFIG_ALPHA_SHARK) || \
-      defined(CONFIG_ALPHA_EIGER)
+      defined(CONFIG_ALPHA_SHARK)
 # define NR_IRQS       64
 
 #elif defined(CONFIG_ALPHA_TITAN)
 #define NR_IRQS                80
 
 #elif defined(CONFIG_ALPHA_RAWHIDE) || \
-       defined(CONFIG_ALPHA_TAKARA)
+      defined(CONFIG_ALPHA_TAKARA) || \
+      defined(CONFIG_ALPHA_EIGER)
 # define NR_IRQS       128
 
 #elif defined(CONFIG_ALPHA_WILDFIRE)
index d73dc47..188fc92 100644 (file)
@@ -78,7 +78,7 @@ __load_new_mm_context(struct mm_struct *next_mm)
 /* Macro for exception fixup code to access integer registers.  */
 #define dpf_reg(r)                                                     \
        (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 :  \
-                                (r) <= 18 ? (r)+8 : (r)-10])
+                                (r) <= 18 ? (r)+10 : (r)-10])
 
 asmlinkage void
 do_page_fault(unsigned long address, unsigned long mmcsr,
index d0fd688..5b25006 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&mmc1_pins>;
        bus-width = <0x4>;
-       cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
+       cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
        cd-inverted;
        max-frequency = <26000000>;
        vmmc-supply = <&vmmcsd_fixed>;
index 47aa53b..559659b 100644 (file)
                clocksource: timer@20000 {
                        compatible = "ti,da830-timer";
                        reg = <0x20000 0x1000>;
-                       interrupts = <12>, <13>;
+                       interrupts = <21>, <22>;
                        interrupt-names = "tint12", "tint34";
                        clocks = <&pll0_auxclk>;
                };
index 5edf858..a31b17e 100644 (file)
                power {
                        label = "Power Button";
                        gpios = <&gpio2 12 GPIO_ACTIVE_LOW>;
-                       gpio-key,wakeup;
+                       wakeup-source;
                        linux,code = <KEY_POWER>;
                };
        };
index d816370..4a31a41 100644 (file)
        pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
        cd-gpios = <&gpio3 22 GPIO_ACTIVE_LOW>;
        keep-power-in-suspend;
-       enable-sdio-wakeup;
+       wakeup-source;
        vmmc-supply = <&reg_sd3_vmmc>;
        status = "okay";
 };
index 272ff61..d1375d3 100644 (file)
                        };
 
                        gpt: gpt@2098000 {
-                               compatible = "fsl,imx6sx-gpt", "fsl,imx31-gpt";
+                               compatible = "fsl,imx6sx-gpt", "fsl,imx6dl-gpt";
                                reg = <0x02098000 0x4000>;
                                interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clks IMX6SX_CLK_GPT_BUS>,
index e4645f6..2ab7486 100644 (file)
                        compatible = "amlogic,meson6-dwmac", "snps,dwmac";
                        reg = <0xc9410000 0x10000
                               0xc1108108 0x4>;
-                       interrupts = <GIC_SPI 8 IRQ_TYPE_EDGE_RISING>;
+                       interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
                        interrupt-names = "macirq";
                        status = "disabled";
                };
index 0872f6e..d50fc2f 100644 (file)
                cap-sd-highspeed;
                disable-wp;
 
-               cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-               cd-inverted;
+               cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
                vmmc-supply = <&vcc_3v3>;
        };
index 58669ab..0f0a46d 100644 (file)
                /* Realtek RTL8211F (0x001cc916) */
                eth_phy: ethernet-phy@0 {
                        reg = <0>;
-                       eee-broken-1000t;
                        interrupt-parent = <&gpio_intc>;
                        /* GPIOH_3 */
                        interrupts = <17 IRQ_TYPE_LEVEL_LOW>;
                cap-sd-highspeed;
                disable-wp;
 
-               cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-               cd-inverted;
+               cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
                vmmc-supply = <&tflash_vdd>;
                vqmmc-supply = <&tf_io>;
index f585361..6ac02be 100644 (file)
                cap-sd-highspeed;
                disable-wp;
 
-               cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-               cd-inverted;
+               cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
                vmmc-supply = <&vcc_3v3>;
        };
index ddc7a7b..f57acf8 100644 (file)
                        interrupts-extended = <
                                &cpcap 15 0 &cpcap 14 0 &cpcap 28 0 &cpcap 19 0
                                &cpcap 18 0 &cpcap 17 0 &cpcap 16 0 &cpcap 49 0
-                               &cpcap 48 1
+                               &cpcap 48 0
                        >;
                        interrupt-names =
                                "id_ground", "id_float", "se0conn", "vbusvld",
index e53d326..93b4209 100644 (file)
 
        vdda-supply = <&vdac>;
 
-       #address-cells = <1>;
-       #size-cells = <0>;
-
        port {
-               reg = <0>;
                venc_out: endpoint {
                        remote-endpoint = <&opa_in>;
                        ti,channels = <1>;
index 182a539..826920e 100644 (file)
	/* For debugging, it is often a good idea to remove this GPIO.
	   That way you can remove the back cover (to reboot by removing
	   the battery) and still use the MMC card. */
-       cd-gpios = <&gpio6 0 GPIO_ACTIVE_HIGH>; /* 160 */
+       cd-gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* 160 */
 };
 
 /* most boards use vaux3, only some old versions use vmmc2 instead */
index 0d9b853..e142e6c 100644 (file)
                compatible = "ti,omap2-onenand";
                reg = <0 0 0x20000>;    /* CS0, offset 0, IO size 128K */
 
+               /*
+                * These timings are based on CONFIG_OMAP_GPMC_DEBUG=y reported
+                * bootloader set values when booted with v4.19 using both N950
+                * and N9 devices (OneNAND Manufacturer: Samsung):
+                *
+                *   gpmc cs0 before gpmc_cs_program_settings:
+                *   cs0 GPMC_CS_CONFIG1: 0xfd001202
+                *   cs0 GPMC_CS_CONFIG2: 0x00181800
+                *   cs0 GPMC_CS_CONFIG3: 0x00030300
+                *   cs0 GPMC_CS_CONFIG4: 0x18001804
+                *   cs0 GPMC_CS_CONFIG5: 0x03171d1d
+                *   cs0 GPMC_CS_CONFIG6: 0x97080000
+                */
                gpmc,sync-read;
                gpmc,sync-write;
                gpmc,burst-length = <16>;
                gpmc,device-width = <2>;
                gpmc,mux-add-data = <2>;
                gpmc,cs-on-ns = <0>;
-               gpmc,cs-rd-off-ns = <87>;
-               gpmc,cs-wr-off-ns = <87>;
+               gpmc,cs-rd-off-ns = <122>;
+               gpmc,cs-wr-off-ns = <122>;
                gpmc,adv-on-ns = <0>;
-               gpmc,adv-rd-off-ns = <10>;
-               gpmc,adv-wr-off-ns = <10>;
-               gpmc,oe-on-ns = <15>;
-               gpmc,oe-off-ns = <87>;
+               gpmc,adv-rd-off-ns = <15>;
+               gpmc,adv-wr-off-ns = <15>;
+               gpmc,oe-on-ns = <20>;
+               gpmc,oe-off-ns = <122>;
                gpmc,we-on-ns = <0>;
-               gpmc,we-off-ns = <87>;
-               gpmc,rd-cycle-ns = <112>;
-               gpmc,wr-cycle-ns = <112>;
-               gpmc,access-ns = <81>;
+               gpmc,we-off-ns = <122>;
+               gpmc,rd-cycle-ns = <148>;
+               gpmc,wr-cycle-ns = <148>;
+               gpmc,access-ns = <117>;
                gpmc,page-burst-access-ns = <15>;
                gpmc,bus-turnaround-ns = <0>;
                gpmc,cycle2cycle-delay-ns = <0>;
                gpmc,wait-monitoring-ns = <0>;
-               gpmc,clk-activation-ns = <5>;
-               gpmc,wr-data-mux-bus-ns = <30>;
-               gpmc,wr-access-ns = <81>;
-               gpmc,sync-clk-ps = <15000>;
+               gpmc,clk-activation-ns = <10>;
+               gpmc,wr-data-mux-bus-ns = <40>;
+               gpmc,wr-access-ns = <117>;
+
+               gpmc,sync-clk-ps = <15000>; /* TBC; where does this value come from? */
 
                /*
                 * MTD partition table corresponding to Nokia's MeeGo 1.2
index 9c7e309..0960348 100644 (file)
                                        <SYSC_IDLE_SMART>,
                                        <SYSC_IDLE_SMART_WKUP>;
                        ti,syss-mask = <1>;
-                       ti,no-reset-on-init;
-                       ti,no-idle-on-init;
                        /* Domains (V, P, C): core, core_pwrdm, l4per_clkdm */
                        clocks = <&l4per_clkctrl OMAP5_UART3_CLKCTRL 0>;
                        clock-names = "fck";
index 3cc33f7..3adc158 100644 (file)
 
                du: display@feb00000 {
                        compatible = "renesas,du-r8a7743";
-                       reg = <0 0xfeb00000 0 0x40000>,
-                             <0 0xfeb90000 0 0x1c>;
-                       reg-names = "du", "lvds.0";
+                       reg = <0 0xfeb00000 0 0x40000>;
                        interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&cpg CPG_MOD 724>,
-                                <&cpg CPG_MOD 723>,
-                                <&cpg CPG_MOD 726>;
-                       clock-names = "du.0", "du.1", "lvds.0";
+                                <&cpg CPG_MOD 723>;
+                       clock-names = "du.0", "du.1";
                        status = "disabled";
 
                        ports {
                                port@1 {
                                        reg = <1>;
                                        du_out_lvds0: endpoint {
+                                               remote-endpoint = <&lvds0_in>;
+                                       };
+                               };
+                       };
+               };
+
+               lvds0: lvds@feb90000 {
+                       compatible = "renesas,r8a7743-lvds";
+                       reg = <0 0xfeb90000 0 0x1c>;
+                       clocks = <&cpg CPG_MOD 726>;
+                       power-domains = <&sysc R8A7743_PD_ALWAYS_ON>;
+                       resets = <&cpg 726>;
+                       status = "disabled";
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               port@0 {
+                                       reg = <0>;
+                                       lvds0_in: endpoint {
+                                               remote-endpoint = <&du_out_lvds0>;
+                                       };
+                               };
+                               port@1 {
+                                       reg = <1>;
+                                       lvds0_out: endpoint {
                                        };
                                };
                        };
index 353d90f..13304b8 100644 (file)
                        #clock-cells = <0>;
                        compatible = "fixed-clock";
                        clock-frequency = <24000000>;
+                       clock-output-names = "osc24M";
                };
 
                osc32k: clk-32k {
index 5d23667..25540b7 100644 (file)
@@ -53,7 +53,7 @@
 
        aliases {
                serial0 = &uart0;
-               /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */
+               ethernet0 = &emac;
                ethernet1 = &sdiowifi;
        };
 
index 689c893..b08d561 100644 (file)
        bus-num = <3>;
        status = "okay";
        spi-slave;
+       #address-cells = <0>;
 
-       slave@0 {
+       slave {
                compatible = "lwn,bk4";
                spi-max-frequency = <30000000>;
-               reg = <0>;
        };
 };
 
index 318394e..95a11d5 100644 (file)
@@ -83,7 +83,7 @@ static void __iomem *cns3xxx_pci_map_bus(struct pci_bus *bus,
        } else /* remote PCI bus */
                base = cnspci->cfg1_regs + ((busno & 0xf) << 20);
 
-       return base + (where & 0xffc) + (devfn << 12);
+       return base + where + (devfn << 12);
 }
 
 static int cns3xxx_pci_read_config(struct pci_bus *bus, unsigned int devfn,
@@ -93,7 +93,7 @@ static int cns3xxx_pci_read_config(struct pci_bus *bus, unsigned int devfn,
        u32 mask = (0x1ull << (size * 8)) - 1;
        int shift = (where % 4) * 8;
 
-       ret = pci_generic_config_read32(bus, devfn, where, size, val);
+       ret = pci_generic_config_read(bus, devfn, where, size, val);
 
        if (ret == PCIBIOS_SUCCESSFUL && !bus->number && !devfn &&
            (where & 0xffc) == PCI_CLASS_REVISION)
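
The mask and shift computed above select a size-byte field at byte offset (where % 4) within a 32-bit config dword. A minimal standalone sketch of that selection, with illustrative names (not the driver's code):

#include <stdint.h>
#include <stdio.h>

/* Select 'size' bytes at byte offset (where % 4) from a 32-bit value,
 * mirroring the mask/shift arithmetic in cns3xxx_pci_read_config().
 * Illustrative only. */
static uint32_t extract_field(uint32_t dword, int where, int size)
{
	uint32_t mask = (uint32_t)((1ull << (size * 8)) - 1);
	int shift = (where % 4) * 8;

	return (dword >> shift) & mask;
}

int main(void)
{
	/* Byte 2 of 0xAABBCCDD is 0xBB. */
	printf("0x%02x\n", extract_field(0xAABBCCDDu, 2, 1));
	return 0;
}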
index 3b73813..23e8c93 100644 (file)
@@ -75,8 +75,7 @@ void __init n2100_map_io(void)
 /*
  * N2100 PCI.
  */
-static int __init
-n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+static int n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
        int irq;
 
index 028e50c..a32c3b6 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/suspend.h>
 #include <asm/suspend.h>
 #include "smc.h"
+#include "pm.h"
 
 static int tango_pm_powerdown(unsigned long arg)
 {
@@ -24,10 +25,7 @@ static const struct platform_suspend_ops tango_pm_ops = {
        .valid = suspend_valid_only_mem,
 };
 
-static int __init tango_pm_init(void)
+void __init tango_pm_init(void)
 {
        suspend_set_ops(&tango_pm_ops);
-       return 0;
 }
-
-late_initcall(tango_pm_init);
diff --git a/arch/arm/mach-tango/pm.h b/arch/arm/mach-tango/pm.h
new file mode 100644 (file)
index 0000000..35ea705
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CONFIG_SUSPEND
+void __init tango_pm_init(void);
+#else
+#define tango_pm_init NULL
+#endif
index 677dd7b..824f907 100644 (file)
@@ -2,6 +2,7 @@
 #include <asm/mach/arch.h>
 #include <asm/hardware/cache-l2x0.h>
 #include "smc.h"
+#include "pm.h"
 
 static void tango_l2c_write(unsigned long val, unsigned int reg)
 {
@@ -15,4 +16,5 @@ DT_MACHINE_START(TANGO_DT, "Sigma Tango DT")
        .dt_compat      = tango_dt_compat,
        .l2c_aux_mask   = ~0,
        .l2c_write_sec  = tango_l2c_write,
+       .init_late      = tango_pm_init,
 MACHINE_END
index ed36dca..f519199 100644 (file)
@@ -190,8 +190,6 @@ static int pxa_ssp_remove(struct platform_device *pdev)
        if (ssp == NULL)
                return -ENODEV;
 
-       iounmap(ssp->mmio_base);
-
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        release_mem_region(res->start, resource_size(res));
 
@@ -201,7 +199,6 @@ static int pxa_ssp_remove(struct platform_device *pdev)
        list_del(&ssp->node);
        mutex_unlock(&ssp_lock);
 
-       kfree(ssp);
        return 0;
 }
 
index cb44aa2..e1d44b9 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/of_address.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <linux/dma-mapping.h>
 #include <linux/vmalloc.h>
 #include <linux/swiotlb.h>
 
index b0c64f7..8974b5a 100644 (file)
                reg = <0x3a3>;
                interrupt-parent = <&r_intc>;
                interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+               x-powers,drive-vbus-en; /* set N_VBUSEN as output pin */
        };
 };
 
index 837a03d..2abb335 100644 (file)
                };
 
                video-codec@1c0e000 {
-                       compatible = "allwinner,sun50i-h5-video-engine";
+                       compatible = "allwinner,sun50i-a64-video-engine";
                        reg = <0x01c0e000 0x1000>;
                        clocks = <&ccu CLK_BUS_VE>, <&ccu CLK_VE>,
                                 <&ccu CLK_DRAM_VE>;
index e14e0ce..016641a 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vddio_boot>;
index 8cd50b7..ade2ee0 100644 (file)
        max-frequency = <200000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddio_ao3v3>;
        vqmmc-supply = <&vddio_tf>;
index 4cf7f6e..25105ac 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vddio_card>;
index 2e1cd5e..1cc9dc6 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&tflash_vdd>;
        vqmmc-supply = <&tf_io>;
index ce86226..0be0f2a 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vddio_card>;
index 93a4acf..ad4d50b 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vcc_3v3>;
 };
index ec09bb5..2d2db78 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vcc_3v3>;
index f1c410e..796baea 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vddio_card>;
index db29344..255cede 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vcc_3v3>;
        vqmmc-supply = <&vcc_card>;
index 6739697..9cbdb85 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vddio_card>;
index a1b3101..bc811a2 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vddio_boot>;
index 3c3a667..3f086ed 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vddio_boot>;
index f7a1cff..8acfd40 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vddio_boot>;
index 7212dc4..7fa20a8 100644 (file)
        max-frequency = <100000000>;
        disable-wp;
 
-       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>;
-       cd-inverted;
+       cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>;
 
        vmmc-supply = <&vddao_3v3>;
        vqmmc-supply = <&vddio_boot>;
index 99b7495..838e32c 100644 (file)
                };
 
                intc: interrupt-controller@9bc0000 {
-                       compatible = "arm,gic-v3";
+                       compatible = "qcom,msm8996-gic-v3", "arm,gic-v3";
                        #interrupt-cells = <3>;
                        interrupt-controller;
                        #redistributor-regions = <1>;
index 20745a8..719ed9d 100644 (file)
                                 <&cpg CPG_CORE R8A774A1_CLK_S3D1>,
                                 <&scif_clk>;
                        clock-names = "fck", "brg_int", "scif_clk";
+                       dmas = <&dmac1 0x13>, <&dmac1 0x12>,
+                              <&dmac2 0x13>, <&dmac2 0x12>;
+                       dma-names = "tx", "rx", "tx", "rx";
                        power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>;
                        resets = <&cpg 310>;
                        status = "disabled";
index afedbf5..0648d12 100644 (file)
                                 <&cpg CPG_CORE R8A7796_CLK_S3D1>,
                                 <&scif_clk>;
                        clock-names = "fck", "brg_int", "scif_clk";
+                       dmas = <&dmac1 0x13>, <&dmac1 0x12>,
+                              <&dmac2 0x13>, <&dmac2 0x12>;
+                       dma-names = "tx", "rx", "tx", "rx";
                        power-domains = <&sysc R8A7796_PD_ALWAYS_ON>;
                        resets = <&cpg 310>;
                        status = "disabled";
index 6dc9b1f..4b3730f 100644 (file)
                                 <&cpg CPG_CORE R8A77965_CLK_S3D1>,
                                 <&scif_clk>;
                        clock-names = "fck", "brg_int", "scif_clk";
+                       dmas = <&dmac1 0x13>, <&dmac1 0x12>,
+                              <&dmac2 0x13>, <&dmac2 0x12>;
+                       dma-names = "tx", "rx", "tx", "rx";
                        power-domains = <&sysc R8A77965_PD_ALWAYS_ON>;
                        resets = <&cpg 310>;
                        status = "disabled";
index 29cdc99..9859e11 100644 (file)
@@ -299,8 +299,10 @@ int swsusp_arch_suspend(void)
                dcache_clean_range(__idmap_text_start, __idmap_text_end);
 
                /* Clean kvm setup code to PoC? */
-               if (el2_reset_needed())
+               if (el2_reset_needed()) {
                        dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
+                       dcache_clean_range(__hyp_text_start, __hyp_text_end);
+               }
 
                /* make the crash dump kernel image protected again */
                crash_post_resume();
index e1261fb..17f325b 100644 (file)
@@ -28,6 +28,8 @@
 #include <asm/virt.h>
 
        .text
+       .pushsection    .hyp.text, "ax"
+
        .align 11
 
 ENTRY(__hyp_stub_vectors)
index ba6b417..b09b6f7 100644 (file)
@@ -88,6 +88,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
         * we end up running with module randomization disabled.
         */
        module_alloc_base = (u64)_etext - MODULES_VSIZE;
+       __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
 
        /*
         * Try to map the FDT early. If this fails, we simply bail,
index f2c211a..5887133 100644 (file)
@@ -120,10 +120,12 @@ static int create_dtb(struct kimage *image,
 {
        void *buf;
        size_t buf_size;
+       size_t cmdline_len;
        int ret;
 
+       cmdline_len = cmdline ? strlen(cmdline) : 0;
        buf_size = fdt_totalsize(initial_boot_params)
-                       + strlen(cmdline) + DTB_EXTRA_SPACE;
+                       + cmdline_len + DTB_EXTRA_SPACE;
 
        for (;;) {
                buf = vmalloc(buf_size);
index 2a5b338..f17afb9 100644 (file)
@@ -478,13 +478,13 @@ bool arch_within_kprobe_blacklist(unsigned long addr)
            addr < (unsigned long)__entry_text_end) ||
            (addr >= (unsigned long)__idmap_text_start &&
            addr < (unsigned long)__idmap_text_end) ||
+           (addr >= (unsigned long)__hyp_text_start &&
+           addr < (unsigned long)__hyp_text_end) ||
            !!search_exception_tables(addr))
                return true;
 
        if (!is_kernel_in_hyp_mode()) {
-               if ((addr >= (unsigned long)__hyp_text_start &&
-                   addr < (unsigned long)__hyp_text_end) ||
-                   (addr >= (unsigned long)__hyp_idmap_text_start &&
+               if ((addr >= (unsigned long)__hyp_idmap_text_start &&
                    addr < (unsigned long)__hyp_idmap_text_end))
                        return true;
        }
index fcb1f2a..99bb8fa 100644 (file)
@@ -286,74 +286,73 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 
 }
 
-static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start,
+                    unsigned long end)
 {
-       pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
-       unsigned long addr;
-       unsigned i;
+       unsigned long addr = start;
+       pte_t *ptep = pte_offset_kernel(pmdp, start);
 
-       for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
-               addr = start + i * PAGE_SIZE;
+       do {
                note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
-       }
+       } while (ptep++, addr += PAGE_SIZE, addr != end);
 }
 
-static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
+static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start,
+                    unsigned long end)
 {
-       pmd_t *pmdp = pmd_offset(pudp, 0UL);
-       unsigned long addr;
-       unsigned i;
+       unsigned long next, addr = start;
+       pmd_t *pmdp = pmd_offset(pudp, start);
 
-       for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
+       do {
                pmd_t pmd = READ_ONCE(*pmdp);
+               next = pmd_addr_end(addr, end);
 
-               addr = start + i * PMD_SIZE;
                if (pmd_none(pmd) || pmd_sect(pmd)) {
                        note_page(st, addr, 3, pmd_val(pmd));
                } else {
                        BUG_ON(pmd_bad(pmd));
-                       walk_pte(st, pmdp, addr);
+                       walk_pte(st, pmdp, addr, next);
                }
-       }
+       } while (pmdp++, addr = next, addr != end);
 }
 
-static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
+static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start,
+                    unsigned long end)
 {
-       pud_t *pudp = pud_offset(pgdp, 0UL);
-       unsigned long addr;
-       unsigned i;
+       unsigned long next, addr = start;
+       pud_t *pudp = pud_offset(pgdp, start);
 
-       for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
+       do {
                pud_t pud = READ_ONCE(*pudp);
+               next = pud_addr_end(addr, end);
 
-               addr = start + i * PUD_SIZE;
                if (pud_none(pud) || pud_sect(pud)) {
                        note_page(st, addr, 2, pud_val(pud));
                } else {
                        BUG_ON(pud_bad(pud));
-                       walk_pmd(st, pudp, addr);
+                       walk_pmd(st, pudp, addr, next);
                }
-       }
+       } while (pudp++, addr = next, addr != end);
 }
 
 static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
                     unsigned long start)
 {
-       pgd_t *pgdp = pgd_offset(mm, 0UL);
-       unsigned i;
-       unsigned long addr;
+       unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0;
+       unsigned long next, addr = start;
+       pgd_t *pgdp = pgd_offset(mm, start);
 
-       for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
+       do {
                pgd_t pgd = READ_ONCE(*pgdp);
+               next = pgd_addr_end(addr, end);
 
-               addr = start + i * PGDIR_SIZE;
                if (pgd_none(pgd)) {
                        note_page(st, addr, 1, pgd_val(pgd));
                } else {
                        BUG_ON(pgd_bad(pgd));
-                       walk_pud(st, pgdp, addr);
+                       walk_pud(st, pgdp, addr, next);
                }
-       }
+       } while (pgdp++, addr = next, addr != end);
 }
 
 void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
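
The walker rewrite above adopts the kernel's usual range-walk idiom: step to the next table-entry boundary with *_addr_end(), clamped so the walk never runs past end, which is what lets it handle partial ranges. A small userspace sketch of the clamping, assuming a 2 MiB PMD span for illustration (the kernel's pmd_addr_end() additionally guards against address overflow):

#include <stdio.h>

#define PMD_SIZE (1UL << 21)	/* assumed 2 MiB, for illustration */

/* Next PMD boundary after addr, clamped to end - the same shape as
 * the pmd_addr_end() call used in walk_pmd() above. */
static unsigned long pmd_addr_end_sketch(unsigned long addr, unsigned long end)
{
	unsigned long boundary = (addr + PMD_SIZE) & ~(PMD_SIZE - 1);

	return boundary < end ? boundary : end;
}

int main(void)
{
	unsigned long addr = 0x100000, end = 0x500000, next;

	do {
		next = pmd_addr_end_sketch(addr, end);
		printf("entry covers [%#lx, %#lx)\n", addr, next);
		addr = next;
	} while (addr != end);
	return 0;
}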
index 30695a8..5c9073b 100644 (file)
@@ -33,7 +33,11 @@ void sync_icache_aliases(void *kaddr, unsigned long len)
                __clean_dcache_area_pou(kaddr, len);
                __flush_icache_all();
        } else {
-               flush_icache_range(addr, addr + len);
+               /*
+                * Don't issue kick_all_cpus_sync() after I-cache invalidation
+                * for user mappings.
+                */
+               __flush_icache_range(addr, addr + len);
        }
 }
 
index 33a2c94..63b4a17 100644 (file)
@@ -30,6 +30,7 @@ generic-y += pgalloc.h
 generic-y += preempt.h
 generic-y += segment.h
 generic-y += serial.h
+generic-y += shmparam.h
 generic-y += tlbflush.h
 generic-y += topology.h
 generic-y += trace_clock.h
index 6c6f630..0febf1a 100644 (file)
@@ -1,5 +1,4 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-generic-y += shmparam.h
 generic-y += ucontext.h
index cd400d3..961c1dc 100644 (file)
@@ -40,6 +40,7 @@ generic-y += preempt.h
 generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += serial.h
+generic-y += shmparam.h
 generic-y += sizes.h
 generic-y += spinlock.h
 generic-y += timex.h
index 6c6f630..0febf1a 100644 (file)
@@ -1,5 +1,4 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-generic-y += shmparam.h
 generic-y += ucontext.h
index 47c4da3..b25fd42 100644 (file)
@@ -30,6 +30,7 @@ generic-y += rwsem.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
+generic-y += shmparam.h
 generic-y += sizes.h
 generic-y += topology.h
 generic-y += trace_clock.h
index 61d955c..c1b06dc 100644 (file)
@@ -1,4 +1,3 @@
 include include/uapi/asm-generic/Kbuild.asm
 
-generic-y += shmparam.h
 generic-y += ucontext.h
index 3804935..40712e4 100644 (file)
@@ -155,18 +155,22 @@ out:
 static int __init nfhd_init(void)
 {
        u32 blocks, bsize;
+       int ret;
        int i;
 
        nfhd_id = nf_get_id("XHDI");
        if (!nfhd_id)
                return -ENODEV;
 
-       major_num = register_blkdev(major_num, "nfhd");
-       if (major_num <= 0) {
+       ret = register_blkdev(major_num, "nfhd");
+       if (ret < 0) {
                pr_warn("nfhd: unable to get major number\n");
-               return major_num;
+               return ret;
        }
 
+       if (!major_num)
+               major_num = ret;
+
        for (i = NFHD_DEV_OFFSET; i < 24; i++) {
                if (nfhd_get_capacity(i, 0, &blocks, &bsize))
                        continue;
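
The fix above relies on the register_blkdev() contract: passing a major of 0 requests dynamic allocation, the allocated major comes back as a positive return value, and a negative return means failure. A hedged sketch of that pattern (not the driver's actual code):

#include <linux/fs.h>	/* register_blkdev() */

static unsigned int major_num;	/* 0 => ask for a dynamically allocated major */

static int nfhd_register_sketch(void)
{
	int ret = register_blkdev(major_num, "nfhd");

	if (ret < 0)
		return ret;		/* registration failed */
	if (!major_num)
		major_num = ret;	/* remember the assigned dynamic major */
	return 0;
}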
index 9f1dd26..95f8f63 100644 (file)
@@ -20,6 +20,7 @@ generic-y += mm-arch-hooks.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += sections.h
+generic-y += shmparam.h
 generic-y += spinlock.h
 generic-y += topology.h
 generic-y += trace_clock.h
index b8b3525..960bf1e 100644 (file)
@@ -2,4 +2,3 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generated-y += unistd_32.h
 generic-y += kvm_para.h
-generic-y += shmparam.h
index 9c7d1d2..791cc8d 100644 (file)
@@ -26,6 +26,7 @@ generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += serial.h
+generic-y += shmparam.h
 generic-y += syscalls.h
 generic-y += topology.h
 generic-y += trace_clock.h
index 28823e3..97823ec 100644 (file)
@@ -2,5 +2,4 @@ include include/uapi/asm-generic/Kbuild.asm
 
 generated-y += unistd_32.h
 generic-y += kvm_para.h
-generic-y += shmparam.h
 generic-y += ucontext.h
index 0d14f51..a84c24d 100644 (file)
@@ -1403,6 +1403,21 @@ config LOONGSON3_ENHANCEMENT
          please say 'N' here. If you want a high-performance kernel to run on
          new Loongson 3 machines only, please say 'Y' here.
 
+config CPU_LOONGSON3_WORKAROUNDS
+       bool "Old Loongson 3 LLSC Workarounds"
+       default y if SMP
+       depends on CPU_LOONGSON3
+       help
+         Loongson 3 processors have LL/SC issues that require workarounds.
+         Without the workarounds the system may hang unexpectedly.
+
+         Newer Loongson 3 processors fix these issues, so no workarounds are
+         needed there. The workarounds have no significant side effect on
+         those CPUs, but they may decrease system performance, so this option
+         should be disabled unless the kernel is intended to run on old
+         systems.
+
+         If unsure, please say Y.
+
 config CPU_LOONGSON2E
        bool "Loongson 2E"
        depends on SYS_HAS_CPU_LOONGSON2E
index 50cff3c..4f7b1fa 100644 (file)
@@ -76,7 +76,7 @@
        status = "okay";
 
        pinctrl-names = "default";
-       pinctrl-0 = <&pins_uart2>;
+       pinctrl-0 = <&pins_uart3>;
 };
 
 &uart4 {
                bias-disable;
        };
 
-       pins_uart2: uart2 {
-               function = "uart2";
-               groups = "uart2-data", "uart2-hwflow";
+       pins_uart3: uart3 {
+               function = "uart3";
+               groups = "uart3-data", "uart3-hwflow";
                bias-disable;
        };
 
index 6fb16fd..2beb78a 100644 (file)
                #dma-cells = <2>;
 
                interrupt-parent = <&intc>;
-               interrupts = <29>;
+               interrupts = <20>;
 
                clocks = <&cgu JZ4740_CLK_DMA>;
 
index 2152b7b..cc8dbea 100644 (file)
                interrupts = <0>;
        };
 
-       axi_i2c: i2c@10A00000 {
+       axi_i2c: i2c@10a00000 {
            compatible = "xlnx,xps-iic-2.00.a";
            interrupt-parent = <&axi_intc>;
            interrupts = <4>;
-           reg = < 0x10A00000 0x10000 >;
+           reg = < 0x10a00000 0x10000 >;
            clocks = <&ext>;
            xlnx,clk-freq = <0x5f5e100>;
            xlnx,family = "Artix7";
            #address-cells = <1>;
            #size-cells = <0>;
 
-           ad7420@4B {
+           ad7420@4b {
                compatible = "adi,adt7420";
-               reg = <0x4B>;
+               reg = <0x4b>;
            };
        } ;
 };
index 43fcd35..9409629 100644 (file)
@@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)                             \
        if (kernel_uses_llsc) {                                               \
                int temp;                                                     \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -85,6 +86,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v)             \
        if (kernel_uses_llsc) {                                               \
                int temp;                                                     \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -118,6 +120,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v)            \
        if (kernel_uses_llsc) {                                               \
                int temp;                                                     \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -256,6 +259,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)                      \
        if (kernel_uses_llsc) {                                               \
                long temp;                                                    \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -283,6 +287,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
        if (kernel_uses_llsc) {                                               \
                long temp;                                                    \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -316,6 +321,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v)  \
        if (kernel_uses_llsc) {                                               \
                long temp;                                                    \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
index a5eb1bb..b7f6ac5 100644 (file)
 #define __smp_mb__before_atomic()      __smp_mb__before_llsc()
 #define __smp_mb__after_atomic()       smp_llsc_mb()
 
+/*
+ * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
+ * store or pref) in between an ll & sc can cause the sc instruction to
+ * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
+ * containing such sequences, this bug bites harder than we might otherwise
+ * expect due to reordering & speculation:
+ *
+ * 1) A memory access appearing prior to the ll in program order may actually
+ *    be executed after the ll - this is the reordering case.
+ *
+ *    In order to avoid this we need to place a memory barrier (ie. a sync
+ *    instruction) prior to every ll instruction, in between it & any earlier
+ *    memory access instructions. Many of these cases are already covered by
+ *    smp_mb__before_llsc() but for the remaining cases, typically ones in
+ *    which multiple CPUs may operate on a memory location but ordering is not
+ *    usually guaranteed, we use loongson_llsc_mb() below.
+ *
+ *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
+ *
+ * 2) If a conditional branch exists between an ll & sc with a target outside
+ *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
+ *    or similar, then misprediction of the branch may allow speculative
+ *    execution of memory accesses from outside of the ll-sc loop.
+ *
+ *    In order to avoid this we need a memory barrier (ie. a sync instruction)
+ *    at each affected branch target, for which we also use loongson_llsc_mb()
+ *    defined below.
+ *
+ *    This case affects all current Loongson 3 CPUs.
+ */
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
+#define loongson_llsc_mb()     __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#else
+#define loongson_llsc_mb()     do { } while (0)
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASM_BARRIER_H */
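
As a rough userspace model of where loongson_llsc_mb() sits, the sketch below marks barrier point 1 from the comment (before the ll). It stands in for real ll/sc with C11 atomics and a full fence, so the names and semantics are illustrative, not the kernel's:

#include <stdatomic.h>

/* Stand-in for loongson_llsc_mb(); the real macro emits a MIPS sync
 * instruction only when CONFIG_CPU_LOONGSON3_WORKAROUNDS=y. */
#define loongson_llsc_mb() atomic_thread_fence(memory_order_seq_cst)

static int atomic_add_return_sketch(_Atomic int *v, int i)
{
	int old;

	loongson_llsc_mb();	/* case 1: barrier before the "ll" */
	do {
		old = atomic_load_explicit(v, memory_order_relaxed);	/* "ll" */
	} while (!atomic_compare_exchange_weak_explicit(v, &old, old + i,
			memory_order_relaxed, memory_order_relaxed));	/* "sc" */
	/* case 2 would place loongson_llsc_mb() at any branch target
	 * outside the loop, e.g. the exit path of a cmpxchg() mismatch. */
	return old + i;
}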
index c467595..830c93a 100644 (file)
@@ -69,6 +69,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
                : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       " __LL "%0, %1          # set_bit       \n"
@@ -79,6 +80,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
                } while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
        } else if (kernel_uses_llsc) {
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
@@ -123,6 +125,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
                : "ir" (~(1UL << bit)));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       " __LL "%0, %1          # clear_bit     \n"
@@ -133,6 +136,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
                } while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
        } else if (kernel_uses_llsc) {
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
@@ -193,6 +197,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
                unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
                unsigned long temp;
 
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
index c14d798..b83b039 100644 (file)
@@ -50,6 +50,7 @@
                  "i" (-EFAULT)                                         \
                : "memory");                                            \
        } else if (cpu_has_llsc) {                                      \
+               loongson_llsc_mb();                                     \
                __asm__ __volatile__(                                   \
                "       .set    push                            \n"     \
                "       .set    noat                            \n"     \
@@ -163,6 +164,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                  "i" (-EFAULT)
                : "memory");
        } else if (cpu_has_llsc) {
+               loongson_llsc_mb();
                __asm__ __volatile__(
                "# futex_atomic_cmpxchg_inatomic                        \n"
                "       .set    push                                    \n"
@@ -192,6 +194,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
                  "i" (-EFAULT)
                : "memory");
+               loongson_llsc_mb();
        } else
                return -ENOSYS;
 
index 57933fc..910851c 100644 (file)
@@ -228,6 +228,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
                        : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
                        : [global] "r" (page_global));
                } else if (kernel_uses_llsc) {
+                       loongson_llsc_mb();
                        __asm__ __volatile__ (
                        "       .set    push                            \n"
                        "       .set    "MIPS_ISA_ARCH_LEVEL"           \n"
@@ -242,6 +243,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
                        "       .set    pop                             \n"
                        : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
                        : [global] "r" (page_global));
+                       loongson_llsc_mb();
                }
 #else /* !CONFIG_SMP */
                if (pte_none(*buddy))
index 8f5bd04..7f3f136 100644 (file)
@@ -457,5 +457,5 @@ void mips_cm_error_report(void)
        }
 
        /* reprime cause register */
-       write_gcr_error_cause(0);
+       write_gcr_error_cause(cm_error);
 }
index 6829a06..339870e 100644 (file)
@@ -371,7 +371,7 @@ static inline int is_sp_move_ins(union mips_instruction *ip, int *frame_size)
 static int get_frame_info(struct mips_frame_info *info)
 {
        bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS);
-       union mips_instruction insn, *ip, *ip_end;
+       union mips_instruction insn, *ip;
        const unsigned int max_insns = 128;
        unsigned int last_insn_size = 0;
        unsigned int i;
@@ -384,10 +384,9 @@ static int get_frame_info(struct mips_frame_info *info)
        if (!ip)
                goto err;
 
-       ip_end = (void *)ip + info->func_size;
-
-       for (i = 0; i < max_insns && ip < ip_end; i++) {
+       for (i = 0; i < max_insns; i++) {
                ip = (void *)ip + last_insn_size;
+
                if (is_mmips && mm_insn_16bit(ip->halfword[0])) {
                        insn.word = ip->halfword[0] << 16;
                        last_insn_size = 2;
index 0fce460..c1a4d4d 100644 (file)
@@ -23,6 +23,29 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
 endif
 
 cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+
+#
+# Some versions of binutils, not currently mainline as of 2019/02/04, support
+# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction
+# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a
+# description).
+#
+# We disable this in order to prevent the assembler meddling with the
+# instruction that labels refer to, ie. if we label an ll instruction:
+#
+# 1: ll v0, 0(a0)
+#
+# ...then with the assembler fix applied the label may actually point at a sync
+# instruction inserted by the assembler, and if we were using the label in an
+# exception table the table would no longer contain the address of the ll
+# instruction.
+#
+# Avoid this by explicitly disabling that assembler behaviour. If upstream
+# binutils does not merge support for the flag then we can revisit & remove
+# this later - for now it ensures vendor toolchains don't cause problems.
+#
+cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,)
+
 #
 # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
 # as MIPS64 R2; older versions as just R1.  This leaves the possibility open
index a60715e..b26892c 100644 (file)
@@ -59,7 +59,12 @@ static void loongson_poweroff(void)
 {
 #ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
        mach_prepare_shutdown();
-       unreachable();
+
+       /*
+        * A wait loop is needed here, but mips/kernel/reset.c already calls
+        * a generic delay loop, machine_hang(), so simply return.
+        */
+       return;
 #else
        void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr;
 
index 37b1cb2..65b6e85 100644 (file)
@@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
                 * to mimic that here by taking a load/istream page
                 * fault.
                 */
+               if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+                       uasm_i_sync(p, 0);
                UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
                uasm_i_jr(p, ptr);
 
@@ -1646,6 +1648,8 @@ static void
 iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
 {
 #ifdef CONFIG_SMP
+       if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+               uasm_i_sync(p, 0);
 # ifdef CONFIG_PHYS_ADDR_T_64BIT
        if (cpu_has_64bits)
                uasm_i_lld(p, pte, 0, ptr);
@@ -2259,6 +2263,8 @@ static void build_r4000_tlb_load_handler(void)
 #endif
 
        uasm_l_nopage_tlbl(&l, p);
+       if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+               uasm_i_sync(&p, 0);
        build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
        if ((unsigned long)tlb_do_page_fault_0 & 1) {
@@ -2313,6 +2319,8 @@ static void build_r4000_tlb_store_handler(void)
 #endif
 
        uasm_l_nopage_tlbs(&l, p);
+       if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+               uasm_i_sync(&p, 0);
        build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
        if ((unsigned long)tlb_do_page_fault_1 & 1) {
@@ -2368,6 +2376,8 @@ static void build_r4000_tlb_modify_handler(void)
 #endif
 
        uasm_l_nopage_tlbm(&l, p);
+       if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+               uasm_i_sync(&p, 0);
        build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
        if ((unsigned long)tlb_do_page_fault_1 & 1) {
index 5017d58..fc29b85 100644 (file)
@@ -568,6 +568,11 @@ static int __init octeon_pci_setup(void)
        if (octeon_has_feature(OCTEON_FEATURE_PCIE))
                return 0;
 
+       if (!octeon_is_pci_host()) {
+               pr_notice("Not in host mode, PCI Controller not initialized\n");
+               return 0;
+       }
+
        /* Point pcibios_map_irq() to the PCI version of it */
        octeon_pcibios_map_irq = octeon_pci_pcibios_map_irq;
 
@@ -579,11 +584,6 @@ static int __init octeon_pci_setup(void)
        else
                octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_BIG;
 
-       if (!octeon_is_pci_host()) {
-               pr_notice("Not in host mode, PCI Controller not initialized\n");
-               return 0;
-       }
-
        /* PCI I/O and PCI MEM values */
        set_io_port_base(OCTEON_PCI_IOSPACE_BASE);
        ioport_resource.start = 0;
index f6fd340..0ede4de 100644 (file)
@@ -8,6 +8,7 @@ ccflags-vdso := \
        $(filter -E%,$(KBUILD_CFLAGS)) \
        $(filter -mmicromips,$(KBUILD_CFLAGS)) \
        $(filter -march=%,$(KBUILD_CFLAGS)) \
+       $(filter -m%-float,$(KBUILD_CFLAGS)) \
        -D__VDSO__
 
 ifdef CONFIG_CC_IS_CLANG
@@ -129,7 +130,7 @@ $(obj)/%-o32.o: $(src)/%.c FORCE
        $(call cmd,force_checksrc)
        $(call if_changed_rule,cc_o_c)
 
-$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := -mabi=32
+$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=32
 $(obj)/vdso-o32.lds: $(src)/vdso.lds.S FORCE
        $(call if_changed_dep,cpp_lds_S)
 
@@ -169,7 +170,7 @@ $(obj)/%-n32.o: $(src)/%.c FORCE
        $(call cmd,force_checksrc)
        $(call if_changed_rule,cc_o_c)
 
-$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := -mabi=n32
+$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=n32
 $(obj)/vdso-n32.lds: $(src)/vdso.lds.S FORCE
        $(call if_changed_dep,cpp_lds_S)
 
index eb87cd8..1f04844 100644 (file)
@@ -34,6 +34,7 @@ generic-y += qrwlock_types.h
 generic-y += qrwlock.h
 generic-y += sections.h
 generic-y += segment.h
+generic-y += shmparam.h
 generic-y += string.h
 generic-y += switch_to.h
 generic-y += topology.h
index 6c6f630..0febf1a 100644 (file)
@@ -1,5 +1,4 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-generic-y += shmparam.h
 generic-y += ucontext.h
index 2e6ada2..c9bfe52 100644 (file)
@@ -1258,21 +1258,13 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 
 #define pmd_move_must_withdraw pmd_move_must_withdraw
 struct spinlock;
-static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
-                                        struct spinlock *old_pmd_ptl,
-                                        struct vm_area_struct *vma)
-{
-       if (radix_enabled())
-               return false;
-       /*
-        * Archs like ppc64 use pgtable to store per pmd
-        * specific information. So when we switch the pmd,
-        * we should also withdraw and deposit the pgtable
-        */
-       return true;
-}
-
-
+extern int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+                                 struct spinlock *old_pmd_ptl,
+                                 struct vm_area_struct *vma);
+/*
+ * Hash translation mode use the deposited table to store hash pte
+ * slot information.
+ */
 #define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
 static inline bool arch_needs_pgtable_deposit(void)
 {
index f3c31f5..ecd3156 100644 (file)
@@ -400,3 +400,25 @@ void arch_report_meminfo(struct seq_file *m)
                   atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
 }
 #endif /* CONFIG_PROC_FS */
+
+/*
+ * For hash translation mode, we use the deposited table to store hash slot
+ * information; it is stored at a PTRS_PER_PMD offset from the related pmd
+ * location. Hence a pmd move requires a deposit and withdraw.
+ *
+ * For radix translation with split pmd ptl, we store the deposited table in
+ * the pmd page. Hence if we have a different pmd page we need to withdraw
+ * during a pmd move.
+ *
+ * With hash we always use the deposited table, irrespective of whether the
+ * mapping is anonymous. With radix we use the deposited table only for
+ * anonymous mappings.
+ */
+int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+                          struct spinlock *old_pmd_ptl,
+                          struct vm_area_struct *vma)
+{
+       if (radix_enabled())
+               return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
+
+       return true;
+}
index 7d6457a..bba281b 100644 (file)
@@ -43,6 +43,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
 {
        unsigned long ret[PLPAR_HCALL_BUFSIZE];
        uint64_t rc, token;
+       uint64_t saved = 0;
 
        /*
         * When the hypervisor cannot map all the requested memory in a single
@@ -56,6 +57,8 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
                rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
                                p->blocks, BIND_ANY_ADDR, token);
                token = ret[0];
+               if (!saved)
+                       saved = ret[1];
                cond_resched();
        } while (rc == H_BUSY);
 
@@ -64,7 +67,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
                return -ENXIO;
        }
 
-       p->bound_addr = ret[1];
+       p->bound_addr = saved;
 
        dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
 
index e64c657..bd14990 100644 (file)
@@ -104,7 +104,7 @@ choice
        prompt "Base ISA"
        default ARCH_RV64I
        help
-         This selects the base ISA that this kernel will traget and must match
+         This selects the base ISA that this kernel will target and must match
          the target platform.
 
 config ARCH_RV32I
index f399659..2fd3461 100644 (file)
@@ -13,8 +13,6 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_SMP=y
-CONFIG_PCI=y
-CONFIG_PCIE_XILINX=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_NET=y
@@ -28,6 +26,10 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NETLINK_DIAG=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCIE_XILINX=y
 CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
@@ -63,7 +65,6 @@ CONFIG_USB_STORAGE=y
 CONFIG_USB_UAS=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_SIFIVE_PLIC=y
-CONFIG_RAS=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_AUTOFS4_FS=y
@@ -77,5 +78,6 @@ CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
 # CONFIG_RCU_TRACE is not set
index 06cfbb3..2a546a5 100644 (file)
@@ -80,7 +80,7 @@ typedef struct page *pgtable_t;
 #define __pgd(x)       ((pgd_t) { (x) })
 #define __pgprot(x)    ((pgprot_t) { (x) })
 
-#ifdef CONFIG_64BITS
+#ifdef CONFIG_64BIT
 #define PTE_FMT "%016lx"
 #else
 #define PTE_FMT "%08lx"
index 2fa2942..470755c 100644 (file)
 #define _PAGE_SPECIAL   _PAGE_SOFT
 #define _PAGE_TABLE     _PAGE_PRESENT
 
+/*
+ * _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to
+ * distinguish them from swapped out pages
+ */
+#define _PAGE_PROT_NONE _PAGE_READ
+
 #define _PAGE_PFN_SHIFT 10
 
 /* Set of bits to preserve across pte_modify() */
index 1630196..a8179a8 100644 (file)
@@ -44,7 +44,7 @@
 /* Page protection bits */
 #define _PAGE_BASE     (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)
 
-#define PAGE_NONE              __pgprot(0)
+#define PAGE_NONE              __pgprot(_PAGE_PROT_NONE)
 #define PAGE_READ              __pgprot(_PAGE_BASE | _PAGE_READ)
 #define PAGE_WRITE             __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE)
 #define PAGE_EXEC              __pgprot(_PAGE_BASE | _PAGE_EXEC)
@@ -98,7 +98,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
 static inline int pmd_present(pmd_t pmd)
 {
-       return (pmd_val(pmd) & _PAGE_PRESENT);
+       return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
 }
 
 static inline int pmd_none(pmd_t pmd)
@@ -178,7 +178,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
 
 static inline int pte_present(pte_t pte)
 {
-       return (pte_val(pte) & _PAGE_PRESENT);
+       return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
 }
 
 static inline int pte_none(pte_t pte)
@@ -380,7 +380,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
  *
  * Format of swap PTE:
  *     bit            0:       _PAGE_PRESENT (zero)
- *     bit            1:       reserved for future use (zero)
+ *     bit            1:       _PAGE_PROT_NONE (zero)
  *     bits      2 to 6:       swap type
  *     bits 7 to XLEN-1:       swap offset
  */
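
Given the swap-PTE layout documented above (present and prot-none bits clear, bits 2 to 6 the swap type, bits 7 and up the offset), encoding and decoding reduce to shifts and masks. A sketch with illustrative helper names, not the kernel's macros:

/* Bit layout from the comment above: bit 0 _PAGE_PRESENT and bit 1
 * _PAGE_PROT_NONE stay zero, bits 2-6 hold the swap type and bits
 * 7..XLEN-1 the swap offset. Helper names are illustrative. */
#define SWP_TYPE_SHIFT	 2
#define SWP_TYPE_BITS	 5
#define SWP_OFFSET_SHIFT (SWP_TYPE_SHIFT + SWP_TYPE_BITS)	/* 7 */

static unsigned long mk_swp_entry(unsigned long type, unsigned long offset)
{
	return (type << SWP_TYPE_SHIFT) | (offset << SWP_OFFSET_SHIFT);
}

static unsigned long swp_type_of(unsigned long entry)
{
	return (entry >> SWP_TYPE_SHIFT) & ((1UL << SWP_TYPE_BITS) - 1);
}

static unsigned long swp_offset_of(unsigned long entry)
{
	return entry >> SWP_OFFSET_SHIFT;
}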
index 0531f49..ce70bce 100644 (file)
@@ -22,7 +22,7 @@
  * This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define TASK_UNMAPPED_BASE     PAGE_ALIGN(TASK_SIZE >> 1)
+#define TASK_UNMAPPED_BASE     PAGE_ALIGN(TASK_SIZE / 3)
 
 #define STACK_TOP              TASK_SIZE
 #define STACK_TOP_MAX          STACK_TOP
index 6a92a2f..dac9834 100644 (file)
@@ -39,6 +39,7 @@ void asm_offsets(void)
        OFFSET(TASK_STACK, task_struct, stack);
        OFFSET(TASK_TI, task_struct, thread_info);
        OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags);
+       OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
        OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
        OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
        OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
index 355166f..fd9b57c 100644 (file)
@@ -144,6 +144,10 @@ _save_context:
        REG_L x2,  PT_SP(sp)
        .endm
 
+#if !IS_ENABLED(CONFIG_PREEMPT)
+.set resume_kernel, restore_all
+#endif
+
 ENTRY(handle_exception)
        SAVE_ALL
 
@@ -228,7 +232,7 @@ ret_from_exception:
        REG_L s0, PT_SSTATUS(sp)
        csrc sstatus, SR_SIE
        andi s0, s0, SR_SPP
-       bnez s0, restore_all
+       bnez s0, resume_kernel
 
 resume_userspace:
        /* Interrupts must be disabled here so flags are checked atomically */
@@ -250,6 +254,18 @@ restore_all:
        RESTORE_ALL
        sret
 
+#if IS_ENABLED(CONFIG_PREEMPT)
+resume_kernel:
+       REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
+       bnez s0, restore_all
+need_resched:
+       REG_L s0, TASK_TI_FLAGS(tp)
+       andi s0, s0, _TIF_NEED_RESCHED
+       beqz s0, restore_all
+       call preempt_schedule_irq
+       j need_resched
+#endif
+
 work_pending:
        /* Enter slow path for supplementary processing */
        la ra, ret_from_exception
index 6e079e9..7756431 100644 (file)
@@ -181,7 +181,7 @@ static void __init setup_bootmem(void)
        BUG_ON(mem_size == 0);
 
        set_max_mapnr(PFN_DOWN(mem_size));
-       max_low_pfn = memblock_end_of_DRAM();
+       max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
 
 #ifdef CONFIG_BLK_DEV_INITRD
        setup_initrd();
index fc185ec..18cda0e 100644 (file)
@@ -57,15 +57,12 @@ void __init setup_smp(void)
 
        while ((dn = of_find_node_by_type(dn, "cpu"))) {
                hart = riscv_of_processor_hartid(dn);
-               if (hart < 0) {
-                       of_node_put(dn);
+               if (hart < 0)
                        continue;
-               }
 
                if (hart == cpuid_to_hartid_map(0)) {
                        BUG_ON(found_boot_cpu);
                        found_boot_cpu = 1;
-                       of_node_put(dn);
                        continue;
                }
 
@@ -73,7 +70,6 @@ void __init setup_smp(void)
                set_cpu_possible(cpuid, true);
                set_cpu_present(cpuid, true);
                cpuid++;
-               of_node_put(dn);
        }
 
        BUG_ON(!found_boot_cpu);
index 1e1395d..65df1df 100644 (file)
@@ -18,8 +18,6 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 
-#define MAX_BYTES_PER_LONG     0x10
-
 OUTPUT_ARCH(riscv)
 ENTRY(_start)
 
@@ -76,6 +74,8 @@ SECTIONS
                *(.sbss*)
        }
 
+       BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
+
        EXCEPTION_TABLE(0x10)
        NOTES
 
@@ -83,10 +83,6 @@ SECTIONS
                *(.rel.dyn*)
        }
 
-       BSS_SECTION(MAX_BYTES_PER_LONG,
-                   MAX_BYTES_PER_LONG,
-                   MAX_BYTES_PER_LONG)
-
        _end = .;
 
        STABS_DEBUG
index 1d9bfaf..658ebf6 100644 (file)
@@ -28,7 +28,8 @@ static void __init zone_sizes_init(void)
        unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
 
 #ifdef CONFIG_ZONE_DMA32
-       max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn));
+       max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G,
+                       (unsigned long) PFN_PHYS(max_low_pfn)));
 #endif
        max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
index 537f97f..b6796e6 100644 (file)
        .section .text
 ENTRY(swsusp_arch_suspend)
        lg      %r1,__LC_NODAT_STACK
-       aghi    %r1,-STACK_FRAME_OVERHEAD
        stmg    %r6,%r15,__SF_GPRS(%r1)
+       aghi    %r1,-STACK_FRAME_OVERHEAD
        stg     %r15,__SF_BACKCHAIN(%r1)
-       lgr     %r1,%r15
+       lgr     %r15,%r1
 
        /* Store FPU registers */
        brasl   %r14,save_fpu_regs
index a966d7b..4266a4d 100644 (file)
@@ -382,7 +382,9 @@ static void zpci_irq_handler(struct airq_struct *airq)
                        if (ai == -1UL)
                                break;
                        inc_irq_stat(IRQIO_MSI);
+                       airq_iv_lock(aibv, ai);
                        generic_handle_irq(airq_iv_get_data(aibv, ai));
+                       airq_iv_unlock(aibv, ai);
                }
        }
 }
@@ -408,7 +410,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
        zdev->aisb = aisb;
 
        /* Create adapter interrupt vector */
-       zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA);
+       zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
        if (!zdev->aibv)
                return -ENOMEM;
 
index 1372553..1d1544b 100644 (file)
@@ -28,6 +28,7 @@ generic-y += preempt.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
+generic-y += shmparam.h
 generic-y += sizes.h
 generic-y += syscalls.h
 generic-y += topology.h
index 6c6f630..0febf1a 100644 (file)
@@ -1,5 +1,4 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
-generic-y += shmparam.h
 generic-y += ucontext.h
index 26387c7..6826143 100644 (file)
@@ -446,12 +446,12 @@ config RETPOLINE
          branches. Requires a compiler with -mindirect-branch=thunk-extern
          support for full protection. The kernel may run slower.
 
-config X86_RESCTRL
-       bool "Resource Control support"
+config X86_CPU_RESCTRL
+       bool "x86 CPU resource control support"
        depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
        select KERNFS
        help
-         Enable Resource Control support.
+         Enable x86 CPU resource control support.
 
          Provide support for the allocation and monitoring of system resource
          usage by the CPU.
index 6403789..f62e347 100644 (file)
@@ -600,6 +600,16 @@ ENTRY(trampoline_32bit_src)
        leal    TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
        movl    %eax, %cr3
 3:
+       /* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
+       pushl   %ecx
+       pushl   %edx
+       movl    $MSR_EFER, %ecx
+       rdmsr
+       btsl    $_EFER_LME, %eax
+       wrmsr
+       popl    %edx
+       popl    %ecx
+
        /* Enable PAE and LA57 (if required) paging modes */
        movl    $X86_CR4_PAE, %eax
        cmpl    $0, %edx
index 91f7563..6ff7e81 100644 (file)
@@ -6,7 +6,7 @@
 #define TRAMPOLINE_32BIT_PGTABLE_OFFSET        0
 
 #define TRAMPOLINE_32BIT_CODE_OFFSET   PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE     0x60
+#define TRAMPOLINE_32BIT_CODE_SIZE     0x70
 
 #define TRAMPOLINE_32BIT_STACK_END     TRAMPOLINE_32BIT_SIZE
 
index 40e12cf..daafb89 100644 (file)
@@ -3559,6 +3559,14 @@ static void free_excl_cntrs(int cpu)
 
 static void intel_pmu_cpu_dying(int cpu)
 {
+       fini_debug_store_on_cpu(cpu);
+
+       if (x86_pmu.counter_freezing)
+               disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
+{
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
        struct intel_shared_regs *pc;
 
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
        }
 
        free_excl_cntrs(cpu);
-
-       fini_debug_store_on_cpu(cpu);
-
-       if (x86_pmu.counter_freezing)
-               disable_counter_freeze();
 }
 
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = {
        .cpu_prepare            = intel_pmu_cpu_prepare,
        .cpu_starting           = intel_pmu_cpu_starting,
        .cpu_dying              = intel_pmu_cpu_dying,
+       .cpu_dead               = intel_pmu_cpu_dead,
 };
 
 static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = {
        .cpu_prepare            = intel_pmu_cpu_prepare,
        .cpu_starting           = intel_pmu_cpu_starting,
        .cpu_dying              = intel_pmu_cpu_dying,
+       .cpu_dead               = intel_pmu_cpu_dead,
+
        .guest_get_msrs         = intel_guest_get_msrs,
        .sched_task             = intel_pmu_sched_task,
 };
index c07bee3..b10e043 100644 (file)
@@ -1222,6 +1222,8 @@ static struct pci_driver snbep_uncore_pci_driver = {
        .id_table       = snbep_uncore_pci_ids,
 };
 
+#define NODE_ID_MASK   0x7
+
 /*
  * build pci bus to socket mapping
  */
@@ -1243,7 +1245,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
                err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
                if (err)
                        break;
-               nodeid = config;
+               nodeid = config & NODE_ID_MASK;
                /* get the Node ID mapping */
                err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
                if (err)
index 0dd6b0f..d9a9993 100644 (file)
@@ -6,7 +6,7 @@
  * "Big Core" Processors (Branded as Core, Xeon, etc...)
  *
  * The "_X" parts are generally the EP and EX Xeons, or the
- * "Extreme" ones, like Broadwell-E.
+ * "Extreme" ones, like Broadwell-E, or Atom microserver.
  *
  * While adding a new CPUID for a new microarchitecture, add a new
  * group to keep logically sorted out in chronological order. Within
@@ -71,6 +71,7 @@
 #define INTEL_FAM6_ATOM_GOLDMONT       0x5C /* Apollo Lake */
 #define INTEL_FAM6_ATOM_GOLDMONT_X     0x5F /* Denverton */
 #define INTEL_FAM6_ATOM_GOLDMONT_PLUS  0x7A /* Gemini Lake */
+#define INTEL_FAM6_ATOM_TREMONT_X      0x86 /* Jacobsville */
 
 /* Xeon Phi */
 
index 8f65728..0ce558a 100644 (file)
@@ -7,7 +7,11 @@
 #endif
 
 #ifdef CONFIG_KASAN
+#ifdef CONFIG_KASAN_EXTRA
+#define KASAN_STACK_ORDER 2
+#else
 #define KASAN_STACK_ORDER 1
+#endif
 #else
 #define KASAN_STACK_ORDER 0
 #endif
index 40616e8..2779ace 100644 (file)
@@ -1065,7 +1065,7 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                              pmd_t *pmdp, pmd_t pmd)
 {
-       native_set_pmd(pmdp, pmd);
+       set_pmd(pmdp, pmd);
 }
 
 static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
index 40ebddd..f6b7fe2 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_RESCTRL_SCHED_H
 #define _ASM_X86_RESCTRL_SCHED_H
 
-#ifdef CONFIG_X86_RESCTRL
+#ifdef CONFIG_X86_CPU_RESCTRL
 
 #include <linux/sched.h>
 #include <linux/jump_label.h>
@@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void)
 
 static inline void resctrl_sched_in(void) {}
 
-#endif /* CONFIG_X86_RESCTRL */
+#endif /* CONFIG_X86_CPU_RESCTRL */
 
 #endif /* _ASM_X86_RESCTRL_SCHED_H */
index b6fa086..cfd24f9 100644 (file)
@@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32)          += umc.o
 obj-$(CONFIG_X86_MCE)                  += mce/
 obj-$(CONFIG_MTRR)                     += mtrr/
 obj-$(CONFIG_MICROCODE)                        += microcode/
-obj-$(CONFIG_X86_RESCTRL)              += resctrl/
+obj-$(CONFIG_X86_CPU_RESCTRL)          += resctrl/
 
 obj-$(CONFIG_X86_LOCAL_APIC)           += perfctr-watchdog.o
 
index 1de0f41..01874d5 100644 (file)
@@ -71,7 +71,7 @@ void __init check_bugs(void)
         * identify_boot_cpu() initialized SMT support information, let the
         * core code know.
         */
-       cpu_smt_check_topology_early();
+       cpu_smt_check_topology();
 
        if (!IS_ENABLED(CONFIG_SMP)) {
                pr_info("CPU: ");
index 672c722..6ce290c 100644 (file)
@@ -784,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
                        quirk_no_way_out(i, m, regs);
 
                if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+                       m->bank = i;
                        mce_read_aux(m, i);
                        *msg = tmp;
                        return 1;
index 51adde0..e1f3ba1 100644 (file)
@@ -855,7 +855,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
        if (!p) {
                return ret;
        } else {
-               if (boot_cpu_data.microcode == p->patch_id)
+               if (boot_cpu_data.microcode >= p->patch_id)
                        return ret;
 
                ret = UCODE_NEW;
index 1cabe6f..4a06c37 100644 (file)
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_X86_RESCTRL)      += core.o rdtgroup.o monitor.o
-obj-$(CONFIG_X86_RESCTRL)      += ctrlmondata.o pseudo_lock.o
+obj-$(CONFIG_X86_CPU_RESCTRL)  += core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_CPU_RESCTRL)  += ctrlmondata.o pseudo_lock.o
 CFLAGS_pseudo_lock.o = -I$(src)
index 0d5efa3..53917a3 100644 (file)
@@ -167,6 +167,9 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
        struct efi_info *current_ei = &boot_params.efi_info;
        struct efi_info *ei = &params->efi_info;
 
+       if (!efi_enabled(EFI_RUNTIME_SERVICES))
+               return 0;
+
        if (!current_ei->efi_memmap_size)
                return 0;
 
index 8ff2052..d8ea4eb 100644 (file)
@@ -211,6 +211,7 @@ static void free_nested(struct kvm_vcpu *vcpu)
        if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
                return;
 
+       hrtimer_cancel(&vmx->nested.preemption_timer);
        vmx->nested.vmxon = false;
        vmx->nested.smm.vmxon = false;
        free_vpid(vmx->nested.vpid02);
index 4341175..95d6180 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
+#include <linux/sched/smt.h>
 #include <linux/slab.h>
 #include <linux/tboot.h>
 #include <linux/trace_events.h>
@@ -6823,7 +6824,7 @@ static int vmx_vm_init(struct kvm *kvm)
                         * Warn upon starting the first VM in a potentially
                         * insecure environment.
                         */
-                       if (cpu_smt_control == CPU_SMT_ENABLED)
+                       if (sched_smt_active())
                                pr_warn_once(L1TF_MSG_SMT);
                        if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
                                pr_warn_once(L1TF_MSG_L1D);
index 3d27206..e67ecf2 100644 (file)
@@ -5116,6 +5116,13 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
 {
        u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
 
+       /*
+        * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
+        * is returned, but our callers are not ready for that and they blindly
+        * call kvm_inject_page_fault.  Ensure that they at least do not leak
+        * uninitialized kernel stack memory into cr2 and error code.
+        */
+       memset(exception, 0, sizeof(*exception));
        return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
                                          exception);
 }
index 6689467..df50451 100644 (file)
@@ -2,8 +2,11 @@
 #include <linux/module.h>
 #include <linux/io.h>
 
+#define movs(type,to,from) \
+       asm volatile("movs" type:"=&D" (to), "=&S" (from):"0" (to), "1" (from):"memory")
+
 /* Originally from i386/string.h */
-static __always_inline void __iomem_memcpy(void *to, const void *from, size_t n)
+static __always_inline void rep_movs(void *to, const void *from, size_t n)
 {
        unsigned long d0, d1, d2;
        asm volatile("rep ; movsl\n\t"
@@ -21,13 +24,37 @@ static __always_inline void __iomem_memcpy(void *to, const void *from, size_t n)
 
 void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
 {
-       __iomem_memcpy(to, (const void *)from, n);
+       if (unlikely(!n))
+               return;
+
+       /* Align any unaligned source IO */
+       if (unlikely(1 & (unsigned long)from)) {
+               movs("b", to, from);
+               n--;
+       }
+       if (n > 1 && unlikely(2 & (unsigned long)from)) {
+               movs("w", to, from);
+               n-=2;
+       }
+       rep_movs(to, (const void *)from, n);
 }
 EXPORT_SYMBOL(memcpy_fromio);
 
 void memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
 {
-       __iomem_memcpy((void *)to, (const void *) from, n);
+       if (unlikely(!n))
+               return;
+
+       /* Align any unaligned destination IO */
+       if (unlikely(1 & (unsigned long)to)) {
+               movs("b", to, from);
+               n--;
+       }
+       if (n > 1 && unlikely(2 & (unsigned long)to)) {
+               movs("w", to, from);
+               n-=2;
+       }
+       rep_movs((void *)to, (const void *) from, n);
 }
 EXPORT_SYMBOL(memcpy_toio);
 
index 2ff25ad..9d5c75f 100644 (file)
@@ -595,7 +595,7 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
                return;
        }
 
-       addr = desc.base0 | (desc.base1 << 16) | (desc.base2 << 24);
+       addr = desc.base0 | (desc.base1 << 16) | ((unsigned long)desc.base2 << 24);
 #ifdef CONFIG_X86_64
        addr |= ((u64)desc.base3 << 32);
 #endif
index 4f89723..14e6119 100644 (file)
@@ -230,6 +230,29 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn)
 
 #endif
 
+/*
+ * See set_mce_nospec().
+ *
+ * Machine check recovery code needs to change cache mode of poisoned pages to
+ * UC to avoid speculative access logging another error. But passing the
+ * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
+ * speculative access. So we cheat and flip the top bit of the address. This
+ * works fine for the code that updates the page tables. But at the end of the
+ * process we need to flush the TLB and cache and the non-canonical address
+ * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long fix_addr(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+       return (long)(addr << 1) >> 1;
+#else
+       return addr;
+#endif
+}
+
 static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
 {
        if (cpa->flags & CPA_PAGES_ARRAY) {
@@ -313,7 +336,7 @@ void __cpa_flush_tlb(void *data)
        unsigned int i;
 
        for (i = 0; i < cpa->numpages; i++)
-               __flush_tlb_one_kernel(__cpa_addr(cpa, i));
+               __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
 }
 
 static void cpa_flush(struct cpa_data *data, int cache)
@@ -347,7 +370,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
                 * Only flush present addresses:
                 */
                if (pte && (pte_val(*pte) & _PAGE_PRESENT))
-                       clflush_cache_range_opt((void *)addr, PAGE_SIZE);
+                       clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
        }
        mb();
 }
@@ -1627,29 +1650,6 @@ out:
        return ret;
 }
 
-/*
- * Machine check recovery code needs to change cache mode of poisoned
- * pages to UC to avoid speculative access logging another error. But
- * passing the address of the 1:1 mapping to set_memory_uc() is a fine
- * way to encourage a speculative access. So we cheat and flip the top
- * bit of the address. This works fine for the code that updates the
- * page tables. But at the end of the process we need to flush the cache
- * and the non-canonical address causes a #GP fault when used by the
- * CLFLUSH instruction.
- *
- * But in the common case we already have a canonical address. This code
- * will fix the top bit if needed and is a no-op otherwise.
- */
-static inline unsigned long make_addr_canonical_again(unsigned long addr)
-{
-#ifdef CONFIG_X86_64
-       return (long)(addr << 1) >> 1;
-#else
-       return addr;
-#endif
-}
-
-
 static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                                    pgprot_t mask_set, pgprot_t mask_clr,
                                    int force_split, int in_flag,
index 20a0756..ce91682 100644 (file)
@@ -164,7 +164,7 @@ config XTENSA_FAKE_NMI
          If unsure, say N.
 
 config XTENSA_UNALIGNED_USER
-       bool "Unaligned memory access in use space"
+       bool "Unaligned memory access in user space"
        help
          The Xtensa architecture currently does not handle unaligned
          memory accesses in hardware but through an exception handler.
@@ -451,7 +451,7 @@ config USE_OF
        help
          Include support for flattened device tree machine descriptions.
 
-config BUILTIN_DTB
+config BUILTIN_DTB_SOURCE
        string "DTB to build into the kernel image"
        depends on OF
 
index f8052ba..0b8d00c 100644 (file)
@@ -7,9 +7,9 @@
 #
 #
 
-BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB)).dtb.o
-ifneq ($(CONFIG_BUILTIN_DTB),"")
-obj-$(CONFIG_OF) += $(BUILTIN_DTB)
+BUILTIN_DTB_SOURCE := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o
+ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"")
+obj-$(CONFIG_OF) += $(BUILTIN_DTB_SOURCE)
 endif
 
 # for CONFIG_OF_ALL_DTBS test
index 2bf964d..f378e56 100644 (file)
@@ -34,7 +34,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=0x38000000@0"
 CONFIG_USE_OF=y
-CONFIG_BUILTIN_DTB="kc705"
+CONFIG_BUILTIN_DTB_SOURCE="kc705"
 # CONFIG_COMPACTION is not set
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_PM=y
index 3221b70..62f32a9 100644 (file)
@@ -38,7 +38,7 @@ CONFIG_HIGHMEM=y
 # CONFIG_PCI is not set
 CONFIG_XTENSA_PLATFORM_XTFPGA=y
 CONFIG_USE_OF=y
-CONFIG_BUILTIN_DTB="csp"
+CONFIG_BUILTIN_DTB_SOURCE="csp"
 # CONFIG_COMPACTION is not set
 CONFIG_XTFPGA_LCD=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
index 985fa85..8bebe07 100644 (file)
@@ -33,7 +33,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=0x38000000@0"
 CONFIG_USE_OF=y
-CONFIG_BUILTIN_DTB="kc705"
+CONFIG_BUILTIN_DTB_SOURCE="kc705"
 # CONFIG_COMPACTION is not set
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_NET=y
index f3fc4f9..933ab2a 100644 (file)
@@ -39,7 +39,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0x9d050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=256M@0x60000000"
 CONFIG_USE_OF=y
-CONFIG_BUILTIN_DTB="kc705_nommu"
+CONFIG_BUILTIN_DTB_SOURCE="kc705_nommu"
 CONFIG_BINFMT_FLAT=y
 CONFIG_NET=y
 CONFIG_PACKET=y
index 11fed6c..e29c5b1 100644 (file)
@@ -33,11 +33,12 @@ CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is not set
 # CONFIG_PCI is not set
+CONFIG_VECTORS_OFFSET=0x00002000
 CONFIG_XTENSA_PLATFORM_XTFPGA=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=96M@0"
 CONFIG_USE_OF=y
-CONFIG_BUILTIN_DTB="lx200mx"
+CONFIG_BUILTIN_DTB_SOURCE="lx200mx"
 # CONFIG_COMPACTION is not set
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_NET=y
index da08e75..7f00971 100644 (file)
@@ -276,12 +276,13 @@ should_never_return:
 
        movi    a2, cpu_start_ccount
 1:
+       memw
        l32i    a3, a2, 0
        beqi    a3, 0, 1b
        movi    a3, 0
        s32i    a3, a2, 0
-       memw
 1:
+       memw
        l32i    a3, a2, 0
        beqi    a3, 0, 1b
        wsr     a3, ccount
@@ -317,11 +318,13 @@ ENTRY(cpu_restart)
        rsr     a0, prid
        neg     a2, a0
        movi    a3, cpu_start_id
+       memw
        s32i    a2, a3, 0
 #if XCHAL_DCACHE_IS_WRITEBACK
        dhwbi   a3, 0
 #endif
 1:
+       memw
        l32i    a2, a3, 0
        dhi     a3, 0
        bne     a2, a0, 1b
index 932d646..be1f280 100644 (file)
@@ -83,7 +83,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 {
        unsigned i;
 
-       for (i = 0; i < max_cpus; ++i)
+       for_each_possible_cpu(i)
                set_cpu_present(i, true);
 }
 
@@ -96,6 +96,11 @@ void __init smp_init_cpus(void)
        pr_info("%s: Core Count = %d\n", __func__, ncpus);
        pr_info("%s: Core Id = %d\n", __func__, core_id);
 
+       if (ncpus > NR_CPUS) {
+               ncpus = NR_CPUS;
+               pr_info("%s: limiting core count to %d\n", __func__, ncpus);
+       }
+
        for (i = 0; i < ncpus; ++i)
                set_cpu_possible(i, true);
 }
@@ -195,9 +200,11 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts)
        int i;
 
 #ifdef CONFIG_HOTPLUG_CPU
-       cpu_start_id = cpu;
-       system_flush_invalidate_dcache_range(
-                       (unsigned long)&cpu_start_id, sizeof(cpu_start_id));
+       WRITE_ONCE(cpu_start_id, cpu);
+       /* Pairs with the third memw in the cpu_restart */
+       mb();
+       system_flush_invalidate_dcache_range((unsigned long)&cpu_start_id,
+                                            sizeof(cpu_start_id));
 #endif
        smp_call_function_single(0, mx_cpu_start, (void *)cpu, 1);
 
@@ -206,18 +213,21 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts)
                        ccount = get_ccount();
                while (!ccount);
 
-               cpu_start_ccount = ccount;
+               WRITE_ONCE(cpu_start_ccount, ccount);
 
-               while (time_before(jiffies, timeout)) {
+               do {
+                       /*
+                        * Pairs with the first two memws in the
+                        * .Lboot_secondary.
+                        */
                        mb();
-                       if (!cpu_start_ccount)
-                               break;
-               }
+                       ccount = READ_ONCE(cpu_start_ccount);
+               } while (ccount && time_before(jiffies, timeout));
 
-               if (cpu_start_ccount) {
+               if (ccount) {
                        smp_call_function_single(0, mx_cpu_stop,
-                                       (void *)cpu, 1);
-                       cpu_start_ccount = 0;
+                                                (void *)cpu, 1);
+                       WRITE_ONCE(cpu_start_ccount, 0);
                        return -EIO;
                }
        }
@@ -237,6 +247,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
        pr_debug("%s: Calling wakeup_secondary(cpu:%d, idle:%p, sp: %08lx)\n",
                        __func__, cpu, idle, start_info.stack);
 
+       init_completion(&cpu_running);
        ret = boot_secondary(cpu, idle);
        if (ret == 0) {
                wait_for_completion_timeout(&cpu_running,
@@ -298,8 +309,10 @@ void __cpu_die(unsigned int cpu)
        unsigned long timeout = jiffies + msecs_to_jiffies(1000);
        while (time_before(jiffies, timeout)) {
                system_invalidate_dcache_range((unsigned long)&cpu_start_id,
-                               sizeof(cpu_start_id));
-               if (cpu_start_id == -cpu) {
+                                              sizeof(cpu_start_id));
+               /* Pairs with the second memw in the cpu_restart */
+               mb();
+               if (READ_ONCE(cpu_start_id) == -cpu) {
                        platform_cpu_kill(cpu);
                        return;
                }
index fd524a5..378186b 100644 (file)
@@ -89,7 +89,7 @@ static int ccount_timer_shutdown(struct clock_event_device *evt)
                container_of(evt, struct ccount_timer, evt);
 
        if (timer->irq_enabled) {
-               disable_irq(evt->irq);
+               disable_irq_nosync(evt->irq);
                timer->irq_enabled = 0;
        }
        return 0;
index 3c5f61c..6b78ec5 100644 (file)
@@ -462,6 +462,10 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
        kblockd_schedule_work(&q->timeout_work);
 }
 
+static void blk_timeout_work(struct work_struct *work)
+{
+}
+
 /**
  * blk_alloc_queue_node - allocate a request queue
  * @gfp_mask: memory allocation flags
@@ -505,7 +509,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
                    laptop_mode_timer_fn, 0);
        timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
-       INIT_WORK(&q->timeout_work, NULL);
+       INIT_WORK(&q->timeout_work, blk_timeout_work);
        INIT_LIST_HEAD(&q->icq_list);
 #ifdef CONFIG_BLK_CGROUP
        INIT_LIST_HEAD(&q->blkg_list);
index a3fc719..6e0f2d9 100644 (file)
@@ -335,7 +335,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
        blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
        spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 
-       blk_mq_run_hw_queue(hctx, true);
+       blk_mq_sched_restart(hctx);
 }
 
 /**
index fc714ef..2620baa 100644 (file)
@@ -72,6 +72,7 @@
 #include <linux/sched/loadavg.h>
 #include <linux/sched/signal.h>
 #include <trace/events/block.h>
+#include <linux/blk-mq.h>
 #include "blk-rq-qos.h"
 #include "blk-stat.h"
 
@@ -591,6 +592,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
        u64 now = ktime_to_ns(ktime_get());
        bool issue_as_root = bio_issue_as_root_blkg(bio);
        bool enabled = false;
+       int inflight = 0;
 
        blkg = bio->bi_blkg;
        if (!blkg || !bio_flagged(bio, BIO_TRACKED))
@@ -601,6 +603,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
                return;
 
        enabled = blk_iolatency_enabled(iolat->blkiolat);
+       if (!enabled)
+               return;
+
        while (blkg && blkg->parent) {
                iolat = blkg_to_lat(blkg);
                if (!iolat) {
@@ -609,8 +614,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
                }
                rqw = &iolat->rq_wait;
 
-               atomic_dec(&rqw->inflight);
-               if (!enabled || iolat->min_lat_nsec == 0)
+               inflight = atomic_dec_return(&rqw->inflight);
+               WARN_ON_ONCE(inflight < 0);
+               if (iolat->min_lat_nsec == 0)
                        goto next;
                iolatency_record_time(iolat, &bio->bi_issue, now,
                                      issue_as_root);
@@ -754,10 +760,13 @@ int blk_iolatency_init(struct request_queue *q)
        return 0;
 }
 
-static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+/*
+ * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
+ * return 0.
+ */
+static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
 {
        struct iolatency_grp *iolat = blkg_to_lat(blkg);
-       struct blk_iolatency *blkiolat = iolat->blkiolat;
        u64 oldval = iolat->min_lat_nsec;
 
        iolat->min_lat_nsec = val;
@@ -766,9 +775,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
                                    BLKIOLATENCY_MAX_WIN_SIZE);
 
        if (!oldval && val)
-               atomic_inc(&blkiolat->enabled);
+               return 1;
        if (oldval && !val)
-               atomic_dec(&blkiolat->enabled);
+               return -1;
+       return 0;
 }
 
 static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -800,6 +810,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
        u64 lat_val = 0;
        u64 oldval;
        int ret;
+       int enable = 0;
 
        ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
        if (ret)
@@ -834,7 +845,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
        blkg = ctx.blkg;
        oldval = iolat->min_lat_nsec;
 
-       iolatency_set_min_lat_nsec(blkg, lat_val);
+       enable = iolatency_set_min_lat_nsec(blkg, lat_val);
+       if (enable) {
+               WARN_ON_ONCE(!blk_get_queue(blkg->q));
+               blkg_get(blkg);
+       }
+
        if (oldval != iolat->min_lat_nsec) {
                iolatency_clear_scaling(blkg);
        }
@@ -842,6 +858,24 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
        ret = 0;
 out:
        blkg_conf_finish(&ctx);
+       if (ret == 0 && enable) {
+               struct iolatency_grp *tmp = blkg_to_lat(blkg);
+               struct blk_iolatency *blkiolat = tmp->blkiolat;
+
+               blk_mq_freeze_queue(blkg->q);
+
+               if (enable == 1)
+                       atomic_inc(&blkiolat->enabled);
+               else if (enable == -1)
+                       atomic_dec(&blkiolat->enabled);
+               else
+                       WARN_ON_ONCE(1);
+
+               blk_mq_unfreeze_queue(blkg->q);
+
+               blkg_put(blkg);
+               blk_put_queue(blkg->q);
+       }
        return ret ?: nbytes;
 }
 
@@ -977,8 +1011,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
 {
        struct iolatency_grp *iolat = pd_to_lat(pd);
        struct blkcg_gq *blkg = lat_to_blkg(iolat);
+       struct blk_iolatency *blkiolat = iolat->blkiolat;
+       int ret;
 
-       iolatency_set_min_lat_nsec(blkg, 0);
+       ret = iolatency_set_min_lat_nsec(blkg, 0);
+       if (ret == 1)
+               atomic_inc(&blkiolat->enabled);
+       if (ret == -1)
+               atomic_dec(&blkiolat->enabled);
        iolatency_clear_scaling(blkg);
 }
 
index f812083..7921573 100644 (file)
@@ -839,6 +839,9 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
 static bool debugfs_create_files(struct dentry *parent, void *data,
                                 const struct blk_mq_debugfs_attr *attr)
 {
+       if (IS_ERR_OR_NULL(parent))
+               return false;
+
        d_inode(parent)->i_private = data;
 
        for (; attr->name; attr++) {
index 8f5b533..9437a5e 100644 (file)
@@ -737,12 +737,20 @@ static void blk_mq_requeue_work(struct work_struct *work)
        spin_unlock_irq(&q->requeue_lock);
 
        list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
-               if (!(rq->rq_flags & RQF_SOFTBARRIER))
+               if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
                        continue;
 
                rq->rq_flags &= ~RQF_SOFTBARRIER;
                list_del_init(&rq->queuelist);
-               blk_mq_sched_insert_request(rq, true, false, false);
+               /*
+                * If RQF_DONTPREP is set, the rq already carries driver-specific
+                * data, so insert it into the hctx dispatch list to avoid any
+                * merge.
+                */
+               if (rq->rq_flags & RQF_DONTPREP)
+                       blk_mq_request_bypass_insert(rq, false);
+               else
+                       blk_mq_sched_insert_request(rq, true, false, false);
        }
 
        while (!list_empty(&rq_list)) {
index d943d46..d0b3dd5 100644 (file)
@@ -36,7 +36,6 @@ struct blk_mq_ctx {
        struct kobject          kobj;
 } ____cacheline_aligned_in_smp;
 
-void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
index 5c093ce..147f6c7 100644 (file)
@@ -1029,6 +1029,9 @@ void __init acpi_early_init(void)
 
        acpi_permanent_mmap = true;
 
+       /* Initialize debug output. Linux does not use ACPICA defaults */
+       acpi_dbg_level = ACPI_LV_INFO | ACPI_LV_REPAIR;
+
 #ifdef CONFIG_X86
        /*
         * If the machine falls into the DMI check table,
index cdfc876..4d2b2ad 100644 (file)
@@ -5854,9 +5854,10 @@ static int __init init_binder_device(const char *name)
 static int __init binder_init(void)
 {
        int ret;
-       char *device_name, *device_names, *device_tmp;
+       char *device_name, *device_tmp;
        struct binder_device *device;
        struct hlist_node *tmp;
+       char *device_names = NULL;
 
        ret = binder_alloc_shrinker_init();
        if (ret)
@@ -5898,23 +5899,29 @@ static int __init binder_init(void)
                                    &transaction_log_fops);
        }
 
-       /*
-        * Copy the module_parameter string, because we don't want to
-        * tokenize it in-place.
-        */
-       device_names = kstrdup(binder_devices_param, GFP_KERNEL);
-       if (!device_names) {
-               ret = -ENOMEM;
-               goto err_alloc_device_names_failed;
-       }
+       if (strcmp(binder_devices_param, "") != 0) {
+               /*
+               * Copy the module_parameter string, because we don't want to
+               * tokenize it in-place.
+                */
+               device_names = kstrdup(binder_devices_param, GFP_KERNEL);
+               if (!device_names) {
+                       ret = -ENOMEM;
+                       goto err_alloc_device_names_failed;
+               }
 
-       device_tmp = device_names;
-       while ((device_name = strsep(&device_tmp, ","))) {
-               ret = init_binder_device(device_name);
-               if (ret)
-                       goto err_init_binder_device_failed;
+               device_tmp = device_names;
+               while ((device_name = strsep(&device_tmp, ","))) {
+                       ret = init_binder_device(device_name);
+                       if (ret)
+                               goto err_init_binder_device_failed;
+               }
        }
 
+       ret = init_binderfs();
+       if (ret)
+               goto err_init_binder_device_failed;
+
        return ret;
 
 err_init_binder_device_failed:
index 7fb97f5..045b3e4 100644 (file)
@@ -46,4 +46,13 @@ static inline bool is_binderfs_device(const struct inode *inode)
 }
 #endif
 
+#ifdef CONFIG_ANDROID_BINDERFS
+extern int __init init_binderfs(void);
+#else
+static inline int __init init_binderfs(void)
+{
+       return 0;
+}
+#endif
+
 #endif /* _LINUX_BINDER_INTERNAL_H */
index 6a2185e..e773f45 100644 (file)
@@ -395,6 +395,11 @@ static int binderfs_binder_ctl_create(struct super_block *sb)
        struct inode *inode = NULL;
        struct dentry *root = sb->s_root;
        struct binderfs_info *info = sb->s_fs_info;
+#if defined(CONFIG_IPC_NS)
+       bool use_reserve = (info->ipc_ns == &init_ipc_ns);
+#else
+       bool use_reserve = true;
+#endif
 
        device = kzalloc(sizeof(*device), GFP_KERNEL);
        if (!device)
@@ -413,7 +418,10 @@ static int binderfs_binder_ctl_create(struct super_block *sb)
 
        /* Reserve a new minor number for the new device. */
        mutex_lock(&binderfs_minors_mutex);
-       minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, GFP_KERNEL);
+       minor = ida_alloc_max(&binderfs_minors,
+                             use_reserve ? BINDERFS_MAX_MINOR :
+                                           BINDERFS_MAX_MINOR_CAPPED,
+                             GFP_KERNEL);
        mutex_unlock(&binderfs_minors_mutex);
        if (minor < 0) {
                ret = minor;
@@ -542,7 +550,7 @@ static struct file_system_type binder_fs_type = {
        .fs_flags       = FS_USERNS_MOUNT,
 };
 
-static int __init init_binderfs(void)
+int __init init_binderfs(void)
 {
        int ret;
 
@@ -560,5 +568,3 @@ static int __init init_binderfs(void)
 
        return ret;
 }
-
-device_initcall(init_binderfs);
index b8c3f9e..adf2878 100644 (file)
@@ -4554,6 +4554,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
        { "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM, },
        { "SAMSUNG MZ7TD256HAFV-000L9", NULL,       ATA_HORKAGE_NOLPM, },
+       { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM, },
 
        /* devices that don't properly handle queued TRIM commands */
        { "Micron_M500IT_*",            "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
index cf78fa6..a735953 100644 (file)
@@ -79,8 +79,7 @@ static void cache_size(struct cacheinfo *this_leaf, struct device_node *np)
        ct_idx = get_cacheinfo_idx(this_leaf->type);
        propname = cache_type_info[ct_idx].size_prop;
 
-       if (of_property_read_u32(np, propname, &this_leaf->size))
-               this_leaf->size = 0;
+       of_property_read_u32(np, propname, &this_leaf->size);
 }
 
 /* not cache_line_size() because that's a macro in include/linux/cache.h */
@@ -114,8 +113,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np)
        ct_idx = get_cacheinfo_idx(this_leaf->type);
        propname = cache_type_info[ct_idx].nr_sets_prop;
 
-       if (of_property_read_u32(np, propname, &this_leaf->number_of_sets))
-               this_leaf->number_of_sets = 0;
+       of_property_read_u32(np, propname, &this_leaf->number_of_sets);
 }
 
 static void cache_associativity(struct cacheinfo *this_leaf)
index 457be03..0ea2139 100644 (file)
@@ -130,7 +130,7 @@ u64 pm_runtime_autosuspend_expiration(struct device *dev)
 {
        int autosuspend_delay;
        u64 last_busy, expires = 0;
-       u64 now = ktime_to_ns(ktime_get());
+       u64 now = ktime_get_mono_fast_ns();
 
        if (!dev->power.use_autosuspend)
                goto out;
@@ -909,7 +909,7 @@ static enum hrtimer_restart  pm_suspend_timer_fn(struct hrtimer *timer)
         * If 'expires' is after the current time, we've been called
         * too early.
         */
-       if (expires > 0 && expires < ktime_to_ns(ktime_get())) {
+       if (expires > 0 && expires < ktime_get_mono_fast_ns()) {
                dev->power.timer_expires = 0;
                rpm_suspend(dev, dev->power.timer_autosuspends ?
                    (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC);
@@ -928,7 +928,7 @@ static enum hrtimer_restart  pm_suspend_timer_fn(struct hrtimer *timer)
 int pm_schedule_suspend(struct device *dev, unsigned int delay)
 {
        unsigned long flags;
-       ktime_t expires;
+       u64 expires;
        int retval;
 
        spin_lock_irqsave(&dev->power.lock, flags);
@@ -945,8 +945,8 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
        /* Other scheduled or pending requests need to be canceled. */
        pm_runtime_cancel_pending(dev);
 
-       expires = ktime_add(ktime_get(), ms_to_ktime(delay));
-       dev->power.timer_expires = ktime_to_ns(expires);
+       expires = ktime_get_mono_fast_ns() + (u64)delay * NSEC_PER_MSEC;
+       dev->power.timer_expires = expires;
        dev->power.timer_autosuspends = 0;
        hrtimer_start(&dev->power.suspend_timer, expires, HRTIMER_MODE_ABS);
 
index 6f2856c..55481b4 100644 (file)
@@ -4075,7 +4075,7 @@ static unsigned int floppy_check_events(struct gendisk *disk,
 
        if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
                if (lock_fdc(drive))
-                       return -EINTR;
+                       return 0;
                poll_drive(false, 0);
                process_fd_request();
        }
index 6ccdbed..d2477a5 100644 (file)
@@ -1513,9 +1513,19 @@ static int clk_fetch_parent_index(struct clk_core *core,
        if (!parent)
                return -EINVAL;
 
-       for (i = 0; i < core->num_parents; i++)
-               if (clk_core_get_parent_by_index(core, i) == parent)
+       for (i = 0; i < core->num_parents; i++) {
+               if (core->parents[i] == parent)
+                       return i;
+
+               if (core->parents[i])
+                       continue;
+
+               /* Fallback to comparing globally unique names */
+               if (!strcmp(parent->name, core->parent_names[i])) {
+                       core->parents[i] = parent;
                        return i;
+               }
+       }
 
        return -EINVAL;
 }
index 0026c39..76b9eb1 100644 (file)
@@ -155,13 +155,14 @@ static int clk_pll_set_rate(struct clk_hw *hw, unsigned long rate,
 {
        struct clk_frac_pll *pll = to_clk_frac_pll(hw);
        u32 val, divfi, divff;
-       u64 temp64 = parent_rate;
+       u64 temp64;
        int ret;
 
        parent_rate *= 8;
        rate *= 2;
        divfi = rate / parent_rate;
-       temp64 *= rate - divfi;
+       temp64 = parent_rate * divfi;
+       temp64 = rate - temp64;
        temp64 *= PLL_FRAC_DENOM;
        do_div(temp64, parent_rate);
        divff = temp64;
index 61fefc0..d083b86 100644 (file)
@@ -53,7 +53,6 @@
 #define APMU_DISP1     0x110
 #define APMU_CCIC0     0x50
 #define APMU_CCIC1     0xf4
-#define APMU_SP                0x68
 #define MPMU_UART_PLL  0x14
 
 struct mmp2_clk_unit {
@@ -210,8 +209,6 @@ static struct mmp_clk_mix_config ccic1_mix_config = {
        .reg_info = DEFINE_MIX_REG_INFO(4, 16, 2, 6, 32),
 };
 
-static DEFINE_SPINLOCK(sp_lock);
-
 static struct mmp_param_mux_clk apmu_mux_clks[] = {
        {MMP2_CLK_DISP0_MUX, "disp0_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP0, 6, 2, 0, &disp0_lock},
        {MMP2_CLK_DISP1_MUX, "disp1_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP1, 6, 2, 0, &disp1_lock},
@@ -242,7 +239,6 @@ static struct mmp_param_gate_clk apmu_gate_clks[] = {
        {MMP2_CLK_CCIC1, "ccic1_clk", "ccic1_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x1b, 0x1b, 0x0, 0, &ccic1_lock},
        {MMP2_CLK_CCIC1_PHY, "ccic1_phy_clk", "ccic1_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x24, 0x24, 0x0, 0, &ccic1_lock},
        {MMP2_CLK_CCIC1_SPHY, "ccic1_sphy_clk", "ccic1_sphy_div", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x300, 0x300, 0x0, 0, &ccic1_lock},
-       {MMP2_CLK_SP, "sp_clk", NULL, CLK_SET_RATE_PARENT, APMU_SP, 0x1b, 0x1b, 0x0, 0, &sp_lock},
 };
 
 static void mmp2_axi_periph_clk_init(struct mmp2_clk_unit *pxa_unit)
index c782e62..58fa5c2 100644 (file)
@@ -115,8 +115,8 @@ static const char * const gcc_parent_names_6[] = {
        "core_bi_pll_test_se",
 };
 
-static const char * const gcc_parent_names_7[] = {
-       "bi_tcxo",
+static const char * const gcc_parent_names_7_ao[] = {
+       "bi_tcxo_ao",
        "gpll0",
        "gpll0_out_even",
        "core_bi_pll_test_se",
@@ -128,6 +128,12 @@ static const char * const gcc_parent_names_8[] = {
        "core_bi_pll_test_se",
 };
 
+static const char * const gcc_parent_names_8_ao[] = {
+       "bi_tcxo_ao",
+       "gpll0",
+       "core_bi_pll_test_se",
+};
+
 static const struct parent_map gcc_parent_map_10[] = {
        { P_BI_TCXO, 0 },
        { P_GPLL0_OUT_MAIN, 1 },
@@ -210,7 +216,7 @@ static struct clk_rcg2 gcc_cpuss_ahb_clk_src = {
        .freq_tbl = ftbl_gcc_cpuss_ahb_clk_src,
        .clkr.hw.init = &(struct clk_init_data){
                .name = "gcc_cpuss_ahb_clk_src",
-               .parent_names = gcc_parent_names_7,
+               .parent_names = gcc_parent_names_7_ao,
                .num_parents = 4,
                .ops = &clk_rcg2_ops,
        },
@@ -229,7 +235,7 @@ static struct clk_rcg2 gcc_cpuss_rbcpr_clk_src = {
        .freq_tbl = ftbl_gcc_cpuss_rbcpr_clk_src,
        .clkr.hw.init = &(struct clk_init_data){
                .name = "gcc_cpuss_rbcpr_clk_src",
-               .parent_names = gcc_parent_names_8,
+               .parent_names = gcc_parent_names_8_ao,
                .num_parents = 3,
                .ops = &clk_rcg2_ops,
        },
index 8d77090..0241450 100644 (file)
@@ -403,8 +403,10 @@ int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div,
        num_dividers = i;
 
        tmp = kcalloc(valid_div + 1, sizeof(*tmp), GFP_KERNEL);
-       if (!tmp)
+       if (!tmp) {
+               *table = ERR_PTR(-ENOMEM);
                return -ENOMEM;
+       }
 
        valid_div = 0;
        *width = 0;
@@ -439,6 +441,7 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup)
 {
        struct clk_omap_divider *div;
        struct clk_omap_reg *reg;
+       int ret;
 
        if (!setup)
                return NULL;
@@ -458,6 +461,12 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup)
                div->flags |= CLK_DIVIDER_POWER_OF_TWO;
 
        div->table = _get_div_table_from_setup(setup, &div->width);
+       if (IS_ERR(div->table)) {
+               ret = PTR_ERR(div->table);
+               kfree(div);
+               return ERR_PTR(ret);
+       }
+
 
        div->shift = setup->bit_shift;
        div->latch = -EINVAL;
index b17d153..23a1b27 100644 (file)
@@ -21,7 +21,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
        local_irq_enable();
        if (!current_set_polling_and_test()) {
                unsigned int loop_count = 0;
-               u64 limit = TICK_USEC;
+               u64 limit = TICK_NSEC;
                int i;
 
                for (i = 1; i < drv->state_count; i++) {
index fe070d7..4c97478 100644 (file)
@@ -537,6 +537,8 @@ static void process_response_list(struct nitrox_cmdq *cmdq)
        struct nitrox_device *ndev = cmdq->ndev;
        struct nitrox_softreq *sr;
        int req_completed = 0, err = 0, budget;
+       completion_t callback;
+       void *cb_arg;
 
        /* check all pending requests */
        budget = atomic_read(&cmdq->pending_count);
@@ -564,13 +566,13 @@ static void process_response_list(struct nitrox_cmdq *cmdq)
                smp_mb__after_atomic();
                /* remove from response list */
                response_list_del(sr, cmdq);
-
                /* ORH error code */
                err = READ_ONCE(*sr->resp.orh) & 0xff;
-
-               if (sr->callback)
-                       sr->callback(sr->cb_arg, err);
+               callback = sr->callback;
+               cb_arg = sr->cb_arg;
                softreq_destroy(sr);
+               if (callback)
+                       callback(cb_arg, err);
 
                req_completed++;
        }
index 8ada308..b0125ad 100644 (file)
@@ -380,7 +380,7 @@ static int init_cc_resources(struct platform_device *plat_dev)
        rc = cc_ivgen_init(new_drvdata);
        if (rc) {
                dev_err(dev, "cc_ivgen_init failed\n");
-               goto post_power_mgr_err;
+               goto post_buf_mgr_err;
        }
 
        /* Allocate crypto algs */
@@ -403,6 +403,9 @@ static int init_cc_resources(struct platform_device *plat_dev)
                goto post_hash_err;
        }
 
+       /* All set, we can allow autosuspend */
+       cc_pm_go(new_drvdata);
+
        /* If we got here and FIPS mode is enabled
         * it means all FIPS test passed, so let TEE
         * know we're good.
@@ -417,8 +420,6 @@ post_cipher_err:
        cc_cipher_free(new_drvdata);
 post_ivgen_err:
        cc_ivgen_fini(new_drvdata);
-post_power_mgr_err:
-       cc_pm_fini(new_drvdata);
 post_buf_mgr_err:
         cc_buffer_mgr_fini(new_drvdata);
 post_req_mgr_err:
index d990f47..6ff7e75 100644 (file)
@@ -100,20 +100,19 @@ int cc_pm_put_suspend(struct device *dev)
 
 int cc_pm_init(struct cc_drvdata *drvdata)
 {
-       int rc = 0;
        struct device *dev = drvdata_to_dev(drvdata);
 
        /* must be before the enabling to avoid redundant suspending */
        pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT);
        pm_runtime_use_autosuspend(dev);
        /* activate the PM module */
-       rc = pm_runtime_set_active(dev);
-       if (rc)
-               return rc;
-       /* enable the PM module*/
-       pm_runtime_enable(dev);
+       return pm_runtime_set_active(dev);
+}
 
-       return rc;
+/* enable the PM module */
+void cc_pm_go(struct cc_drvdata *drvdata)
+{
+       pm_runtime_enable(drvdata_to_dev(drvdata));
 }
 
 void cc_pm_fini(struct cc_drvdata *drvdata)
index 020a540..f626243 100644 (file)
@@ -16,6 +16,7 @@
 extern const struct dev_pm_ops ccree_pm;
 
 int cc_pm_init(struct cc_drvdata *drvdata);
+void cc_pm_go(struct cc_drvdata *drvdata);
 void cc_pm_fini(struct cc_drvdata *drvdata);
 int cc_pm_suspend(struct device *dev);
 int cc_pm_resume(struct device *dev);
@@ -29,6 +30,8 @@ static inline int cc_pm_init(struct cc_drvdata *drvdata)
        return 0;
 }
 
+static inline void cc_pm_go(struct cc_drvdata *drvdata) {}
+
 static inline void cc_pm_fini(struct cc_drvdata *drvdata) {}
 
 static inline int cc_pm_suspend(struct device *dev)
index 4e55768..fe69dcc 100644 (file)
@@ -203,6 +203,7 @@ struct at_xdmac_chan {
        u32                             save_cim;
        u32                             save_cnda;
        u32                             save_cndc;
+       u32                             irq_status;
        unsigned long                   status;
        struct tasklet_struct           tasklet;
        struct dma_slave_config         sconfig;
@@ -1580,8 +1581,8 @@ static void at_xdmac_tasklet(unsigned long data)
        struct at_xdmac_desc    *desc;
        u32                     error_mask;
 
-       dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n",
-                __func__, atchan->status);
+       dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n",
+               __func__, atchan->irq_status);
 
        error_mask = AT_XDMAC_CIS_RBEIS
                     | AT_XDMAC_CIS_WBEIS
@@ -1589,15 +1590,15 @@ static void at_xdmac_tasklet(unsigned long data)
 
        if (at_xdmac_chan_is_cyclic(atchan)) {
                at_xdmac_handle_cyclic(atchan);
-       } else if ((atchan->status & AT_XDMAC_CIS_LIS)
-                  || (atchan->status & error_mask)) {
+       } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS)
+                  || (atchan->irq_status & error_mask)) {
                struct dma_async_tx_descriptor  *txd;
 
-               if (atchan->status & AT_XDMAC_CIS_RBEIS)
+               if (atchan->irq_status & AT_XDMAC_CIS_RBEIS)
                        dev_err(chan2dev(&atchan->chan), "read bus error!!!");
-               if (atchan->status & AT_XDMAC_CIS_WBEIS)
+               if (atchan->irq_status & AT_XDMAC_CIS_WBEIS)
                        dev_err(chan2dev(&atchan->chan), "write bus error!!!");
-               if (atchan->status & AT_XDMAC_CIS_ROIS)
+               if (atchan->irq_status & AT_XDMAC_CIS_ROIS)
                        dev_err(chan2dev(&atchan->chan), "request overflow error!!!");
 
                spin_lock(&atchan->lock);
@@ -1652,7 +1653,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id)
                        atchan = &atxdmac->chan[i];
                        chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
                        chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS);
-                       atchan->status = chan_status & chan_imr;
+                       atchan->irq_status = chan_status & chan_imr;
                        dev_vdbg(atxdmac->dma.dev,
                                 "%s: chan%d: imr=0x%x, status=0x%x\n",
                                 __func__, i, chan_imr, chan_status);
@@ -1666,7 +1667,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id)
                                 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
                                 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
 
-                       if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS))
+                       if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS))
                                at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
 
                        tasklet_schedule(&atchan->tasklet);
index 1a44c80..ae10f56 100644 (file)
@@ -406,38 +406,32 @@ static void bcm2835_dma_fill_cb_chain_with_sg(
        }
 }
 
-static int bcm2835_dma_abort(void __iomem *chan_base)
+static int bcm2835_dma_abort(struct bcm2835_chan *c)
 {
-       unsigned long cs;
+       void __iomem *chan_base = c->chan_base;
        long int timeout = 10000;
 
-       cs = readl(chan_base + BCM2835_DMA_CS);
-       if (!(cs & BCM2835_DMA_ACTIVE))
+       /*
+        * A zero control block address means the channel is idle.
+        * (The ACTIVE flag in the CS register is not a reliable indicator.)
+        */
+       if (!readl(chan_base + BCM2835_DMA_ADDR))
                return 0;
 
        /* Write 0 to the active bit - Pause the DMA */
        writel(0, chan_base + BCM2835_DMA_CS);
 
        /* Wait for any current AXI transfer to complete */
-       while ((cs & BCM2835_DMA_ISPAUSED) && --timeout) {
+       while ((readl(chan_base + BCM2835_DMA_CS) &
+               BCM2835_DMA_WAITING_FOR_WRITES) && --timeout)
                cpu_relax();
-               cs = readl(chan_base + BCM2835_DMA_CS);
-       }
 
-       /* We'll un-pause when we set of our next DMA */
+       /* Peripheral might be stuck and fail to signal AXI write responses */
        if (!timeout)
-               return -ETIMEDOUT;
-
-       if (!(cs & BCM2835_DMA_ACTIVE))
-               return 0;
-
-       /* Terminate the control block chain */
-       writel(0, chan_base + BCM2835_DMA_NEXTCB);
-
-       /* Abort the whole DMA */
-       writel(BCM2835_DMA_ABORT | BCM2835_DMA_ACTIVE,
-              chan_base + BCM2835_DMA_CS);
+               dev_err(c->vc.chan.device->dev,
+                       "failed to complete outstanding writes\n");
 
+       writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS);
        return 0;
 }
 
@@ -476,8 +470,15 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data)
 
        spin_lock_irqsave(&c->vc.lock, flags);
 
-       /* Acknowledge interrupt */
-       writel(BCM2835_DMA_INT, c->chan_base + BCM2835_DMA_CS);
+       /*
+        * Clear the INT flag to receive further interrupts. Keep the channel
+        * active in case the descriptor is cyclic or in case the client has
+        * already terminated the descriptor and issued a new one. (May happen
+        * if this IRQ handler is threaded.) If the channel is finished, it
+        * will remain idle despite the ACTIVE flag being set.
+        */
+       writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE,
+              c->chan_base + BCM2835_DMA_CS);
 
        d = c->desc;
 
@@ -485,11 +486,7 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data)
                if (d->cyclic) {
                        /* call the cyclic callback */
                        vchan_cyclic_callback(&d->vd);
-
-                       /* Keep the DMA engine running */
-                       writel(BCM2835_DMA_ACTIVE,
-                              c->chan_base + BCM2835_DMA_CS);
-               } else {
+               } else if (!readl(c->chan_base + BCM2835_DMA_ADDR)) {
                        vchan_cookie_complete(&c->desc->vd);
                        bcm2835_dma_start_desc(c);
                }
@@ -779,7 +776,6 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan)
        struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
        struct bcm2835_dmadev *d = to_bcm2835_dma_dev(c->vc.chan.device);
        unsigned long flags;
-       int timeout = 10000;
        LIST_HEAD(head);
 
        spin_lock_irqsave(&c->vc.lock, flags);
@@ -789,27 +785,11 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan)
        list_del_init(&c->node);
        spin_unlock(&d->lock);
 
-       /*
-        * Stop DMA activity: we assume the callback will not be called
-        * after bcm_dma_abort() returns (even if it does, it will see
-        * c->desc is NULL and exit.)
-        */
+       /* stop DMA activity */
        if (c->desc) {
                vchan_terminate_vdesc(&c->desc->vd);
                c->desc = NULL;
-               bcm2835_dma_abort(c->chan_base);
-
-               /* Wait for stopping */
-               while (--timeout) {
-                       if (!(readl(c->chan_base + BCM2835_DMA_CS) &
-                                               BCM2835_DMA_ACTIVE))
-                               break;
-
-                       cpu_relax();
-               }
-
-               if (!timeout)
-                       dev_err(d->ddev.dev, "DMA transfer could not be terminated\n");
+               bcm2835_dma_abort(c);
        }
 
        vchan_get_all_descriptors(&c->vc, &head);
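
The rewritten bcm2835_dma_abort() above reduces to: treat a zero control-block address as idle, clear CS to pause the channel, poll (bounded) until outstanding AXI writes drain, then reset unconditionally. A compilable sketch of that bounded-poll shape, with the register accessors stubbed out and illustrative bit values:

#include <stdio.h>

#define DMA_WAITING_FOR_WRITES  0x100           /* illustrative bit position */
#define DMA_RESET               0x80000000u     /* illustrative bit position */

/* stand-ins for readl()/writel() on the channel registers */
static unsigned int fake_cs, fake_addr;
static unsigned int read_cs(void)       { return fake_cs; }
static unsigned int read_addr(void)     { return fake_addr; }
static void write_cs(unsigned int v)    { fake_cs = v; }

static int abort_channel(void)
{
        long timeout = 10000;

        if (!read_addr())       /* zero CB address: channel already idle */
                return 0;

        write_cs(0);            /* clear ACTIVE: pause the channel */

        /* bounded wait for in-flight writes to drain */
        while ((read_cs() & DMA_WAITING_FOR_WRITES) && --timeout)
                ;               /* cpu_relax() in the kernel version */

        if (!timeout)
                fprintf(stderr, "failed to complete outstanding writes\n");

        write_cs(DMA_RESET);    /* reset regardless; nothing else to try */
        return 0;
}

int main(void)
{
        fake_addr = 0x1000;     /* pretend a control block is loaded */
        return abort_channel();
}
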
index 2eea4ef..6511928 100644 (file)
@@ -711,11 +711,9 @@ static int dmatest_func(void *data)
                        srcs[i] = um->addr[i] + src_off;
                        ret = dma_mapping_error(dev->dev, um->addr[i]);
                        if (ret) {
-                               dmaengine_unmap_put(um);
                                result("src mapping error", total_tests,
                                       src_off, dst_off, len, ret);
-                               failed_tests++;
-                               continue;
+                               goto error_unmap_continue;
                        }
                        um->to_cnt++;
                }
@@ -730,11 +728,9 @@ static int dmatest_func(void *data)
                                               DMA_BIDIRECTIONAL);
                        ret = dma_mapping_error(dev->dev, dsts[i]);
                        if (ret) {
-                               dmaengine_unmap_put(um);
                                result("dst mapping error", total_tests,
                                       src_off, dst_off, len, ret);
-                               failed_tests++;
-                               continue;
+                               goto error_unmap_continue;
                        }
                        um->bidi_cnt++;
                }
@@ -762,12 +758,10 @@ static int dmatest_func(void *data)
                }
 
                if (!tx) {
-                       dmaengine_unmap_put(um);
                        result("prep error", total_tests, src_off,
                               dst_off, len, ret);
                        msleep(100);
-                       failed_tests++;
-                       continue;
+                       goto error_unmap_continue;
                }
 
                done->done = false;
@@ -776,12 +770,10 @@ static int dmatest_func(void *data)
                cookie = tx->tx_submit(tx);
 
                if (dma_submit_error(cookie)) {
-                       dmaengine_unmap_put(um);
                        result("submit error", total_tests, src_off,
                               dst_off, len, ret);
                        msleep(100);
-                       failed_tests++;
-                       continue;
+                       goto error_unmap_continue;
                }
                dma_async_issue_pending(chan);
 
@@ -790,22 +782,20 @@ static int dmatest_func(void *data)
 
                status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
 
-               dmaengine_unmap_put(um);
-
                if (!done->done) {
                        result("test timed out", total_tests, src_off, dst_off,
                               len, 0);
-                       failed_tests++;
-                       continue;
+                       goto error_unmap_continue;
                } else if (status != DMA_COMPLETE) {
                        result(status == DMA_ERROR ?
                               "completion error status" :
                               "completion busy status", total_tests, src_off,
                               dst_off, len, ret);
-                       failed_tests++;
-                       continue;
+                       goto error_unmap_continue;
                }
 
+               dmaengine_unmap_put(um);
+
                if (params->noverify) {
                        verbose_result("test passed", total_tests, src_off,
                                       dst_off, len, 0);
@@ -846,6 +836,12 @@ static int dmatest_func(void *data)
                        verbose_result("test passed", total_tests, src_off,
                                       dst_off, len, 0);
                }
+
+               continue;
+
+error_unmap_continue:
+               dmaengine_unmap_put(um);
+               failed_tests++;
        }
        ktime = ktime_sub(ktime_get(), ktime);
        ktime = ktime_sub(ktime, comparetime);
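
The dmatest rework above is the classic centralized-cleanup idiom: every failure path jumps to a single label that unmaps and bumps the failure counter, instead of each path repeating the unmap-and-count pair by hand. The shape, as a standalone sketch with malloc/free standing in for map/unmap:

#include <stdio.h>
#include <stdlib.h>

static int failed_tests;

static void run_tests(int n)
{
        for (int i = 0; i < n; i++) {
                char *mapping = malloc(64);     /* stands in for the DMA mapping */

                if (!mapping)
                        return;

                if (i == 1)                     /* simulated "submit error" */
                        goto error_unmap_continue;
                if (i == 2)                     /* simulated "test timed out" */
                        goto error_unmap_continue;

                free(mapping);                  /* success path unmaps too */
                continue;

error_unmap_continue:
                free(mapping);                  /* one place does the cleanup */
                failed_tests++;
        }
}

int main(void)
{
        run_tests(4);
        printf("failed: %d\n", failed_tests);
        return 0;
}
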
index c2fff3f..4a09af3 100644 (file)
@@ -618,7 +618,7 @@ static void imxdma_tasklet(unsigned long data)
 {
        struct imxdma_channel *imxdmac = (void *)data;
        struct imxdma_engine *imxdma = imxdmac->imxdma;
-       struct imxdma_desc *desc;
+       struct imxdma_desc *desc, *next_desc;
        unsigned long flags;
 
        spin_lock_irqsave(&imxdma->lock, flags);
@@ -648,10 +648,10 @@ static void imxdma_tasklet(unsigned long data)
        list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free);
 
        if (!list_empty(&imxdmac->ld_queue)) {
-               desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc,
-                                       node);
+               next_desc = list_first_entry(&imxdmac->ld_queue,
+                                            struct imxdma_desc, node);
                list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active);
-               if (imxdma_xfer_desc(desc) < 0)
+               if (imxdma_xfer_desc(next_desc) < 0)
                        dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n",
                                 __func__, imxdmac->channel);
        }
index 472c88a..92f843e 100644 (file)
@@ -119,6 +119,11 @@ void scmi_driver_unregister(struct scmi_driver *driver)
 }
 EXPORT_SYMBOL_GPL(scmi_driver_unregister);
 
+static void scmi_device_release(struct device *dev)
+{
+       kfree(to_scmi_dev(dev));
+}
+
 struct scmi_device *
 scmi_device_create(struct device_node *np, struct device *parent, int protocol)
 {
@@ -138,6 +143,7 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol)
        scmi_dev->dev.parent = parent;
        scmi_dev->dev.of_node = np;
        scmi_dev->dev.bus = &scmi_bus_type;
+       scmi_dev->dev.release = scmi_device_release;
        dev_set_name(&scmi_dev->dev, "scmi_dev.%d", id);
 
        retval = device_register(&scmi_dev->dev);
@@ -156,9 +162,8 @@ free_mem:
 void scmi_device_destroy(struct scmi_device *scmi_dev)
 {
        scmi_handle_put(scmi_dev->handle);
-       device_unregister(&scmi_dev->dev);
        ida_simple_remove(&scmi_bus_id, scmi_dev->id);
-       kfree(scmi_dev);
+       device_unregister(&scmi_dev->dev);
 }
 
 void scmi_set_handle(struct scmi_device *scmi_dev)
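
The scmi_bus fix above relies on the driver-core refcounting contract: kfree() belongs in the struct device release callback, and device_unregister() only drops a reference, with the memory going away when the last holder lets go. Freeing immediately after device_unregister(), as the old code did, is a use-after-free if anyone still holds a reference. A hedged kernel-style sketch of the pattern (names illustrative, module boilerplate omitted):

#include <linux/device.h>
#include <linux/slab.h>

struct my_dev {
        struct device dev;
        int id;
};
#define to_my_dev(d) container_of(d, struct my_dev, dev)

/* runs only when the last reference to the device is dropped */
static void my_dev_release(struct device *dev)
{
        kfree(to_my_dev(dev));
}

static struct my_dev *my_dev_create(struct device *parent)
{
        struct my_dev *mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);

        if (!mdev)
                return NULL;
        mdev->dev.parent = parent;
        mdev->dev.release = my_dev_release;     /* frees at refcount zero */
        dev_set_name(&mdev->dev, "my_dev.0");
        if (device_register(&mdev->dev)) {
                put_device(&mdev->dev);         /* release() does the kfree */
                return NULL;
        }
        return mdev;
}

static void my_dev_destroy(struct my_dev *mdev)
{
        /* drop our reference; release() runs once the last one is gone */
        device_unregister(&mdev->dev);
}
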
index 23ea1ed..352bd24 100644 (file)
@@ -37,8 +37,9 @@ extern u64 efi_system_table;
 static struct ptdump_info efi_ptdump_info = {
        .mm             = &efi_mm,
        .markers        = (struct addr_marker[]){
-               { 0,            "UEFI runtime start" },
-               { DEFAULT_MAP_WINDOW_64, "UEFI runtime end" }
+               { 0,                            "UEFI runtime start" },
+               { DEFAULT_MAP_WINDOW_64,        "UEFI runtime end" },
+               { -1,                           NULL }
        },
        .base_addr      = 0,
 };
index a1a09e0..13851b3 100644 (file)
@@ -508,14 +508,11 @@ static int __init s10_init(void)
                return -ENODEV;
 
        np = of_find_matching_node(fw_np, s10_of_match);
-       if (!np) {
-               of_node_put(fw_np);
+       if (!np)
                return -ENODEV;
-       }
 
        of_node_put(np);
        ret = of_platform_populate(fw_np, s10_of_match, NULL, NULL);
-       of_node_put(fw_np);
        if (ret)
                return ret;
 
index 6b11f13..7f9e030 100644 (file)
@@ -66,8 +66,10 @@ static int altr_a10sr_gpio_direction_input(struct gpio_chip *gc,
 static int altr_a10sr_gpio_direction_output(struct gpio_chip *gc,
                                            unsigned int nr, int value)
 {
-       if (nr <= (ALTR_A10SR_OUT_VALID_RANGE_HI - ALTR_A10SR_LED_VALID_SHIFT))
+       if (nr <= (ALTR_A10SR_OUT_VALID_RANGE_HI - ALTR_A10SR_LED_VALID_SHIFT)) {
+               altr_a10sr_gpio_set(gc, nr, value);
                return 0;
+       }
        return -EINVAL;
 }
 
index e0d6a0a..e41223c 100644 (file)
@@ -180,7 +180,18 @@ static void sprd_eic_free(struct gpio_chip *chip, unsigned int offset)
 
 static int sprd_eic_get(struct gpio_chip *chip, unsigned int offset)
 {
-       return sprd_eic_read(chip, offset, SPRD_EIC_DBNC_DATA);
+       struct sprd_eic *sprd_eic = gpiochip_get_data(chip);
+
+       switch (sprd_eic->type) {
+       case SPRD_EIC_DEBOUNCE:
+               return sprd_eic_read(chip, offset, SPRD_EIC_DBNC_DATA);
+       case SPRD_EIC_ASYNC:
+               return sprd_eic_read(chip, offset, SPRD_EIC_ASYNC_DATA);
+       case SPRD_EIC_SYNC:
+               return sprd_eic_read(chip, offset, SPRD_EIC_SYNC_DATA);
+       default:
+               return -ENOTSUPP;
+       }
 }
 
 static int sprd_eic_direction_input(struct gpio_chip *chip, unsigned int offset)
@@ -368,6 +379,7 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type)
                        irq_set_handler_locked(data, handle_edge_irq);
                        break;
                case IRQ_TYPE_EDGE_BOTH:
+                       sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0);
                        sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 1);
                        irq_set_handler_locked(data, handle_edge_irq);
                        break;
index adf72dd..68a35b6 100644 (file)
@@ -84,6 +84,7 @@ MODULE_DEVICE_TABLE(of, pcf857x_of_table);
  */
 struct pcf857x {
        struct gpio_chip        chip;
+       struct irq_chip         irqchip;
        struct i2c_client       *client;
        struct mutex            lock;           /* protect 'out' */
        unsigned                out;            /* software latch */
@@ -252,18 +253,6 @@ static void pcf857x_irq_bus_sync_unlock(struct irq_data *data)
        mutex_unlock(&gpio->lock);
 }
 
-static struct irq_chip pcf857x_irq_chip = {
-       .name           = "pcf857x",
-       .irq_enable     = pcf857x_irq_enable,
-       .irq_disable    = pcf857x_irq_disable,
-       .irq_ack        = noop,
-       .irq_mask       = noop,
-       .irq_unmask     = noop,
-       .irq_set_wake   = pcf857x_irq_set_wake,
-       .irq_bus_lock           = pcf857x_irq_bus_lock,
-       .irq_bus_sync_unlock    = pcf857x_irq_bus_sync_unlock,
-};
-
 /*-------------------------------------------------------------------------*/
 
 static int pcf857x_probe(struct i2c_client *client,
@@ -376,8 +365,17 @@ static int pcf857x_probe(struct i2c_client *client,
 
        /* Enable irqchip if we have an interrupt */
        if (client->irq) {
+               gpio->irqchip.name = "pcf857x",
+               gpio->irqchip.irq_enable = pcf857x_irq_enable,
+               gpio->irqchip.irq_disable = pcf857x_irq_disable,
+               gpio->irqchip.irq_ack = noop,
+               gpio->irqchip.irq_mask = noop,
+               gpio->irqchip.irq_unmask = noop,
+               gpio->irqchip.irq_set_wake = pcf857x_irq_set_wake,
+               gpio->irqchip.irq_bus_lock = pcf857x_irq_bus_lock,
+               gpio->irqchip.irq_bus_sync_unlock = pcf857x_irq_bus_sync_unlock,
                status = gpiochip_irqchip_add_nested(&gpio->chip,
-                                                    &pcf857x_irq_chip,
+                                                    &gpio->irqchip,
                                                     0, handle_level_irq,
                                                     IRQ_TYPE_NONE);
                if (status) {
@@ -392,7 +390,7 @@ static int pcf857x_probe(struct i2c_client *client,
                if (status)
                        goto fail;
 
-               gpiochip_set_nested_irqchip(&gpio->chip, &pcf857x_irq_chip,
+               gpiochip_set_nested_irqchip(&gpio->chip, &gpio->irqchip,
                                            client->irq);
                gpio->irq_parent = client->irq;
        }
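
The pcf857x change above replaces one static struct irq_chip, shared by every probed chip, with a copy embedded in each instance's state, so configuring one device can never mutate another's irqchip. A sketch of the same move for a hypothetical driver, using the nested-irqchip API visible in the diff:

#include <linux/gpio/driver.h>
#include <linux/irq.h>

struct my_gpio {
        struct gpio_chip chip;
        struct irq_chip  irqchip;       /* one copy per device instance */
};

static void my_irq_mask(struct irq_data *d)   { /* ... */ }
static void my_irq_unmask(struct irq_data *d) { /* ... */ }

static int my_gpio_add_irqchip(struct my_gpio *gpio)
{
        /*
         * Fill the embedded irq_chip instead of pointing gpiolib at a
         * shared static structure.
         */
        gpio->irqchip.name = "my-gpio";
        gpio->irqchip.irq_mask = my_irq_mask;
        gpio->irqchip.irq_unmask = my_irq_unmask;

        return gpiochip_irqchip_add_nested(&gpio->chip, &gpio->irqchip,
                                           0, handle_level_irq,
                                           IRQ_TYPE_NONE);
}
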
index 1b79ebc..541fa6a 100644 (file)
@@ -253,6 +253,7 @@ static int vf610_gpio_probe(struct platform_device *pdev)
        struct vf610_gpio_port *port;
        struct resource *iores;
        struct gpio_chip *gc;
+       int i;
        int ret;
 
        port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL);
@@ -319,6 +320,10 @@ static int vf610_gpio_probe(struct platform_device *pdev)
        if (ret < 0)
                return ret;
 
+       /* Mask all GPIO interrupts */
+       for (i = 0; i < gc->ngpio; i++)
+               vf610_gpio_writel(0, port->base + PORT_PCR(i));
+
        /* Clear the interrupt status register for all GPIO's */
        vf610_gpio_writel(~0, port->base + PORT_ISFR);
 
index 1651d7f..d1adfdf 100644 (file)
@@ -828,7 +828,14 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p)
        /* Do not leak kernel stack to userspace */
        memset(&ge, 0, sizeof(ge));
 
-       ge.timestamp = le->timestamp;
+       /*
+        * We may be running from a nested threaded interrupt in which case
+        * we didn't get the timestamp from lineevent_irq_handler().
+        */
+       if (!le->timestamp)
+               ge.timestamp = ktime_get_real_ns();
+       else
+               ge.timestamp = le->timestamp;
 
        if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE
            && le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) {
index 6896dec..0ed41a9 100644 (file)
@@ -1686,7 +1686,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
                effective_mode &= ~S_IWUSR;
 
        if ((adev->flags & AMD_IS_APU) &&
-           (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
+           (attr == &sensor_dev_attr_power1_average.dev_attr.attr ||
+            attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
             attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr||
             attr == &sensor_dev_attr_power1_cap.dev_attr.attr))
                return 0;
index 71913a1..a38e0fb 100644 (file)
@@ -38,6 +38,7 @@
 #include "amdgpu_gem.h"
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
+#include <linux/dma-fence-array.h>
 
 /**
  * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
@@ -187,6 +188,48 @@ error:
        return ERR_PTR(ret);
 }
 
+static int
+__reservation_object_make_exclusive(struct reservation_object *obj)
+{
+       struct dma_fence **fences;
+       unsigned int count;
+       int r;
+
+       if (!reservation_object_get_list(obj)) /* no shared fences to convert */
+               return 0;
+
+       r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
+       if (r)
+               return r;
+
+       if (count == 0) {
+               /* Now that was unexpected. */
+       } else if (count == 1) {
+               reservation_object_add_excl_fence(obj, fences[0]);
+               dma_fence_put(fences[0]);
+               kfree(fences);
+       } else {
+               struct dma_fence_array *array;
+
+               array = dma_fence_array_create(count, fences,
+                                              dma_fence_context_alloc(1), 0,
+                                              false);
+               if (!array)
+                       goto err_fences_put;
+
+               reservation_object_add_excl_fence(obj, &array->base);
+               dma_fence_put(&array->base);
+       }
+
+       return 0;
+
+err_fences_put:
+       while (count--)
+               dma_fence_put(fences[count]);
+       kfree(fences);
+       return -ENOMEM;
+}
+
 /**
  * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
  * @dma_buf: Shared DMA buffer
@@ -218,16 +261,16 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
 
        if (attach->dev->driver != adev->dev->driver) {
                /*
-                * Wait for all shared fences to complete before we switch to future
-                * use of exclusive fence on this prime shared bo.
+                * We only create shared fences for internal use, but importers
+                * of the dmabuf rely on exclusive fences for implicitly
+                * tracking write hazards. As any of the current fences may
+                * correspond to a write, we need to convert all existing
+                * fences on the reservation object into a single exclusive
+                * fence.
                 */
-               r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
-                                                       true, false,
-                                                       MAX_SCHEDULE_TIMEOUT);
-               if (unlikely(r < 0)) {
-                       DRM_DEBUG_PRIME("Fence wait failed: %li\n", r);
+               r = __reservation_object_make_exclusive(bo->tbo.resv);
+               if (r)
                        goto error_unreserve;
-               }
        }
 
        /* pin buffer into GTT */
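
For context on why the attach path above folds everything into an exclusive fence: as the new comment says, importers doing implicit sync gate their work on the exclusive slot only, so any shared fence that might be a write has to be promoted. A hedged sketch of the consuming side under that assumption (helper names as of this kernel's reservation API):

#include <linux/dma-fence.h>
#include <linux/reservation.h>

/* importer side: wait for whatever the exporter published as exclusive */
static int wait_for_writes(struct reservation_object *resv)
{
        struct dma_fence *excl = reservation_object_get_excl_rcu(resv);
        int ret = 0;

        if (excl) {
                ret = dma_fence_wait(excl, true);       /* interruptible */
                dma_fence_put(excl);
        }
        return ret;
}
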
index 8fab0d6..3a9b48b 100644 (file)
@@ -90,8 +90,10 @@ static int psp_sw_fini(void *handle)
        adev->psp.sos_fw = NULL;
        release_firmware(adev->psp.asd_fw);
        adev->psp.asd_fw = NULL;
-       release_firmware(adev->psp.ta_fw);
-       adev->psp.ta_fw = NULL;
+       if (adev->psp.ta_fw) {
+               release_firmware(adev->psp.ta_fw);
+               adev->psp.ta_fw = NULL;
+       }
        return 0;
 }
 
@@ -435,6 +437,9 @@ static int psp_xgmi_initialize(struct psp_context *psp)
        struct ta_xgmi_shared_memory *xgmi_cmd;
        int ret;
 
+       if (!psp->adev->psp.ta_fw)
+               return -ENOENT;
+
        if (!psp->xgmi_context.initialized) {
                ret = psp_xgmi_init_shared_buf(psp);
                if (ret)
index d2ea5ce..7c108e6 100644 (file)
@@ -3363,14 +3363,15 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
                         struct amdgpu_task_info *task_info)
 {
        struct amdgpu_vm *vm;
+       unsigned long flags;
 
-       spin_lock(&adev->vm_manager.pasid_lock);
+       spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
 
        vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
        if (vm)
                *task_info = vm->task_info;
 
-       spin_unlock(&adev->vm_manager.pasid_lock);
+       spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 }
 
 /**
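
The amdgpu_vm hunk above moves pasid_lock to the _irqsave variants, presumably because the pasid lookup can be reached from interrupt context; taking a plain spin_lock on a lock that an interrupt handler also takes can deadlock on the same CPU. The general shape:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(lookup_lock);

/* safe from any context, including interrupt handlers */
static void lookup_copy_out(void)
{
        unsigned long flags;

        spin_lock_irqsave(&lookup_lock, flags); /* masks local IRQs */
        /* ... idr_find() plus a short copy-out, nothing that sleeps ... */
        spin_unlock_irqrestore(&lookup_lock, flags);
}
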
index 4cd31a2..186db18 100644 (file)
@@ -93,7 +93,20 @@ static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev,
 static void nbio_v7_4_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
                                                        bool enable)
 {
+       u32 tmp = 0;
 
+       if (enable) {
+               tmp = REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) |
+                     REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+                     REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+               WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_LOW,
+                            lower_32_bits(adev->doorbell.base));
+               WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+                            upper_32_bits(adev->doorbell.base));
+       }
+
+       WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_CNTL, tmp);
 }
 
 static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev,
index 0c6e7f9..189fcb0 100644 (file)
@@ -152,18 +152,22 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
        err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
-       if (err)
-               goto out2;
-
-       err = amdgpu_ucode_validate(adev->psp.ta_fw);
-       if (err)
-               goto out2;
-
-       ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
-       adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
-       adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes);
-       adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr +
-               le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+       if (err) {
+               release_firmware(adev->psp.ta_fw);
+               adev->psp.ta_fw = NULL;
+               dev_info(adev->dev,
+                        "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
+       } else {
+               err = amdgpu_ucode_validate(adev->psp.ta_fw);
+               if (err)
+                       goto out2;
+
+               ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
+               adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
+               adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes);
+               adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr +
+                       le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+       }
 
        return 0;
 
index 8849b74..9b63997 100644 (file)
@@ -729,11 +729,13 @@ static int soc15_common_early_init(void *handle)
        case CHIP_RAVEN:
                adev->asic_funcs = &soc15_asic_funcs;
                if (adev->rev_id >= 0x8)
-                       adev->external_rev_id = adev->rev_id + 0x81;
+                       adev->external_rev_id = adev->rev_id + 0x79;
                else if (adev->pdev->device == 0x15d8)
                        adev->external_rev_id = adev->rev_id + 0x41;
+               else if (adev->rev_id == 1)
+                       adev->external_rev_id = adev->rev_id + 0x20;
                else
-                       adev->external_rev_id = 0x1;
+                       adev->external_rev_id = adev->rev_id + 0x01;
 
                if (adev->rev_id >= 0x8) {
                        adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
index 5d85ff3..2e7c449 100644 (file)
@@ -863,7 +863,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
        return 0;
 }
 
-#if CONFIG_X86_64
+#ifdef CONFIG_X86_64
 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
                                uint32_t *num_entries,
                                struct crat_subtype_iolink *sub_type_hdr)
index f4fa40c..0b392bf 100644 (file)
@@ -4082,7 +4082,8 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
        }
 
        if (connector_type == DRM_MODE_CONNECTOR_HDMIA ||
-           connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
+           connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+           connector_type == DRM_MODE_CONNECTOR_eDP) {
                drm_connector_attach_vrr_capable_property(
                        &aconnector->base);
        }
index 9a7ac58..ddd75a4 100644 (file)
@@ -671,6 +671,25 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
        return bytes_from_user;
 }
 
+/*
+ * Returns the min and max vrr vfreq through the connector's debugfs file.
+ * Example usage: cat /sys/kernel/debug/dri/0/DP-1/vrr_range
+ */
+static int vrr_range_show(struct seq_file *m, void *data)
+{
+       struct drm_connector *connector = m->private;
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+
+       if (connector->status != connector_status_connected)
+               return -ENODEV;
+
+       seq_printf(m, "Min: %u\n", (unsigned int)aconnector->min_vfreq);
+       seq_printf(m, "Max: %u\n", (unsigned int)aconnector->max_vfreq);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(vrr_range);
+
 static const struct file_operations dp_link_settings_debugfs_fops = {
        .owner = THIS_MODULE,
        .read = dp_link_settings_read,
@@ -697,7 +716,8 @@ static const struct {
 } dp_debugfs_entries[] = {
                {"link_settings", &dp_link_settings_debugfs_fops},
                {"phy_settings", &dp_phy_settings_debugfs_fop},
-               {"test_pattern", &dp_phy_test_pattern_fops}
+               {"test_pattern", &dp_phy_test_pattern_fops},
+               {"vrr_range", &vrr_range_fops}
 };
 
 int connector_debugfs_init(struct amdgpu_dm_connector *connector)
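
vrr_range_fops above is generated by DEFINE_SHOW_ATTRIBUTE(vrr_range), the seq_file helper for read-only debugfs files; it expands to roughly the following (simplified from <linux/seq_file.h>):

/* simplified expansion of DEFINE_SHOW_ATTRIBUTE(vrr_range) */
static int vrr_range_open(struct inode *inode, struct file *file)
{
        /* single_open() wires ->show to the one-shot seq_file printer */
        return single_open(file, vrr_range_show, inode->i_private);
}

static const struct file_operations vrr_range_fops = {
        .owner   = THIS_MODULE,
        .open    = vrr_range_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};
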
index afd287f..19801bd 100644 (file)
@@ -591,7 +591,15 @@ static void dce11_pplib_apply_display_requirements(
                        dc,
                        context->bw.dce.sclk_khz);
 
-       pp_display_cfg->min_dcfclock_khz = pp_display_cfg->min_engine_clock_khz;
+       /*
+        * As a workaround for >4x4K lightup, set dcfclock to the
+        * min_engine_clock value.
+        * This is not required for fewer than 5 displays,
+        * so don't request dcfclk in dc, to avoid impacting
+        * power saving.
+        */
+       pp_display_cfg->min_dcfclock_khz = (context->stream_count > 4) ?
+                       pp_display_cfg->min_engine_clock_khz : 0;
 
        pp_display_cfg->min_engine_clock_deep_sleep_khz
                        = context->bw.dce.sclk_deep_sleep_khz;
index f95c5f5..5273de3 100644 (file)
@@ -1033,6 +1033,7 @@ static int smu10_get_clock_by_type_with_latency(struct pp_hwmgr *hwmgr,
                break;
        case amd_pp_dpp_clock:
                pclk_vol_table = pinfo->vdd_dep_on_dppclk;
+               break;
        default:
                return -EINVAL;
        }
index 99cba8e..5df1256 100644 (file)
@@ -528,7 +528,8 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
 
        object_count = cl->object_count;
 
-       object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), object_count * sizeof(__u32));
+       object_ids = memdup_user(u64_to_user_ptr(cl->object_ids),
+                       array_size(object_count, sizeof(__u32)));
        if (IS_ERR(object_ids))
                return PTR_ERR(object_ids);
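
The drm_lease fix above swaps the open-coded object_count * sizeof(__u32) for array_size() from <linux/overflow.h>, which saturates to SIZE_MAX on multiply overflow; a SIZE_MAX allocation always fails, so a crafted object_count can no longer wrap into an undersized buffer that memdup_user() then overruns. Roughly, as a hypothetical helper:

#include <linux/overflow.h>
#include <linux/string.h>

/* hypothetical helper showing the overflow-safe sizing */
static void *dup_ids(const void __user *ptr, size_t count)
{
        /* array_size() returns count * 4, or SIZE_MAX if that overflows */
        return memdup_user(ptr, array_size(count, sizeof(__u32)));
}
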
 
index 24a7504..f91e02c 100644 (file)
@@ -758,7 +758,7 @@ int drm_mode_hsync(const struct drm_display_mode *mode)
        if (mode->hsync)
                return mode->hsync;
 
-       if (mode->htotal < 0)
+       if (mode->htotal <= 0)
                return 0;
 
        calc_val = (mode->clock * 1000) / mode->htotal; /* hsync in Hz */
index 216f52b..c882ea9 100644 (file)
@@ -1824,6 +1824,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
        return 0;
 }
 
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+             unsigned long addr, unsigned long size)
+{
+       if (vma->vm_file != filp)
+               return false;
+
+       return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size;
+}
+
 /**
  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
  *                      it is mapped to.
@@ -1882,7 +1892,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
                        return -EINTR;
                }
                vma = find_vma(mm, addr);
-               if (vma)
+               if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
                        vma->vm_page_prot =
                                pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
                else
index d6c8f8f..017fc60 100644 (file)
@@ -594,7 +594,8 @@ static void i915_pmu_enable(struct perf_event *event)
         * Update the bitmask of enabled events and increment
         * the event reference counter.
         */
-       GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
+       BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS);
+       GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
        GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
        i915->pmu.enable |= BIT_ULL(bit);
        i915->pmu.enable_count[bit]++;
@@ -615,11 +616,16 @@ static void i915_pmu_enable(struct perf_event *event)
                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));
-               GEM_BUG_ON(!engine);
-               engine->pmu.enable |= BIT(sample);
 
-               GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+               BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
+                            I915_ENGINE_SAMPLE_COUNT);
+               BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
+                            I915_ENGINE_SAMPLE_COUNT);
+               GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
+               GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
                GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
+
+               engine->pmu.enable |= BIT(sample);
                engine->pmu.enable_count[sample]++;
        }
 
@@ -649,9 +655,11 @@ static void i915_pmu_disable(struct perf_event *event)
                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));
-               GEM_BUG_ON(!engine);
-               GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+
+               GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
+               GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
                GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
+
                /*
                 * Decrement the reference count and clear the enabled
                 * bitmask when the last listener on an event goes away.
@@ -660,7 +668,7 @@ static void i915_pmu_disable(struct perf_event *event)
                        engine->pmu.enable &= ~BIT(sample);
        }
 
-       GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
+       GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
        GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
        /*
         * Decrement the reference count and clear the enabled
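
The i915 PMU hunks above replace runtime checks against the I915_PMU_MASK_BITS/I915_PMU_SAMPLE_BITS constants with BUILD_BUG_ON, which fails the build if an array and its sizing constant ever diverge, and index the remaining runtime checks off ARRAY_SIZE so they track the arrays themselves. The idiom in isolation:

#include <linux/bug.h>
#include <linux/kernel.h>

#define SAMPLE_COUNT 4

static unsigned int enable_count[SAMPLE_COUNT];

static void enable_sample(unsigned int sample)
{
        /* compile-time: array and constant must agree */
        BUILD_BUG_ON(ARRAY_SIZE(enable_count) != SAMPLE_COUNT);
        /* runtime: bound the index by the array, not a parallel #define */
        if (WARN_ON(sample >= ARRAY_SIZE(enable_count)))
                return;

        enable_count[sample]++;
}
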
index 7f164ca..b3728c5 100644 (file)
@@ -31,6 +31,8 @@ enum {
        ((1 << I915_PMU_SAMPLE_BITS) + \
         (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0)))
 
+#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1)
+
 struct i915_pmu_sample {
        u64 cur;
 };
index 0a7d605..067054c 100644 (file)
@@ -1790,7 +1790,7 @@ enum i915_power_well_id {
 #define _CNL_PORT_TX_C_LN0_OFFSET              0x162C40
 #define _CNL_PORT_TX_D_LN0_OFFSET              0x162E40
 #define _CNL_PORT_TX_F_LN0_OFFSET              0x162840
-#define _CNL_PORT_TX_DW_GRP(port, dw)  (_PICK((port), \
+#define _CNL_PORT_TX_DW_GRP(dw, port)  (_PICK((port), \
                                               _CNL_PORT_TX_AE_GRP_OFFSET, \
                                               _CNL_PORT_TX_B_GRP_OFFSET, \
                                               _CNL_PORT_TX_B_GRP_OFFSET, \
@@ -1798,7 +1798,7 @@ enum i915_power_well_id {
                                               _CNL_PORT_TX_AE_GRP_OFFSET, \
                                               _CNL_PORT_TX_F_GRP_OFFSET) + \
                                               4 * (dw))
-#define _CNL_PORT_TX_DW_LN0(port, dw)  (_PICK((port), \
+#define _CNL_PORT_TX_DW_LN0(dw, port)  (_PICK((port), \
                                               _CNL_PORT_TX_AE_LN0_OFFSET, \
                                               _CNL_PORT_TX_B_LN0_OFFSET, \
                                               _CNL_PORT_TX_B_LN0_OFFSET, \
@@ -1834,9 +1834,9 @@ enum i915_power_well_id {
 
 #define _CNL_PORT_TX_DW4_LN0_AE                0x162450
 #define _CNL_PORT_TX_DW4_LN1_AE                0x1624D0
-#define CNL_PORT_TX_DW4_GRP(port)      _MMIO(_CNL_PORT_TX_DW_GRP((port), 4))
-#define CNL_PORT_TX_DW4_LN0(port)      _MMIO(_CNL_PORT_TX_DW_LN0((port), 4))
-#define CNL_PORT_TX_DW4_LN(port, ln)   _MMIO(_CNL_PORT_TX_DW_LN0((port), 4) + \
+#define CNL_PORT_TX_DW4_GRP(port)      _MMIO(_CNL_PORT_TX_DW_GRP(4, (port)))
+#define CNL_PORT_TX_DW4_LN0(port)      _MMIO(_CNL_PORT_TX_DW_LN0(4, (port)))
+#define CNL_PORT_TX_DW4_LN(port, ln)   _MMIO(_CNL_PORT_TX_DW_LN0(4, (port)) + \
                                           ((ln) * (_CNL_PORT_TX_DW4_LN1_AE - \
                                                    _CNL_PORT_TX_DW4_LN0_AE)))
 #define ICL_PORT_TX_DW4_AUX(port)      _MMIO(_ICL_PORT_TX_DW_AUX(4, port))
@@ -1864,8 +1864,12 @@ enum i915_power_well_id {
 #define   RTERM_SELECT(x)              ((x) << 3)
 #define   RTERM_SELECT_MASK            (0x7 << 3)
 
-#define CNL_PORT_TX_DW7_GRP(port)      _MMIO(_CNL_PORT_TX_DW_GRP((port), 7))
-#define CNL_PORT_TX_DW7_LN0(port)      _MMIO(_CNL_PORT_TX_DW_LN0((port), 7))
+#define CNL_PORT_TX_DW7_GRP(port)      _MMIO(_CNL_PORT_TX_DW_GRP(7, (port)))
+#define CNL_PORT_TX_DW7_LN0(port)      _MMIO(_CNL_PORT_TX_DW_LN0(7, (port)))
+#define ICL_PORT_TX_DW7_AUX(port)      _MMIO(_ICL_PORT_TX_DW_AUX(7, port))
+#define ICL_PORT_TX_DW7_GRP(port)      _MMIO(_ICL_PORT_TX_DW_GRP(7, port))
+#define ICL_PORT_TX_DW7_LN0(port)      _MMIO(_ICL_PORT_TX_DW_LN(7, 0, port))
+#define ICL_PORT_TX_DW7_LN(port, ln)   _MMIO(_ICL_PORT_TX_DW_LN(7, ln, port))
 #define   N_SCALAR(x)                  ((x) << 24)
 #define   N_SCALAR_MASK                        (0x7F << 24)
 
index f3e1d6a..7edce1b 100644 (file)
@@ -494,103 +494,58 @@ static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = {
        { 0x2, 0x7F, 0x3F, 0x00, 0x00 },        /* 400   400      0.0   */
 };
 
-struct icl_combo_phy_ddi_buf_trans {
-       u32 dw2_swing_select;
-       u32 dw2_swing_scalar;
-       u32 dw4_scaling;
-};
-
-/* Voltage Swing Programming for VccIO 0.85V for DP */
-static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_85V[] = {
-                               /* Voltage mV  db    */
-       { 0x2, 0x98, 0x0018 },  /* 400         0.0   */
-       { 0x2, 0x98, 0x3015 },  /* 400         3.5   */
-       { 0x2, 0x98, 0x6012 },  /* 400         6.0   */
-       { 0x2, 0x98, 0x900F },  /* 400         9.5   */
-       { 0xB, 0x70, 0x0018 },  /* 600         0.0   */
-       { 0xB, 0x70, 0x3015 },  /* 600         3.5   */
-       { 0xB, 0x70, 0x6012 },  /* 600         6.0   */
-       { 0x5, 0x00, 0x0018 },  /* 800         0.0   */
-       { 0x5, 0x00, 0x3015 },  /* 800         3.5   */
-       { 0x6, 0x98, 0x0018 },  /* 1200        0.0   */
-};
-
-/* FIXME - After table is updated in Bspec */
-/* Voltage Swing Programming for VccIO 0.85V for eDP */
-static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_85V[] = {
-                               /* Voltage mV  db    */
-       { 0x0, 0x00, 0x00 },    /* 200         0.0   */
-       { 0x0, 0x00, 0x00 },    /* 200         1.5   */
-       { 0x0, 0x00, 0x00 },    /* 200         4.0   */
-       { 0x0, 0x00, 0x00 },    /* 200         6.0   */
-       { 0x0, 0x00, 0x00 },    /* 250         0.0   */
-       { 0x0, 0x00, 0x00 },    /* 250         1.5   */
-       { 0x0, 0x00, 0x00 },    /* 250         4.0   */
-       { 0x0, 0x00, 0x00 },    /* 300         0.0   */
-       { 0x0, 0x00, 0x00 },    /* 300         1.5   */
-       { 0x0, 0x00, 0x00 },    /* 350         0.0   */
-};
-
-/* Voltage Swing Programming for VccIO 0.95V for DP */
-static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_95V[] = {
-                               /* Voltage mV  db    */
-       { 0x2, 0x98, 0x0018 },  /* 400         0.0   */
-       { 0x2, 0x98, 0x3015 },  /* 400         3.5   */
-       { 0x2, 0x98, 0x6012 },  /* 400         6.0   */
-       { 0x2, 0x98, 0x900F },  /* 400         9.5   */
-       { 0x4, 0x98, 0x0018 },  /* 600         0.0   */
-       { 0x4, 0x98, 0x3015 },  /* 600         3.5   */
-       { 0x4, 0x98, 0x6012 },  /* 600         6.0   */
-       { 0x5, 0x76, 0x0018 },  /* 800         0.0   */
-       { 0x5, 0x76, 0x3015 },  /* 800         3.5   */
-       { 0x6, 0x98, 0x0018 },  /* 1200        0.0   */
+/* icl_combo_phy_ddi_translations */
+static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hbr2[] = {
+                                               /* NT mV Trans mV db    */
+       { 0xA, 0x35, 0x3F, 0x00, 0x00 },        /* 350   350      0.0   */
+       { 0xA, 0x4F, 0x37, 0x00, 0x08 },        /* 350   500      3.1   */
+       { 0xC, 0x71, 0x2F, 0x00, 0x10 },        /* 350   700      6.0   */
+       { 0x6, 0x7F, 0x2B, 0x00, 0x14 },        /* 350   900      8.2   */
+       { 0xA, 0x4C, 0x3F, 0x00, 0x00 },        /* 500   500      0.0   */
+       { 0xC, 0x73, 0x34, 0x00, 0x0B },        /* 500   700      2.9   */
+       { 0x6, 0x7F, 0x2F, 0x00, 0x10 },        /* 500   900      5.1   */
+       { 0xC, 0x6C, 0x3C, 0x00, 0x03 },        /* 650   700      0.6   */
+       { 0x6, 0x7F, 0x35, 0x00, 0x0A },        /* 600   900      3.5   */
+       { 0x6, 0x7F, 0x3F, 0x00, 0x00 },        /* 900   900      0.0   */
 };
 
-/* FIXME - After table is updated in Bspec */
-/* Voltage Swing Programming for VccIO 0.95V for eDP */
-static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_95V[] = {
-                               /* Voltage mV  db    */
-       { 0x0, 0x00, 0x00 },    /* 200         0.0   */
-       { 0x0, 0x00, 0x00 },    /* 200         1.5   */
-       { 0x0, 0x00, 0x00 },    /* 200         4.0   */
-       { 0x0, 0x00, 0x00 },    /* 200         6.0   */
-       { 0x0, 0x00, 0x00 },    /* 250         0.0   */
-       { 0x0, 0x00, 0x00 },    /* 250         1.5   */
-       { 0x0, 0x00, 0x00 },    /* 250         4.0   */
-       { 0x0, 0x00, 0x00 },    /* 300         0.0   */
-       { 0x0, 0x00, 0x00 },    /* 300         1.5   */
-       { 0x0, 0x00, 0x00 },    /* 350         0.0   */
+static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr2[] = {
+                                               /* NT mV Trans mV db    */
+       { 0x0, 0x7F, 0x3F, 0x00, 0x00 },        /* 200   200      0.0   */
+       { 0x8, 0x7F, 0x38, 0x00, 0x07 },        /* 200   250      1.9   */
+       { 0x1, 0x7F, 0x33, 0x00, 0x0C },        /* 200   300      3.5   */
+       { 0x9, 0x7F, 0x31, 0x00, 0x0E },        /* 200   350      4.9   */
+       { 0x8, 0x7F, 0x3F, 0x00, 0x00 },        /* 250   250      0.0   */
+       { 0x1, 0x7F, 0x38, 0x00, 0x07 },        /* 250   300      1.6   */
+       { 0x9, 0x7F, 0x35, 0x00, 0x0A },        /* 250   350      2.9   */
+       { 0x1, 0x7F, 0x3F, 0x00, 0x00 },        /* 300   300      0.0   */
+       { 0x9, 0x7F, 0x38, 0x00, 0x07 },        /* 300   350      1.3   */
+       { 0x9, 0x7F, 0x3F, 0x00, 0x00 },        /* 350   350      0.0   */
 };
 
-/* Voltage Swing Programming for VccIO 1.05V for DP */
-static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_1_05V[] = {
-                               /* Voltage mV  db    */
-       { 0x2, 0x98, 0x0018 },  /* 400         0.0   */
-       { 0x2, 0x98, 0x3015 },  /* 400         3.5   */
-       { 0x2, 0x98, 0x6012 },  /* 400         6.0   */
-       { 0x2, 0x98, 0x900F },  /* 400         9.5   */
-       { 0x4, 0x98, 0x0018 },  /* 600         0.0   */
-       { 0x4, 0x98, 0x3015 },  /* 600         3.5   */
-       { 0x4, 0x98, 0x6012 },  /* 600         6.0   */
-       { 0x5, 0x71, 0x0018 },  /* 800         0.0   */
-       { 0x5, 0x71, 0x3015 },  /* 800         3.5   */
-       { 0x6, 0x98, 0x0018 },  /* 1200        0.0   */
+static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr3[] = {
+                                               /* NT mV Trans mV db    */
+       { 0xA, 0x35, 0x3F, 0x00, 0x00 },        /* 350   350      0.0   */
+       { 0xA, 0x4F, 0x37, 0x00, 0x08 },        /* 350   500      3.1   */
+       { 0xC, 0x71, 0x2F, 0x00, 0x10 },        /* 350   700      6.0   */
+       { 0x6, 0x7F, 0x2B, 0x00, 0x14 },        /* 350   900      8.2   */
+       { 0xA, 0x4C, 0x3F, 0x00, 0x00 },        /* 500   500      0.0   */
+       { 0xC, 0x73, 0x34, 0x00, 0x0B },        /* 500   700      2.9   */
+       { 0x6, 0x7F, 0x2F, 0x00, 0x10 },        /* 500   900      5.1   */
+       { 0xC, 0x6C, 0x3C, 0x00, 0x03 },        /* 650   700      0.6   */
+       { 0x6, 0x7F, 0x35, 0x00, 0x0A },        /* 600   900      3.5   */
+       { 0x6, 0x7F, 0x3F, 0x00, 0x00 },        /* 900   900      0.0   */
 };
 
-/* FIXME - After table is updated in Bspec */
-/* Voltage Swing Programming for VccIO 1.05V for eDP */
-static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_1_05V[] = {
-                               /* Voltage mV  db    */
-       { 0x0, 0x00, 0x00 },    /* 200         0.0   */
-       { 0x0, 0x00, 0x00 },    /* 200         1.5   */
-       { 0x0, 0x00, 0x00 },    /* 200         4.0   */
-       { 0x0, 0x00, 0x00 },    /* 200         6.0   */
-       { 0x0, 0x00, 0x00 },    /* 250         0.0   */
-       { 0x0, 0x00, 0x00 },    /* 250         1.5   */
-       { 0x0, 0x00, 0x00 },    /* 250         4.0   */
-       { 0x0, 0x00, 0x00 },    /* 300         0.0   */
-       { 0x0, 0x00, 0x00 },    /* 300         1.5   */
-       { 0x0, 0x00, 0x00 },    /* 350         0.0   */
+static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_hdmi[] = {
+                                               /* NT mV Trans mV db    */
+       { 0xA, 0x60, 0x3F, 0x00, 0x00 },        /* 450   450      0.0   */
+       { 0xB, 0x73, 0x36, 0x00, 0x09 },        /* 450   650      3.2   */
+       { 0x6, 0x7F, 0x31, 0x00, 0x0E },        /* 450   850      5.5   */
+       { 0xB, 0x73, 0x3F, 0x00, 0x00 },        /* 650   650      0.0   ALS */
+       { 0x6, 0x7F, 0x37, 0x00, 0x08 },        /* 650   850      2.3   */
+       { 0x6, 0x7F, 0x3F, 0x00, 0x00 },        /* 850   850      0.0   */
+       { 0x6, 0x7F, 0x35, 0x00, 0x0A },        /* 600   850      3.0   */
 };
 
 struct icl_mg_phy_ddi_buf_trans {
@@ -871,43 +826,23 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
        }
 }
 
-static const struct icl_combo_phy_ddi_buf_trans *
+static const struct cnl_ddi_buf_trans *
 icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port,
-                       int type, int *n_entries)
+                       int type, int rate, int *n_entries)
 {
-       u32 voltage = I915_READ(ICL_PORT_COMP_DW3(port)) & VOLTAGE_INFO_MASK;
-
-       if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) {
-               switch (voltage) {
-               case VOLTAGE_INFO_0_85V:
-                       *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_85V);
-                       return icl_combo_phy_ddi_translations_edp_0_85V;
-               case VOLTAGE_INFO_0_95V:
-                       *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_95V);
-                       return icl_combo_phy_ddi_translations_edp_0_95V;
-               case VOLTAGE_INFO_1_05V:
-                       *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_1_05V);
-                       return icl_combo_phy_ddi_translations_edp_1_05V;
-               default:
-                       MISSING_CASE(voltage);
-                       return NULL;
-               }
-       } else {
-               switch (voltage) {
-               case VOLTAGE_INFO_0_85V:
-                       *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_85V);
-                       return icl_combo_phy_ddi_translations_dp_hdmi_0_85V;
-               case VOLTAGE_INFO_0_95V:
-                       *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_95V);
-                       return icl_combo_phy_ddi_translations_dp_hdmi_0_95V;
-               case VOLTAGE_INFO_1_05V:
-                       *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_1_05V);
-                       return icl_combo_phy_ddi_translations_dp_hdmi_1_05V;
-               default:
-                       MISSING_CASE(voltage);
-                       return NULL;
-               }
+       if (type == INTEL_OUTPUT_HDMI) {
+               *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi);
+               return icl_combo_phy_ddi_translations_hdmi;
+       } else if (rate > 540000 && type == INTEL_OUTPUT_EDP) {
+               *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3);
+               return icl_combo_phy_ddi_translations_edp_hbr3;
+       } else if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) {
+               *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2);
+               return icl_combo_phy_ddi_translations_edp_hbr2;
        }
+
+       *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hbr2);
+       return icl_combo_phy_ddi_translations_dp_hbr2;
 }
 
 static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port)
@@ -918,8 +853,8 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por
 
        if (IS_ICELAKE(dev_priv)) {
                if (intel_port_is_combophy(dev_priv, port))
-                       icl_get_combo_buf_trans(dev_priv, port,
-                                               INTEL_OUTPUT_HDMI, &n_entries);
+                       icl_get_combo_buf_trans(dev_priv, port, INTEL_OUTPUT_HDMI,
+                                               0, &n_entries);
                else
                        n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations);
                default_entry = n_entries - 1;
@@ -1086,7 +1021,7 @@ static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,
                        return DDI_CLK_SEL_TBT_810;
                default:
                        MISSING_CASE(clock);
-                       break;
+                       return DDI_CLK_SEL_NONE;
                }
        case DPLL_ID_ICL_MGPLL1:
        case DPLL_ID_ICL_MGPLL2:
@@ -2275,13 +2210,14 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder,
 u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder)
 {
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+       struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
        enum port port = encoder->port;
        int n_entries;
 
        if (IS_ICELAKE(dev_priv)) {
                if (intel_port_is_combophy(dev_priv, port))
                        icl_get_combo_buf_trans(dev_priv, port, encoder->type,
-                                               &n_entries);
+                                               intel_dp->link_rate, &n_entries);
                else
                        n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations);
        } else if (IS_CANNONLAKE(dev_priv)) {
@@ -2462,14 +2398,15 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder,
 }
 
 static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv,
-                                        u32 level, enum port port, int type)
+                                       u32 level, enum port port, int type,
+                                       int rate)
 {
-       const struct icl_combo_phy_ddi_buf_trans *ddi_translations = NULL;
+       const struct cnl_ddi_buf_trans *ddi_translations = NULL;
        u32 n_entries, val;
        int ln;
 
        ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type,
-                                                  &n_entries);
+                                                  rate, &n_entries);
        if (!ddi_translations)
                return;
 
@@ -2478,34 +2415,23 @@ static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv,
                level = n_entries - 1;
        }
 
-       /* Set PORT_TX_DW5 Rterm Sel to 110b. */
+       /* Set PORT_TX_DW5 */
        val = I915_READ(ICL_PORT_TX_DW5_LN0(port));
-       val &= ~RTERM_SELECT_MASK;
+       val &= ~(SCALING_MODE_SEL_MASK | RTERM_SELECT_MASK |
+                 TAP2_DISABLE | TAP3_DISABLE);
+       val |= SCALING_MODE_SEL(0x2);
        val |= RTERM_SELECT(0x6);
-       I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val);
-
-       /* Program PORT_TX_DW5 */
-       val = I915_READ(ICL_PORT_TX_DW5_LN0(port));
-       /* Set DisableTap2 and DisableTap3 if MIPI DSI
-        * Clear DisableTap2 and DisableTap3 for all other Ports
-        */
-       if (type == INTEL_OUTPUT_DSI) {
-               val |= TAP2_DISABLE;
-               val |= TAP3_DISABLE;
-       } else {
-               val &= ~TAP2_DISABLE;
-               val &= ~TAP3_DISABLE;
-       }
+       val |= TAP3_DISABLE;
        I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val);
 
        /* Program PORT_TX_DW2 */
        val = I915_READ(ICL_PORT_TX_DW2_LN0(port));
        val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK |
                 RCOMP_SCALAR_MASK);
-       val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_select);
-       val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_select);
+       val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_sel);
+       val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_sel);
        /* Program Rcomp scalar for every table entry */
-       val |= RCOMP_SCALAR(ddi_translations[level].dw2_swing_scalar);
+       val |= RCOMP_SCALAR(0x98);
        I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val);
 
        /* Program PORT_TX_DW4 */
@@ -2514,9 +2440,17 @@ static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv,
                val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln));
                val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK |
                         CURSOR_COEFF_MASK);
-               val |= ddi_translations[level].dw4_scaling;
+               val |= POST_CURSOR_1(ddi_translations[level].dw4_post_cursor_1);
+               val |= POST_CURSOR_2(ddi_translations[level].dw4_post_cursor_2);
+               val |= CURSOR_COEFF(ddi_translations[level].dw4_cursor_coeff);
                I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val);
        }
+
+       /* Program PORT_TX_DW7 */
+       val = I915_READ(ICL_PORT_TX_DW7_LN0(port));
+       val &= ~N_SCALAR_MASK;
+       val |= N_SCALAR(ddi_translations[level].dw7_n_scalar);
+       I915_WRITE(ICL_PORT_TX_DW7_GRP(port), val);
 }
 
 static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
@@ -2581,7 +2515,7 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
        I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val);
 
        /* 5. Program swing and de-emphasis */
-       icl_ddi_combo_vswing_program(dev_priv, level, port, type);
+       icl_ddi_combo_vswing_program(dev_priv, level, port, type, rate);
 
        /* 6. Set training enable to trigger update */
        val = I915_READ(ICL_PORT_TX_DW5_LN0(port));
index 3da9c0f..2481281 100644 (file)
@@ -15415,16 +15415,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
        }
 }
 
+static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state)
+{
+       struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+
+       /*
+        * Some SNB BIOSes (e.g. the ASUS K53SV) are known to misprogram
+        * the hardware when a high-res display is plugged in: the DPLL P
+        * divider is zero and the pipe timings are bonkers. We'll
+        * try to disable everything in that case.
+        *
+        * FIXME would be nice to be able to sanitize this state
+        * without several WARNs, but for now let's take the easy
+        * road.
+        */
+       return IS_GEN6(dev_priv) &&
+               crtc_state->base.active &&
+               crtc_state->shared_dpll &&
+               crtc_state->port_clock == 0;
+}
+
 static void intel_sanitize_encoder(struct intel_encoder *encoder)
 {
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
        struct intel_connector *connector;
+       struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+       struct intel_crtc_state *crtc_state = crtc ?
+               to_intel_crtc_state(crtc->base.state) : NULL;
 
        /* We need to check both for a crtc link (meaning that the
         * encoder is active and trying to read from a pipe) and the
         * pipe itself being active. */
-       bool has_active_crtc = encoder->base.crtc &&
-               to_intel_crtc(encoder->base.crtc)->active;
+       bool has_active_crtc = crtc_state &&
+               crtc_state->base.active;
+
+       if (crtc_state && has_bogus_dpll_config(crtc_state)) {
+               DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n",
+                             pipe_name(crtc->pipe));
+               has_active_crtc = false;
+       }
 
        connector = intel_encoder_find_connector(encoder);
        if (connector && !has_active_crtc) {
@@ -15435,16 +15464,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
                /* Connector is active, but has no active pipe. This is
                 * fallout from our resume register restoring. Disable
                 * the encoder manually again. */
-               if (encoder->base.crtc) {
-                       struct drm_crtc_state *crtc_state = encoder->base.crtc->state;
+               if (crtc_state) {
+                       struct drm_encoder *best_encoder;
 
                        DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n",
                                      encoder->base.base.id,
                                      encoder->base.name);
+
+                       /* avoid oopsing in case the hooks consult best_encoder */
+                       best_encoder = connector->base.state->best_encoder;
+                       connector->base.state->best_encoder = &encoder->base;
+
                        if (encoder->disable)
-                               encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
+                               encoder->disable(encoder, crtc_state,
+                                                connector->base.state);
                        if (encoder->post_disable)
-                               encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
+                               encoder->post_disable(encoder, crtc_state,
+                                                     connector->base.state);
+
+                       connector->base.state->best_encoder = best_encoder;
                }
                encoder->base.crtc = NULL;
 
index fdd2cbc..22a7460 100644 (file)
@@ -304,9 +304,11 @@ static int cnl_max_source_rate(struct intel_dp *intel_dp)
 static int icl_max_source_rate(struct intel_dp *intel_dp)
 {
        struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+       struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
        enum port port = dig_port->base.port;
 
-       if (port == PORT_B)
+       if (intel_port_is_combophy(dev_priv, port) &&
+           !intel_dp_is_edp(intel_dp))
                return 540000;
 
        return 810000;
index f94a04b..e9ddeaf 100644 (file)
@@ -209,6 +209,16 @@ struct intel_fbdev {
        unsigned long vma_flags;
        async_cookie_t cookie;
        int preferred_bpp;
+
+       /* Whether or not fbdev hpd processing is temporarily suspended */
+       bool hpd_suspended : 1;
+       /* Set when a hotplug was received while HPD processing was
+        * suspended
+        */
+       bool hpd_waiting : 1;
+
+       /* Protects hpd_suspended */
+       struct mutex hpd_lock;
 };
 
 struct intel_encoder {
index fb5bb5b..7f365ac 100644 (file)
@@ -679,6 +679,7 @@ int intel_fbdev_init(struct drm_device *dev)
        if (ifbdev == NULL)
                return -ENOMEM;
 
+       mutex_init(&ifbdev->hpd_lock);
        drm_fb_helper_prepare(dev, &ifbdev->helper, &intel_fb_helper_funcs);
 
        if (!intel_fbdev_init_bios(dev, ifbdev))
@@ -752,6 +753,26 @@ void intel_fbdev_fini(struct drm_i915_private *dev_priv)
        intel_fbdev_destroy(ifbdev);
 }
 
+/* Suspends/resumes fbdev processing of incoming HPD events. When resuming HPD
+ * processing, fbdev will perform a full connector reprobe if a hotplug event
+ * was received while HPD was suspended.
+ */
+static void intel_fbdev_hpd_set_suspend(struct intel_fbdev *ifbdev, int state)
+{
+       bool send_hpd = false;
+
+       mutex_lock(&ifbdev->hpd_lock);
+       ifbdev->hpd_suspended = state == FBINFO_STATE_SUSPENDED;
+       send_hpd = !ifbdev->hpd_suspended && ifbdev->hpd_waiting;
+       ifbdev->hpd_waiting = false;
+       mutex_unlock(&ifbdev->hpd_lock);
+
+       if (send_hpd) {
+               DRM_DEBUG_KMS("Handling delayed fbcon HPD event\n");
+               drm_fb_helper_hotplug_event(&ifbdev->helper);
+       }
+}
+
 void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
@@ -773,6 +794,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous
                 */
                if (state != FBINFO_STATE_RUNNING)
                        flush_work(&dev_priv->fbdev_suspend_work);
+
                console_lock();
        } else {
                /*
@@ -800,17 +822,26 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous
 
        drm_fb_helper_set_suspend(&ifbdev->helper, state);
        console_unlock();
+
+       intel_fbdev_hpd_set_suspend(ifbdev, state);
 }
 
 void intel_fbdev_output_poll_changed(struct drm_device *dev)
 {
        struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;
+       bool send_hpd;
 
        if (!ifbdev)
                return;
 
        intel_fbdev_sync(ifbdev);
-       if (ifbdev->vma || ifbdev->helper.deferred_setup)
+
+       mutex_lock(&ifbdev->hpd_lock);
+       send_hpd = !ifbdev->hpd_suspended;
+       ifbdev->hpd_waiting = true;
+       mutex_unlock(&ifbdev->hpd_lock);
+
+       if (send_hpd && (ifbdev->vma || ifbdev->helper.deferred_setup))
                drm_fb_helper_hotplug_event(&ifbdev->helper);
 }
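The two fbdev hunks above form a deferred-hotplug latch: the poll path records that an event arrived and forwards it only while HPD handling is live, and the resume path replays at most one deferred event. Both sides compute their decision under hpd_lock but call drm_fb_helper_hotplug_event() only after dropping it, since the fb-helper takes its own locks. A compressed sketch of the pattern, with hypothetical names:

        /* producer: record the event; deliver immediately unless suspended */
        mutex_lock(&lock);
        deliver = !suspended;
        waiting = true;
        mutex_unlock(&lock);
        if (deliver)
                fire_event();

        /* resume: replay a deferred event at most once */
        mutex_lock(&lock);
        suspended = false;
        replay = waiting;
        waiting = false;
        mutex_unlock(&lock);
        if (replay)
                fire_event();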
 
index b8f106d..3ac2015 100644 (file)
 struct opregion_header {
        u8 signature[16];
        u32 size;
-       u32 opregion_ver;
+       struct {
+               u8 rsvd;
+               u8 revision;
+               u8 minor;
+               u8 major;
+       } __packed over;
        u8 bios_ver[32];
        u8 vbios_ver[16];
        u8 driver_ver[16];
@@ -119,7 +124,8 @@ struct opregion_asle {
        u64 fdss;
        u32 fdsp;
        u32 stat;
-       u64 rvda;       /* Physical address of raw vbt data */
+       u64 rvda;       /* Physical (2.0) or opregion-relative (2.1+)
+                        * address of raw VBT data. */
        u32 rvds;       /* Size of raw vbt data */
        u8 rsvd[58];
 } __packed;
@@ -925,6 +931,11 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
        opregion->header = base;
        opregion->lid_state = base + ACPI_CLID;
 
+       DRM_DEBUG_DRIVER("ACPI OpRegion version %u.%u.%u\n",
+                        opregion->header->over.major,
+                        opregion->header->over.minor,
+                        opregion->header->over.revision);
+
        mboxes = opregion->header->mboxes;
        if (mboxes & MBOX_ACPI) {
                DRM_DEBUG_DRIVER("Public ACPI methods supported\n");
@@ -953,11 +964,26 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
        if (dmi_check_system(intel_no_opregion_vbt))
                goto out;
 
-       if (opregion->header->opregion_ver >= 2 && opregion->asle &&
+       if (opregion->header->over.major >= 2 && opregion->asle &&
            opregion->asle->rvda && opregion->asle->rvds) {
-               opregion->rvda = memremap(opregion->asle->rvda,
-                                         opregion->asle->rvds,
+               resource_size_t rvda = opregion->asle->rvda;
+
+               /*
+                * opregion 2.0: rvda is the physical VBT address.
+                *
+                * opregion 2.1+: rvda is an unsigned, relative offset from
+                * the opregion base and should never point within the opregion.
+                */
+               if (opregion->header->over.major > 2 ||
+                   opregion->header->over.minor >= 1) {
+                       WARN_ON(rvda < OPREGION_SIZE);
+
+                       rvda += asls;
+               }
+
+               opregion->rvda = memremap(rvda, opregion->asle->rvds,
                                          MEMREMAP_WB);
+
                vbt = opregion->rvda;
                vbt_size = opregion->asle->rvds;
                if (intel_bios_is_valid_vbt(vbt, vbt_size)) {
@@ -967,6 +993,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
                        goto out;
                } else {
                        DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (RVDA)\n");
+                       memunmap(opregion->rvda);
+                       opregion->rvda = NULL;
                }
        }
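In short, the hunk above changes how rvda is interpreted depending on the OpRegion version, which the new over struct unpacks from the former opregion_ver u32 (assuming the little-endian byte layout the struct implies). A minimal sketch of the resulting address computation, where asls is the OpRegion base address read from PCI config space:

        /* illustrative only; called with major >= 2, mirroring the hunk above */
        u64 resolve_rvda(u8 major, u8 minor, u64 asls, u64 rvda)
        {
                if (major == 2 && minor == 0)
                        return rvda;    /* 2.0: rvda is already physical */
                /* 2.1+: rvda is an offset from the OpRegion base; anything
                 * below OPREGION_SIZE would point back into the OpRegion
                 * itself, which the code above flags with a WARN_ON. */
                return asls + rvda;
        }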
 
index 72edaa7..a1a7cc2 100644 (file)
@@ -415,16 +415,17 @@ struct intel_engine_cs {
                /**
                 * @enable_count: Reference count for the enabled samplers.
                 *
-                * Index number corresponds to the bit number from @enable.
+                * Index number corresponds to @enum drm_i915_pmu_engine_sample.
                 */
-               unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+               unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
                /**
                 * @sample: Counter values for sampling events.
                 *
                 * Our internal timer stores the current counters in this field.
+                *
+                * Index number corresponds to @enum drm_i915_pmu_engine_sample.
                 */
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
-               struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+               struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
        } pmu;
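The point of the two hunks above is that both arrays are now dimensioned by the same enum that indexes them, rather than by the width of an unrelated configuration bit-mask. Sketch of the assumed shape (a hypothetical mirror of the uapi enum):

        enum engine_sample {
                SAMPLE_BUSY,
                SAMPLE_WAIT,
                SAMPLE_SEMA,
                ENGINE_SAMPLE_COUNT     /* keep last: array dimension */
        };

        unsigned int enable_count[ENGINE_SAMPLE_COUNT];

Adding a sample type then grows both arrays automatically instead of silently overflowing them.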
 
        /*
index d2e003d..5170a0f 100644 (file)
@@ -494,7 +494,7 @@ skl_program_plane(struct intel_plane *plane,
 
        keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha);
 
-       keymsk = key->channel_mask & 0x3ffffff;
+       keymsk = key->channel_mask & 0x7ffffff;
        if (alpha < 0xff)
                keymsk |= PLANE_KEYMSK_ALPHA_ENABLE;
 
index 2c5bbe3..e31e263 100644 (file)
@@ -643,8 +643,10 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
                int bus_format;
 
                ret = of_property_read_u32(child, "reg", &i);
-               if (ret || i < 0 || i > 1)
-                       return -EINVAL;
+               if (ret || i < 0 || i > 1) {
+                       ret = -EINVAL;
+                       goto free_child;
+               }
 
                if (!of_device_is_available(child))
                        continue;
@@ -657,7 +659,6 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
                channel = &imx_ldb->channel[i];
                channel->ldb = imx_ldb;
                channel->chno = i;
-               channel->child = child;
 
                /*
                 * The output port is port@4 with an external 4-port mux or
@@ -667,13 +668,13 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
                                                  imx_ldb->lvds_mux ? 4 : 2, 0,
                                                  &channel->panel, &channel->bridge);
                if (ret && ret != -ENODEV)
-                       return ret;
+                       goto free_child;
 
                /* panel ddc only if there is no bridge */
                if (!channel->bridge) {
                        ret = imx_ldb_panel_ddc(dev, channel, child);
                        if (ret)
-                               return ret;
+                               goto free_child;
                }
 
                bus_format = of_get_bus_format(dev, child);
@@ -689,18 +690,26 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
                if (bus_format < 0) {
                        dev_err(dev, "could not determine data mapping: %d\n",
                                bus_format);
-                       return bus_format;
+                       ret = bus_format;
+                       goto free_child;
                }
                channel->bus_format = bus_format;
+               channel->child = child;
 
                ret = imx_ldb_register(drm, channel);
-               if (ret)
-                       return ret;
+               if (ret) {
+                       channel->child = NULL;
+                       goto free_child;
+               }
        }
 
        dev_set_drvdata(dev, imx_ldb);
 
        return 0;
+
+free_child:
+       of_node_put(child);
+       return ret;
 }
 
 static void imx_ldb_unbind(struct device *dev, struct device *master,
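All of the imx_ldb_bind() changes above enforce one rule: for_each_child_of_node() holds a reference on child for the current iteration, so any early exit from the loop body must drop it explicitly. A generic sketch, with do_setup() as a hypothetical helper:

        for_each_child_of_node(np, child) {
                ret = do_setup(child);
                if (ret) {
                        of_node_put(child);     /* balance the iterator's get */
                        return ret;
                }
        }

Assigning channel->child only once setup can no longer fail, and clearing it again if registration fails, keeps the driver's teardown path from dropping the same reference twice.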
index c390924..21e964f 100644 (file)
@@ -370,9 +370,9 @@ static int ipu_plane_atomic_check(struct drm_plane *plane,
        if (ret)
                return ret;
 
-       /* CRTC should be enabled */
+       /* nothing to check when disabling or disabled */
        if (!crtc_state->enable)
-               return -EINVAL;
+               return 0;
 
        switch (plane->type) {
        case DRM_PLANE_TYPE_PRIMARY:
index 00a9c2a..64fb788 100644 (file)
@@ -1406,7 +1406,7 @@ static void dsi_pll_disable(struct dss_pll *pll)
 
 static int dsi_dump_dsi_clocks(struct seq_file *s, void *p)
 {
-       struct dsi_data *dsi = p;
+       struct dsi_data *dsi = s->private;
        struct dss_pll_clock_info *cinfo = &dsi->pll.cinfo;
        enum dss_clk_source dispc_clk_src, dsi_clk_src;
        int dsi_module = dsi->module_id;
@@ -1467,7 +1467,7 @@ static int dsi_dump_dsi_clocks(struct seq_file *s, void *p)
 #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
 static int dsi_dump_dsi_irqs(struct seq_file *s, void *p)
 {
-       struct dsi_data *dsi = p;
+       struct dsi_data *dsi = s->private;
        unsigned long flags;
        struct dsi_irq_stats stats;
 
@@ -1558,7 +1558,7 @@ static int dsi_dump_dsi_irqs(struct seq_file *s, void *p)
 
 static int dsi_dump_dsi_regs(struct seq_file *s, void *p)
 {
-       struct dsi_data *dsi = p;
+       struct dsi_data *dsi = s->private;
 
        if (dsi_runtime_get(dsi))
                return 0;
@@ -4751,6 +4751,17 @@ static int dsi_set_config(struct omap_dss_device *dssdev,
        dsi->vm.flags |= DISPLAY_FLAGS_HSYNC_HIGH;
        dsi->vm.flags &= ~DISPLAY_FLAGS_VSYNC_LOW;
        dsi->vm.flags |= DISPLAY_FLAGS_VSYNC_HIGH;
+       /*
+        * HACK: These flags should be handled through the omap_dss_device bus
+        * flags, but this will only be possible when the DSI encoder will be
+        * converted to the omapdrm-managed encoder model.
+        */
+       dsi->vm.flags &= ~DISPLAY_FLAGS_PIXDATA_NEGEDGE;
+       dsi->vm.flags |= DISPLAY_FLAGS_PIXDATA_POSEDGE;
+       dsi->vm.flags &= ~DISPLAY_FLAGS_DE_LOW;
+       dsi->vm.flags |= DISPLAY_FLAGS_DE_HIGH;
+       dsi->vm.flags &= ~DISPLAY_FLAGS_SYNC_POSEDGE;
+       dsi->vm.flags |= DISPLAY_FLAGS_SYNC_NEGEDGE;
 
        dss_mgr_set_timings(&dsi->output, &dsi->vm);
 
@@ -5083,15 +5094,15 @@ static int dsi_bind(struct device *dev, struct device *master, void *data)
 
        snprintf(name, sizeof(name), "dsi%u_regs", dsi->module_id + 1);
        dsi->debugfs.regs = dss_debugfs_create_file(dss, name,
-                                                   dsi_dump_dsi_regs, &dsi);
+                                                   dsi_dump_dsi_regs, dsi);
 #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
        snprintf(name, sizeof(name), "dsi%u_irqs", dsi->module_id + 1);
        dsi->debugfs.irqs = dss_debugfs_create_file(dss, name,
-                                                   dsi_dump_dsi_irqs, &dsi);
+                                                   dsi_dump_dsi_irqs, dsi);
 #endif
        snprintf(name, sizeof(name), "dsi%u_clks", dsi->module_id + 1);
        dsi->debugfs.clks = dss_debugfs_create_file(dss, name,
-                                                   dsi_dump_dsi_clocks, &dsi);
+                                                   dsi_dump_dsi_clocks, dsi);
 
        return 0;
 }
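This registration hunk and the three s->private hunks earlier fix a double-indirection bug: the bind callback passed &dsi, the address of a stack-local pointer, as the debugfs payload, so the dump callbacks later dereferenced a dangling struct dsi_data **. A minimal illustration with hypothetical names:

        /* buggy shape: &data points into a stack frame that is gone
         * by the time the debugfs file is read */
        dss_debugfs_create_file(dss, "regs", dump_regs, &data);

        /* fixed shape: hand over the object pointer itself ... */
        dss_debugfs_create_file(dss, "regs", dump_regs, data);

        /* ... and fetch it back from seq_file::private in the callback */
        static int dump_regs(struct seq_file *s, void *unused)
        {
                struct dsi_data *dsi = s->private;
                return 0;
        }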
@@ -5104,8 +5115,6 @@ static void dsi_unbind(struct device *dev, struct device *master, void *data)
        dss_debugfs_remove_file(dsi->debugfs.irqs);
        dss_debugfs_remove_file(dsi->debugfs.regs);
 
-       of_platform_depopulate(dev);
-
        WARN_ON(dsi->scp_clk_refcount > 0);
 
        dss_pll_unregister(&dsi->pll);
@@ -5457,6 +5466,8 @@ static int dsi_remove(struct platform_device *pdev)
 
        dsi_uninit_output(dsi);
 
+       of_platform_depopulate(&pdev->dev);
+
        pm_runtime_disable(&pdev->dev);
 
        if (dsi->vdds_dsi_reg != NULL && dsi->vdds_dsi_enabled) {
index d587779..a97294a 100644 (file)
@@ -5676,7 +5676,7 @@ int ci_dpm_init(struct radeon_device *rdev)
        u16 data_offset, size;
        u8 frev, crev;
        struct ci_power_info *pi;
-       enum pci_bus_speed speed_cap;
+       enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN;
        struct pci_dev *root = rdev->pdev->bus->self;
        int ret;
 
@@ -5685,7 +5685,8 @@ int ci_dpm_init(struct radeon_device *rdev)
                return -ENOMEM;
        rdev->pm.dpm.priv = pi;
 
-       speed_cap = pcie_get_speed_cap(root);
+       if (!pci_is_root_bus(rdev->pdev->bus))
+               speed_cap = pcie_get_speed_cap(root);
        if (speed_cap == PCI_SPEED_UNKNOWN) {
                pi->sys_pcie_mask = 0;
        } else {
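This hunk, and the identical one in si_dpm.c below, guard against a NULL upstream bridge: a GPU sitting directly on a PCI root bus has no bridge device above it, so pcie_get_speed_cap() would dereference NULL. In sketch form:

        enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN;
        struct pci_dev *root = rdev->pdev->bus->self;   /* NULL on a root bus */

        if (!pci_is_root_bus(rdev->pdev->bus))          /* guard the deref */
                speed_cap = pcie_get_speed_cap(root);

With speed_cap pre-initialized, the existing PCI_SPEED_UNKNOWN fallback covers both "no bridge" and "bridge reports no capability".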
index 8fb60b3..0a785ef 100644 (file)
@@ -6899,7 +6899,7 @@ int si_dpm_init(struct radeon_device *rdev)
        struct ni_power_info *ni_pi;
        struct si_power_info *si_pi;
        struct atom_clock_dividers dividers;
-       enum pci_bus_speed speed_cap;
+       enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN;
        struct pci_dev *root = rdev->pdev->bus->self;
        int ret;
 
@@ -6911,7 +6911,8 @@ int si_dpm_init(struct radeon_device *rdev)
        eg_pi = &ni_pi->eg;
        pi = &eg_pi->rv7xx;
 
-       speed_cap = pcie_get_speed_cap(root);
+       if (!pci_is_root_bus(rdev->pdev->bus))
+               speed_cap = pcie_get_speed_cap(root);
        if (speed_cap == PCI_SPEED_UNKNOWN) {
                si_pi->sys_pcie_mask = 0;
        } else {
index 37f9302..c0351ab 100644 (file)
@@ -1,17 +1,8 @@
-//SPDX-License-Identifier: GPL-2.0+
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
  * Author:
  *      Sandy Huang <hjc@rock-chips.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <drm/drmP.h>
index 38b52e6..27b9635 100644 (file)
@@ -1,17 +1,8 @@
-//SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
  * Author:
  *      Sandy Huang <hjc@rock-chips.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifdef CONFIG_ROCKCHIP_RGB
index 4463d38..e2942c9 100644 (file)
@@ -440,13 +440,10 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 
        while ((entity->dependency =
                        sched->ops->dependency(sched_job, entity))) {
+               trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
 
-               if (drm_sched_entity_add_dependency_cb(entity)) {
-
-                       trace_drm_sched_job_wait_dep(sched_job,
-                                                    entity->dependency);
+               if (drm_sched_entity_add_dependency_cb(entity))
                        return NULL;
-               }
        }
 
        /* skip jobs from entity that marked guilty */
index 0420f5c..cf45d0f 100644 (file)
@@ -761,6 +761,7 @@ static int sun4i_tcon_init_clocks(struct device *dev,
                        return PTR_ERR(tcon->sclk0);
                }
        }
+       clk_prepare_enable(tcon->sclk0);
 
        if (tcon->quirks->has_channel_1) {
                tcon->sclk1 = devm_clk_get(dev, "tcon-ch1");
@@ -775,6 +776,7 @@ static int sun4i_tcon_init_clocks(struct device *dev,
 
 static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon)
 {
+       clk_disable_unprepare(tcon->sclk0);
        clk_disable_unprepare(tcon->clk);
 }
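The new clk_prepare_enable() needs the matching clk_disable_unprepare() added to the teardown path above, in reverse order of acquisition. Note that the hunk ignores the return value of clk_prepare_enable(); a stricter variant of the same change would check it:

        ret = clk_prepare_enable(tcon->sclk0);
        if (ret) {
                dev_err(dev, "Couldn't enable the TCON channel 0 clock\n");
                return ret;
        }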
 
index 9d9e814..d7b409a 100644 (file)
@@ -1,4 +1,5 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0+
+
 #include "vkms_drv.h"
 #include <linux/crc32.h>
 #include <drm/drm_atomic.h>
index 177bbcb..eb56ee8 100644 (file)
@@ -1,10 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 #include "vkms_drv.h"
 #include <drm/drm_atomic_helper.h>
index 8308787..7dcbecb 100644 (file)
@@ -1,9 +1,4 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 /**
  * DOC: vkms (Virtual Kernel Modesetting)
index e4469cd..81f1cfb 100644 (file)
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
 #ifndef _VKMS_DRV_H_
 #define _VKMS_DRV_H_
 
index 80311da..138b0bb 100644 (file)
@@ -1,10 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 #include <linux/shmem_fs.h>
 
index 271a0eb..4173e4f 100644 (file)
@@ -1,10 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 #include "vkms_drv.h"
 #include <drm/drm_crtc_helper.h>
index 4188176..0e67d2d 100644 (file)
@@ -1,10 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 #include "vkms_drv.h"
 #include <drm/drm_plane_helper.h>
index 25afb1d..7ef5dcb 100644 (file)
@@ -26,6 +26,7 @@
  **************************************************************************/
 #include <linux/module.h>
 #include <linux/console.h>
+#include <linux/dma-mapping.h>
 
 #include <drm/drmP.h>
 #include "vmwgfx_drv.h"
@@ -34,7 +35,6 @@
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_module.h>
-#include <linux/intel-iommu.h>
 
 #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices"
 #define VMWGFX_CHIP_SVGAII 0
@@ -546,6 +546,21 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv)
 }
 
 /**
+ * vmw_assume_iommu - Figure out whether coherent dma-remapping might be
+ * taking place.
+ * @dev: Pointer to the struct drm_device.
+ *
+ * Return: true if an IOMMU is likely present, false otherwise.
+ */
+static bool vmw_assume_iommu(struct drm_device *dev)
+{
+       const struct dma_map_ops *ops = get_dma_ops(dev->dev);
+
+       return !dma_is_direct(ops) && ops &&
+               ops->map_page != dma_direct_map_page;
+}
+
+/**
  * vmw_dma_select_mode - Determine how DMA mappings should be set up for this
  * system.
  *
@@ -565,55 +580,27 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
                [vmw_dma_alloc_coherent] = "Using coherent TTM pages.",
                [vmw_dma_map_populate] = "Keeping DMA mappings.",
                [vmw_dma_map_bind] = "Giving up DMA mappings early."};
-#ifdef CONFIG_X86
-       const struct dma_map_ops *dma_ops = get_dma_ops(dev_priv->dev->dev);
 
-#ifdef CONFIG_INTEL_IOMMU
-       if (intel_iommu_enabled) {
+       if (vmw_force_coherent)
+               dev_priv->map_mode = vmw_dma_alloc_coherent;
+       else if (vmw_assume_iommu(dev_priv->dev))
                dev_priv->map_mode = vmw_dma_map_populate;
-               goto out_fixup;
-       }
-#endif
-
-       if (!(vmw_force_iommu || vmw_force_coherent)) {
+       else if (!vmw_force_iommu)
                dev_priv->map_mode = vmw_dma_phys;
-               DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]);
-               return 0;
-       }
-
-       dev_priv->map_mode = vmw_dma_map_populate;
-
-       if (dma_ops && dma_ops->sync_single_for_cpu)
+       else if (IS_ENABLED(CONFIG_SWIOTLB) && swiotlb_nr_tbl())
                dev_priv->map_mode = vmw_dma_alloc_coherent;
-#ifdef CONFIG_SWIOTLB
-       if (swiotlb_nr_tbl() == 0)
+       else
                dev_priv->map_mode = vmw_dma_map_populate;
-#endif
 
-#ifdef CONFIG_INTEL_IOMMU
-out_fixup:
-#endif
-       if (dev_priv->map_mode == vmw_dma_map_populate &&
-           vmw_restrict_iommu)
+       if (dev_priv->map_mode == vmw_dma_map_populate && vmw_restrict_iommu)
                dev_priv->map_mode = vmw_dma_map_bind;
 
-       if (vmw_force_coherent)
-               dev_priv->map_mode = vmw_dma_alloc_coherent;
-
-#if !defined(CONFIG_SWIOTLB) && !defined(CONFIG_INTEL_IOMMU)
-       /*
-        * No coherent page pool
-        */
-       if (dev_priv->map_mode == vmw_dma_alloc_coherent)
+       /* No TTM coherent page pool? FIXME: Ask TTM instead! */
+       if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) &&
+           (dev_priv->map_mode == vmw_dma_alloc_coherent))
                return -EINVAL;
-#endif
-
-#else /* CONFIG_X86 */
-       dev_priv->map_mode = vmw_dma_map_populate;
-#endif /* CONFIG_X86 */
 
        DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]);
-
        return 0;
 }
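Rewritten this way, the mode selection reads as a straight priority ladder. As a pure function (illustrative only; the committed code additionally demotes populate to bind under vmw_restrict_iommu and rejects coherent mode when no coherent page pool can exist):

        static enum vmw_dma_map_mode pick_mode(bool force_coherent,
                                               bool assume_iommu,
                                               bool force_iommu,
                                               bool have_swiotlb)
        {
                if (force_coherent)
                        return vmw_dma_alloc_coherent;
                if (assume_iommu)
                        return vmw_dma_map_populate;
                if (!force_iommu)
                        return vmw_dma_phys;
                if (have_swiotlb)
                        return vmw_dma_alloc_coherent;
                return vmw_dma_map_populate;
        }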
 
@@ -625,24 +612,20 @@ out_fixup:
  * With 32-bit we can only handle 32 bit PFNs. Optionally set that
  * restriction also for 64-bit systems.
  */
-#ifdef CONFIG_INTEL_IOMMU
 static int vmw_dma_masks(struct vmw_private *dev_priv)
 {
        struct drm_device *dev = dev_priv->dev;
+       int ret = 0;
 
-       if (intel_iommu_enabled &&
+       ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64));
+       if (dev_priv->map_mode != vmw_dma_phys &&
            (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) {
                DRM_INFO("Restricting DMA addresses to 44 bits.\n");
-               return dma_set_mask(dev->dev, DMA_BIT_MASK(44));
+               return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44));
        }
-       return 0;
-}
-#else
-static int vmw_dma_masks(struct vmw_private *dev_priv)
-{
-       return 0;
+
+       return ret;
 }
-#endif
 
 static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 {
index f2d13a7..88b8178 100644 (file)
@@ -3570,7 +3570,7 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
                *p_fence = NULL;
        }
 
-       return 0;
+       return ret;
 }
 
 /**
index b351fb5..ed2f678 100644 (file)
@@ -1646,7 +1646,7 @@ static int vmw_kms_check_topology(struct drm_device *dev,
                struct drm_connector_state *conn_state;
                struct vmw_connector_state *vmw_conn_state;
 
-               if (!du->pref_active) {
+               if (!du->pref_active && new_crtc_state->enable) {
                        ret = -EINVAL;
                        goto clean;
                }
@@ -2554,8 +2554,8 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv,
                                      user_fence_rep)
 {
        struct vmw_fence_obj *fence = NULL;
-       uint32_t handle;
-       int ret;
+       uint32_t handle = 0;
+       int ret = 0;
 
        if (file_priv || user_fence_rep || vmw_validation_has_bos(ctx) ||
            out_fence)
index 474b00e..0a7d439 100644 (file)
@@ -898,8 +898,8 @@ static struct ipu_devtype ipu_type_imx51 = {
        .cpmem_ofs = 0x1f000000,
        .srm_ofs = 0x1f040000,
        .tpm_ofs = 0x1f060000,
-       .csi0_ofs = 0x1f030000,
-       .csi1_ofs = 0x1f038000,
+       .csi0_ofs = 0x1e030000,
+       .csi1_ofs = 0x1e038000,
        .ic_ofs = 0x1e020000,
        .disp0_ofs = 0x1e040000,
        .disp1_ofs = 0x1e048000,
@@ -914,8 +914,8 @@ static struct ipu_devtype ipu_type_imx53 = {
        .cpmem_ofs = 0x07000000,
        .srm_ofs = 0x07040000,
        .tpm_ofs = 0x07060000,
-       .csi0_ofs = 0x07030000,
-       .csi1_ofs = 0x07038000,
+       .csi0_ofs = 0x06030000,
+       .csi1_ofs = 0x06038000,
        .ic_ofs = 0x06020000,
        .disp0_ofs = 0x06040000,
        .disp1_ofs = 0x06048000,
index 2f8db9d..4a28f3f 100644 (file)
@@ -106,6 +106,7 @@ struct ipu_pre {
        void                    *buffer_virt;
        bool                    in_use;
        unsigned int            safe_window_end;
+       unsigned int            last_bufaddr;
 };
 
 static DEFINE_MUTEX(ipu_pre_list_mutex);
@@ -185,6 +186,7 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width,
 
        writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF);
        writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF);
+       pre->last_bufaddr = bufaddr;
 
        val = IPU_PRE_PREF_ENG_CTRL_INPUT_PIXEL_FORMAT(0) |
              IPU_PRE_PREF_ENG_CTRL_INPUT_ACTIVE_BPP(active_bpp) |
@@ -242,7 +244,11 @@ void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr)
        unsigned short current_yblock;
        u32 val;
 
+       if (bufaddr == pre->last_bufaddr)
+               return;
+
        writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF);
+       pre->last_bufaddr = bufaddr;
 
        do {
                if (time_after(jiffies, timeout)) {
index c530476..ac9fda1 100644 (file)
@@ -30,6 +30,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/kfifo.h>
 #include <linux/sched/signal.h>
 #include <linux/export.h>
 #include <linux/slab.h>
@@ -661,17 +662,12 @@ EXPORT_SYMBOL_GPL(hid_dump_device);
 /* enqueue string to 'events' ring buffer */
 void hid_debug_event(struct hid_device *hdev, char *buf)
 {
-       unsigned i;
        struct hid_debug_list *list;
        unsigned long flags;
 
        spin_lock_irqsave(&hdev->debug_list_lock, flags);
-       list_for_each_entry(list, &hdev->debug_list, node) {
-               for (i = 0; buf[i]; i++)
-                       list->hid_debug_buf[(list->tail + i) % HID_DEBUG_BUFSIZE] =
-                               buf[i];
-               list->tail = (list->tail + i) % HID_DEBUG_BUFSIZE;
-        }
+       list_for_each_entry(list, &hdev->debug_list, node)
+               kfifo_in(&list->hid_debug_fifo, buf, strlen(buf));
        spin_unlock_irqrestore(&hdev->debug_list_lock, flags);
 
        wake_up_interruptible(&hdev->debug_wait);
@@ -722,8 +718,7 @@ void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 valu
        hid_debug_event(hdev, buf);
 
        kfree(buf);
-        wake_up_interruptible(&hdev->debug_wait);
-
+       wake_up_interruptible(&hdev->debug_wait);
 }
 EXPORT_SYMBOL_GPL(hid_dump_input);
 
@@ -1083,8 +1078,8 @@ static int hid_debug_events_open(struct inode *inode, struct file *file)
                goto out;
        }
 
-       if (!(list->hid_debug_buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_KERNEL))) {
-               err = -ENOMEM;
+       err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL);
+       if (err) {
                kfree(list);
                goto out;
        }
@@ -1104,77 +1099,57 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer,
                size_t count, loff_t *ppos)
 {
        struct hid_debug_list *list = file->private_data;
-       int ret = 0, len;
+       int ret = 0, copied;
        DECLARE_WAITQUEUE(wait, current);
 
        mutex_lock(&list->read_mutex);
-       while (ret == 0) {
-               if (list->head == list->tail) {
-                       add_wait_queue(&list->hdev->debug_wait, &wait);
-                       set_current_state(TASK_INTERRUPTIBLE);
-
-                       while (list->head == list->tail) {
-                               if (file->f_flags & O_NONBLOCK) {
-                                       ret = -EAGAIN;
-                                       break;
-                               }
-                               if (signal_pending(current)) {
-                                       ret = -ERESTARTSYS;
-                                       break;
-                               }
+       if (kfifo_is_empty(&list->hid_debug_fifo)) {
+               add_wait_queue(&list->hdev->debug_wait, &wait);
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               while (kfifo_is_empty(&list->hid_debug_fifo)) {
+                       if (file->f_flags & O_NONBLOCK) {
+                               ret = -EAGAIN;
+                               break;
+                       }
 
-                               if (!list->hdev || !list->hdev->debug) {
-                                       ret = -EIO;
-                                       set_current_state(TASK_RUNNING);
-                                       goto out;
-                               }
+                       if (signal_pending(current)) {
+                               ret = -ERESTARTSYS;
+                               break;
+                       }
 
-                               /* allow O_NONBLOCK from other threads */
-                               mutex_unlock(&list->read_mutex);
-                               schedule();
-                               mutex_lock(&list->read_mutex);
-                               set_current_state(TASK_INTERRUPTIBLE);
+                       /* if list->hdev is NULL we cannot remove_wait_queue().
+                        * if list->hdev->debug is 0 then hid_debug_unregister()
+                        * was already called and list->hdev is being destroyed.
+                        * if we add remove_wait_queue() here we can hit a race.
+                        */
+                       if (!list->hdev || !list->hdev->debug) {
+                               ret = -EIO;
+                               set_current_state(TASK_RUNNING);
+                               goto out;
                        }
 
-                       set_current_state(TASK_RUNNING);
-                       remove_wait_queue(&list->hdev->debug_wait, &wait);
+                       /* allow O_NONBLOCK from other threads */
+                       mutex_unlock(&list->read_mutex);
+                       schedule();
+                       mutex_lock(&list->read_mutex);
+                       set_current_state(TASK_INTERRUPTIBLE);
                }
 
-               if (ret)
-                       goto out;
+               __set_current_state(TASK_RUNNING);
+               remove_wait_queue(&list->hdev->debug_wait, &wait);
 
-               /* pass the ringbuffer contents to userspace */
-copy_rest:
-               if (list->tail == list->head)
+               if (ret)
                        goto out;
-               if (list->tail > list->head) {
-                       len = list->tail - list->head;
-                       if (len > count)
-                               len = count;
-
-                       if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) {
-                               ret = -EFAULT;
-                               goto out;
-                       }
-                       ret += len;
-                       list->head += len;
-               } else {
-                       len = HID_DEBUG_BUFSIZE - list->head;
-                       if (len > count)
-                               len = count;
-
-                       if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) {
-                               ret = -EFAULT;
-                               goto out;
-                       }
-                       list->head = 0;
-                       ret += len;
-                       count -= len;
-                       if (count > 0)
-                               goto copy_rest;
-               }
-
        }
+
+       /* pass the fifo content to userspace; locking is not needed with only
+        * one concurrent reader and one concurrent writer
+        */
+       ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied);
+       if (ret)
+               goto out;
+       ret = copied;
 out:
        mutex_unlock(&list->read_mutex);
        return ret;
@@ -1185,7 +1160,7 @@ static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait)
        struct hid_debug_list *list = file->private_data;
 
        poll_wait(file, &list->hdev->debug_wait, wait);
-       if (list->head != list->tail)
+       if (!kfifo_is_empty(&list->hid_debug_fifo))
                return EPOLLIN | EPOLLRDNORM;
        if (!list->hdev->debug)
                return EPOLLERR | EPOLLHUP;
@@ -1200,7 +1175,7 @@ static int hid_debug_events_release(struct inode *inode, struct file *file)
        spin_lock_irqsave(&list->hdev->debug_list_lock, flags);
        list_del(&list->node);
        spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags);
-       kfree(list->hid_debug_buf);
+       kfifo_free(&list->hid_debug_fifo);
        kfree(list);
 
        return 0;
@@ -1246,4 +1221,3 @@ void hid_debug_exit(void)
 {
        debugfs_remove_recursive(hid_debug_root);
 }
-
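The whole hid-debug conversion above swaps a hand-rolled head/tail ring for a kfifo. The comment in hid_debug_events_read() leans on kfifo's documented guarantee: with exactly one concurrent producer and one concurrent consumer, no extra locking is required (the spinlock in hid_debug_event() serializes multiple writers against each other and protects the list walk). A minimal sketch of the pairing, with hypothetical names and size:

        DECLARE_KFIFO(debug_fifo, char, 4096);  /* size must be a power of 2 */

        /* producer side, possibly several contexts, hence the lock */
        spin_lock_irqsave(&lock, flags);
        kfifo_in(&debug_fifo, buf, strlen(buf));
        spin_unlock_irqrestore(&lock, flags);

        /* single consumer: drain straight to userspace, lock-free */
        ret = kfifo_to_user(&debug_fifo, ubuf, count, &copied);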
index 4adec4a..59ee01f 100644 (file)
@@ -3594,7 +3594,8 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
                        fan5pin |= cr1b & BIT(5);
                        fan5pin |= creb & BIT(5);
 
-                       fan6pin = creb & BIT(3);
+                       fan6pin = !dsw_en && (cr2d & BIT(1));
+                       fan6pin |= creb & BIT(3);
 
                        pwm5pin |= cr2d & BIT(7);
                        pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0));
index b1086bf..cd9c65f 100644 (file)
@@ -1500,8 +1500,7 @@ static int omap_i2c_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM
-static int omap_i2c_runtime_suspend(struct device *dev)
+static int __maybe_unused omap_i2c_runtime_suspend(struct device *dev)
 {
        struct omap_i2c_dev *omap = dev_get_drvdata(dev);
 
@@ -1527,7 +1526,7 @@ static int omap_i2c_runtime_suspend(struct device *dev)
        return 0;
 }
 
-static int omap_i2c_runtime_resume(struct device *dev)
+static int __maybe_unused omap_i2c_runtime_resume(struct device *dev)
 {
        struct omap_i2c_dev *omap = dev_get_drvdata(dev);
 
@@ -1542,20 +1541,18 @@ static int omap_i2c_runtime_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops omap_i2c_pm_ops = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                                     pm_runtime_force_resume)
        SET_RUNTIME_PM_OPS(omap_i2c_runtime_suspend,
                           omap_i2c_runtime_resume, NULL)
 };
-#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops)
-#else
-#define OMAP_I2C_PM_OPS NULL
-#endif /* CONFIG_PM */
 
 static struct platform_driver omap_i2c_driver = {
        .probe          = omap_i2c_probe,
        .remove         = omap_i2c_remove,
        .driver         = {
                .name   = "omap_i2c",
-               .pm     = OMAP_I2C_PM_OPS,
+               .pm     = &omap_i2c_pm_ops,
                .of_match_table = of_match_ptr(omap_i2c_of_match),
        },
 };
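This replaces the old #ifdef CONFIG_PM / OMAP_I2C_PM_OPS dance with the now-preferred idiom: keep the callbacks compiled but marked __maybe_unused, and always point .pm at the ops table. SET_RUNTIME_PM_OPS() expands to nothing without CONFIG_PM, so the attribute only exists to silence the resulting unused-function warning. Skeleton of the idiom, with hypothetical driver names:

        static int __maybe_unused foo_runtime_suspend(struct device *dev)
        {
                return 0;                       /* hypothetical no-op */
        }

        static const struct dev_pm_ops foo_pm_ops = {
                SET_RUNTIME_PM_OPS(foo_runtime_suspend, NULL, NULL)
        };

        static struct platform_driver foo_driver = {
                .driver = {
                        .name = "foo",
                        .pm   = &foo_pm_ops,    /* valid with or without CONFIG_PM */
                },
        };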
index c39f89d..2dc628d 100644 (file)
@@ -1828,7 +1828,7 @@ int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master,
 
        ret = i3c_master_retrieve_dev_info(newdev);
        if (ret)
-               goto err_free_dev;
+               goto err_detach_dev;
 
        olddev = i3c_master_search_i3c_dev_duplicate(newdev);
        if (olddev) {
index f8c00b9..bb03079 100644 (file)
@@ -419,12 +419,9 @@ static void dw_i3c_master_enqueue_xfer(struct dw_i3c_master *master,
        spin_unlock_irqrestore(&master->xferqueue.lock, flags);
 }
 
-static void dw_i3c_master_dequeue_xfer(struct dw_i3c_master *master,
-                                      struct dw_i3c_xfer *xfer)
+static void dw_i3c_master_dequeue_xfer_locked(struct dw_i3c_master *master,
+                                             struct dw_i3c_xfer *xfer)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&master->xferqueue.lock, flags);
        if (master->xferqueue.cur == xfer) {
                u32 status;
 
@@ -439,6 +436,15 @@ static void dw_i3c_master_dequeue_xfer(struct dw_i3c_master *master,
        } else {
                list_del_init(&xfer->node);
        }
+}
+
+static void dw_i3c_master_dequeue_xfer(struct dw_i3c_master *master,
+                                      struct dw_i3c_xfer *xfer)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&master->xferqueue.lock, flags);
+       dw_i3c_master_dequeue_xfer_locked(master, xfer);
        spin_unlock_irqrestore(&master->xferqueue.lock, flags);
 }
 
@@ -494,7 +500,7 @@ static void dw_i3c_master_end_xfer_locked(struct dw_i3c_master *master, u32 isr)
        complete(&xfer->comp);
 
        if (ret < 0) {
-               dw_i3c_master_dequeue_xfer(master, xfer);
+               dw_i3c_master_dequeue_xfer_locked(master, xfer);
                writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_RESUME,
                       master->regs + DEVICE_CTRL);
        }
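This is the standard _locked split: the interrupt path (dw_i3c_master_end_xfer_locked()) already holds xferqueue.lock, so the old dequeue helper, which takes the lock itself, would self-deadlock when called from there. The convention in sketch form, with hypothetical names:

        /* assumes the caller holds m->lock */
        static void dequeue_locked(struct master *m, struct xfer *x)
        {
                lockdep_assert_held(&m->lock);  /* document the contract */
                list_del_init(&x->node);
        }

        /* public wrapper for everyone else */
        static void dequeue(struct master *m, struct xfer *x)
        {
                unsigned long flags;

                spin_lock_irqsave(&m->lock, flags);
                dequeue_locked(m, x);
                spin_unlock_irqrestore(&m->lock, flags);
        }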
index da58020..33a28cd 100644 (file)
@@ -235,21 +235,28 @@ EXPORT_SYMBOL_GPL(ide_prep_sense);
 
 int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 {
-       struct request *sense_rq = drive->sense_rq;
+       ide_hwif_t *hwif = drive->hwif;
+       struct request *sense_rq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&hwif->lock, flags);
 
        /* deferred failure from ide_prep_sense() */
        if (!drive->sense_rq_armed) {
                printk(KERN_WARNING PFX "%s: error queuing a sense request\n",
                       drive->name);
+               spin_unlock_irqrestore(&hwif->lock, flags);
                return -ENOMEM;
        }
 
+       sense_rq = drive->sense_rq;
        ide_req(sense_rq)->special = special;
        drive->sense_rq_armed = false;
 
        drive->hwif->rq = NULL;
 
        ide_insert_request_head(drive, sense_rq);
+       spin_unlock_irqrestore(&hwif->lock, flags);
        return 0;
 }
 EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
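Together with the ide-io.c changes below, this establishes a new locking convention: ide_insert_request_head() no longer takes hwif->lock itself, so every caller must hold it, which lets the sense_rq handoff, the sense_rq_armed check, and the new sense_rq_active flag all change atomically. Caller shape:

        spin_lock_irq(&hwif->lock);
        ide_insert_request_head(drive, rq);     /* also sets sense_rq_active */
        spin_unlock_irq(&hwif->lock);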
index 8445b48..b137f27 100644 (file)
@@ -68,8 +68,10 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
        }
 
        if (!blk_update_request(rq, error, nr_bytes)) {
-               if (rq == drive->sense_rq)
+               if (rq == drive->sense_rq) {
                        drive->sense_rq = NULL;
+                       drive->sense_rq_active = false;
+               }
 
                __blk_mq_end_request(rq, error);
                return 0;
@@ -451,16 +453,11 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
                blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3);
 }
 
-/*
- * Issue a new request to a device.
- */
-blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
-                         const struct blk_mq_queue_data *bd)
+blk_status_t ide_issue_rq(ide_drive_t *drive, struct request *rq,
+                         bool local_requeue)
 {
-       ide_drive_t     *drive = hctx->queue->queuedata;
-       ide_hwif_t      *hwif = drive->hwif;
+       ide_hwif_t *hwif = drive->hwif;
        struct ide_host *host = hwif->host;
-       struct request  *rq = bd->rq;
        ide_startstop_t startstop;
 
        if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) {
@@ -474,8 +471,6 @@ blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (ide_lock_host(host, hwif))
                return BLK_STS_DEV_RESOURCE;
 
-       blk_mq_start_request(rq);
-
        spin_lock_irq(&hwif->lock);
 
        if (!ide_lock_port(hwif)) {
@@ -511,18 +506,6 @@ repeat:
                drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
 
                /*
-                * we know that the queue isn't empty, but this can happen
-                * if ->prep_rq() decides to kill a request
-                */
-               if (!rq) {
-                       rq = bd->rq;
-                       if (!rq) {
-                               ide_unlock_port(hwif);
-                               goto out;
-                       }
-               }
-
-               /*
                 * Sanity: don't accept a request that isn't a PM request
                 * if we are currently power managed. This is very important as
                 * blk_stop_queue() doesn't prevent the blk_fetch_request()
@@ -560,9 +543,12 @@ repeat:
                }
        } else {
 plug_device:
+               if (local_requeue)
+                       list_add(&rq->queuelist, &drive->rq_list);
                spin_unlock_irq(&hwif->lock);
                ide_unlock_host(host);
-               ide_requeue_and_plug(drive, rq);
+               if (!local_requeue)
+                       ide_requeue_and_plug(drive, rq);
                return BLK_STS_OK;
        }
 
@@ -573,6 +559,26 @@ out:
        return BLK_STS_OK;
 }
 
+/*
+ * Issue a new request to a device.
+ */
+blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
+                         const struct blk_mq_queue_data *bd)
+{
+       ide_drive_t *drive = hctx->queue->queuedata;
+       ide_hwif_t *hwif = drive->hwif;
+
+       spin_lock_irq(&hwif->lock);
+       if (drive->sense_rq_active) {
+               spin_unlock_irq(&hwif->lock);
+               return BLK_STS_DEV_RESOURCE;
+       }
+       spin_unlock_irq(&hwif->lock);
+
+       blk_mq_start_request(bd->rq);
+       return ide_issue_rq(drive, bd->rq, false);
+}
+
 static int drive_is_ready(ide_drive_t *drive)
 {
        ide_hwif_t *hwif = drive->hwif;
@@ -893,13 +899,8 @@ EXPORT_SYMBOL_GPL(ide_pad_transfer);
 
 void ide_insert_request_head(ide_drive_t *drive, struct request *rq)
 {
-       ide_hwif_t *hwif = drive->hwif;
-       unsigned long flags;
-
-       spin_lock_irqsave(&hwif->lock, flags);
+       drive->sense_rq_active = true;
        list_add_tail(&rq->queuelist, &drive->rq_list);
-       spin_unlock_irqrestore(&hwif->lock, flags);
-
        kblockd_schedule_work(&drive->rq_work);
 }
 EXPORT_SYMBOL_GPL(ide_insert_request_head);
index 102aa3b..8af7af6 100644 (file)
@@ -54,7 +54,9 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
        scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS;
        scsi_req(rq)->cmd_len = 1;
        ide_req(rq)->type = ATA_PRIV_MISC;
+       spin_lock_irq(&hwif->lock);
        ide_insert_request_head(drive, rq);
+       spin_unlock_irq(&hwif->lock);
 
 out:
        return;
index 63627be..5aeaca2 100644 (file)
@@ -1159,18 +1159,27 @@ static void drive_rq_insert_work(struct work_struct *work)
        ide_drive_t *drive = container_of(work, ide_drive_t, rq_work);
        ide_hwif_t *hwif = drive->hwif;
        struct request *rq;
+       blk_status_t ret;
        LIST_HEAD(list);
 
-       spin_lock_irq(&hwif->lock);
-       if (!list_empty(&drive->rq_list))
-               list_splice_init(&drive->rq_list, &list);
-       spin_unlock_irq(&hwif->lock);
+       blk_mq_quiesce_queue(drive->queue);
 
-       while (!list_empty(&list)) {
-               rq = list_first_entry(&list, struct request, queuelist);
+       ret = BLK_STS_OK;
+       spin_lock_irq(&hwif->lock);
+       while (!list_empty(&drive->rq_list)) {
+               rq = list_first_entry(&drive->rq_list, struct request, queuelist);
                list_del_init(&rq->queuelist);
-               blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL);
+
+               spin_unlock_irq(&hwif->lock);
+               ret = ide_issue_rq(drive, rq, true);
+               spin_lock_irq(&hwif->lock);
        }
+       spin_unlock_irq(&hwif->lock);
+
+       blk_mq_unquiesce_queue(drive->queue);
+
+       if (ret != BLK_STS_OK)
+               kblockd_schedule_work(&drive->rq_work);
 }
 
 static const u8 ide_hwif_to_major[] =
index 031d568..4e339cf 100644 (file)
 #include <linux/iio/machine.h>
 #include <linux/iio/driver.h>
 
-#define AXP288_ADC_EN_MASK             0xF1
-#define AXP288_ADC_TS_PIN_GPADC                0xF2
-#define AXP288_ADC_TS_PIN_ON           0xF3
+/*
+ * This mask enables all ADCs except for the battery temp-sensor (TS), which
+ * is left as-is to avoid breaking charging on devices without a temp-sensor.
+ */
+#define AXP288_ADC_EN_MASK                             0xF0
+#define AXP288_ADC_TS_ENABLE                           0x01
+
+#define AXP288_ADC_TS_CURRENT_ON_OFF_MASK              GENMASK(1, 0)
+#define AXP288_ADC_TS_CURRENT_OFF                      (0 << 0)
+#define AXP288_ADC_TS_CURRENT_ON_WHEN_CHARGING         (1 << 0)
+#define AXP288_ADC_TS_CURRENT_ON_ONDEMAND              (2 << 0)
+#define AXP288_ADC_TS_CURRENT_ON                       (3 << 0)
 
 enum axp288_adc_id {
        AXP288_ADC_TS,
@@ -44,6 +53,7 @@ enum axp288_adc_id {
 struct axp288_adc_info {
        int irq;
        struct regmap *regmap;
+       bool ts_enabled;
 };
 
 static const struct iio_chan_spec axp288_adc_channels[] = {
@@ -115,21 +125,33 @@ static int axp288_adc_read_channel(int *val, unsigned long address,
        return IIO_VAL_INT;
 }
 
-static int axp288_adc_set_ts(struct regmap *regmap, unsigned int mode,
-                               unsigned long address)
+/*
+ * The current-source used for the battery temp-sensor (TS) is shared
+ * with the GPADC. For proper fuel-gauge and charger operation the TS
+ * current-source needs to be permanently on. But to read the GPADC we
+ * need to temporarily switch the TS current-source to ondemand, so that
+ * the GPADC can use it; otherwise we will always read an all-0 value.
+ */
+static int axp288_adc_set_ts(struct axp288_adc_info *info,
+                            unsigned int mode, unsigned long address)
 {
        int ret;
 
-       /* channels other than GPADC do not need to switch TS pin */
+       /* No need to switch the current-source if the TS pin is disabled */
+       if (!info->ts_enabled)
+               return 0;
+
+       /* Channels other than GPADC do not need the current source */
        if (address != AXP288_GP_ADC_H)
                return 0;
 
-       ret = regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode);
+       ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL,
+                                AXP288_ADC_TS_CURRENT_ON_OFF_MASK, mode);
        if (ret)
                return ret;
 
        /* When switching to the GPADC pin give things some time to settle */
-       if (mode == AXP288_ADC_TS_PIN_GPADC)
+       if (mode == AXP288_ADC_TS_CURRENT_ON_ONDEMAND)
                usleep_range(6000, 10000);
 
        return 0;
@@ -145,14 +167,14 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev,
        mutex_lock(&indio_dev->mlock);
        switch (mask) {
        case IIO_CHAN_INFO_RAW:
-               if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_GPADC,
+               if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON_ONDEMAND,
                                        chan->address)) {
                        dev_err(&indio_dev->dev, "GPADC mode\n");
                        ret = -EINVAL;
                        break;
                }
                ret = axp288_adc_read_channel(val, chan->address, info->regmap);
-               if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_ON,
+               if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON,
                                                chan->address))
                        dev_err(&indio_dev->dev, "TS pin restore\n");
                break;
@@ -164,13 +186,35 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev,
        return ret;
 }
 
-static int axp288_adc_set_state(struct regmap *regmap)
+static int axp288_adc_initialize(struct axp288_adc_info *info)
 {
-       /* ADC should be always enabled for internal FG to function */
-       if (regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, AXP288_ADC_TS_PIN_ON))
-               return -EIO;
+       int ret, adc_enable_val;
+
+       /*
+        * Determine if the TS pin is enabled and set the TS current-source
+        * accordingly.
+        */
+       ret = regmap_read(info->regmap, AXP20X_ADC_EN1, &adc_enable_val);
+       if (ret)
+               return ret;
+
+       if (adc_enable_val & AXP288_ADC_TS_ENABLE) {
+               info->ts_enabled = true;
+               ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL,
+                                        AXP288_ADC_TS_CURRENT_ON_OFF_MASK,
+                                        AXP288_ADC_TS_CURRENT_ON);
+       } else {
+               info->ts_enabled = false;
+               ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL,
+                                        AXP288_ADC_TS_CURRENT_ON_OFF_MASK,
+                                        AXP288_ADC_TS_CURRENT_OFF);
+       }
+       if (ret)
+               return ret;
 
-       return regmap_write(regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK);
+       /* Turn on the ADC for all channels except TS, leave TS as is */
+       return regmap_update_bits(info->regmap, AXP20X_ADC_EN1,
+                                 AXP288_ADC_EN_MASK, AXP288_ADC_EN_MASK);
 }
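Note the switch from regmap_write() to regmap_update_bits() throughout: a read-modify-write confined to the masked bits is what actually delivers on the comment's promise to leave the TS enable bit alone. For the enable register that amounts to:

        /* reg := (reg & ~0xF0) | (0xF0 & 0xF0) -- bit 0 (TS) is untouched */
        regmap_update_bits(regmap, AXP20X_ADC_EN1,
                           AXP288_ADC_EN_MASK, AXP288_ADC_EN_MASK);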
 
 static const struct iio_info axp288_adc_iio_info = {
@@ -200,7 +244,7 @@ static int axp288_adc_probe(struct platform_device *pdev)
         * Set ADC to enabled state at all time, including system suspend.
         * otherwise internal fuel gauge functionality may be affected.
         */
-       ret = axp288_adc_set_state(axp20x->regmap);
+       ret = axp288_adc_initialize(info);
        if (ret) {
                dev_err(&pdev->dev, "unable to enable ADC device\n");
                return ret;
index 184d686..8b4568e 100644 (file)
@@ -41,6 +41,7 @@
 
 #define ADS8688_VREF_MV                        4096
 #define ADS8688_REALBITS               16
+#define ADS8688_MAX_CHANNELS           8
 
 /*
  * enum ads8688_range - ADS8688 reference voltage range
@@ -385,7 +386,7 @@ static irqreturn_t ads8688_trigger_handler(int irq, void *p)
 {
        struct iio_poll_func *pf = p;
        struct iio_dev *indio_dev = pf->indio_dev;
-       u16 buffer[8];
+       u16 buffer[ADS8688_MAX_CHANNELS + sizeof(s64)/sizeof(u16)];
        int i, j = 0;
 
        for (i = 0; i < indio_dev->masklength; i++) {
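The widened buffer accounts for iio_push_to_buffers_with_timestamp(), which appends an 8-byte timestamp after the sample data. The arithmetic: 8 channels x 2 bytes plus 8 bytes of timestamp is 24 bytes, i.e. 8 + sizeof(s64)/sizeof(u16) = 12 u16 slots, whereas the old u16 buffer[8] could be overrun by up to 8 bytes whenever all channels were enabled.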
index a406ad3..3a20cb5 100644 (file)
@@ -444,9 +444,8 @@ static int atlas_read_raw(struct iio_dev *indio_dev,
        case IIO_CHAN_INFO_SCALE:
                switch (chan->type) {
                case IIO_TEMP:
-                       *val = 1; /* 0.01 */
-                       *val2 = 100;
-                       break;
+                       *val = 10;
+                       return IIO_VAL_INT;
                case IIO_PH:
                        *val = 1; /* 0.001 */
                        *val2 = 1000;
@@ -477,7 +476,7 @@ static int atlas_write_raw(struct iio_dev *indio_dev,
                           int val, int val2, long mask)
 {
        struct atlas_data *data = iio_priv(indio_dev);
-       __be32 reg = cpu_to_be32(val);
+       __be32 reg = cpu_to_be32(val / 10);
 
        if (val2 != 0 || val < 0 || val > 20000)
                return -EINVAL;
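The scale fix follows the IIO convention that temperature is reported to userspace as raw * scale in milli-degrees Celsius: the chip register holds hundredths of a degree, so the integer scale is 10, and the write path divides by 10 on the way back. Worked example: a reading of 25.00 degrees is raw 2500, and 2500 * 10 = 25000 milli-degrees; writing 25000 stores 25000 / 10 = 2500 in the register.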
index 3cd830d..6167343 100644 (file)
@@ -267,7 +267,6 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
 #endif
 
 struct ib_device *ib_device_get_by_index(u32 ifindex);
-void ib_device_put(struct ib_device *device);
 /* RDMA device netlink */
 void nldev_init(void);
 void nldev_exit(void);
index 8872453..238ec42 100644 (file)
@@ -156,19 +156,26 @@ struct ib_device *ib_device_get_by_index(u32 index)
        down_read(&lists_rwsem);
        device = __ib_device_get_by_index(index);
        if (device) {
-               /* Do not return a device if unregistration has started. */
-               if (!refcount_inc_not_zero(&device->refcount))
+               if (!ib_device_try_get(device))
                        device = NULL;
        }
        up_read(&lists_rwsem);
        return device;
 }
 
+/**
+ * ib_device_put - Release IB device reference
+ * @device: device whose reference to be released
+ *
+ * ib_device_put() releases a reference to the IB device to allow it to be
+ * unregistered and eventually freed.
+ */
 void ib_device_put(struct ib_device *device)
 {
        if (refcount_dec_and_test(&device->refcount))
                complete(&device->unreg_completion);
 }
+EXPORT_SYMBOL(ib_device_put);
 
 static struct ib_device *__ib_device_get_by_name(const char *name)
 {
@@ -303,7 +310,6 @@ struct ib_device *ib_alloc_device(size_t size)
        rwlock_init(&device->client_data_lock);
        INIT_LIST_HEAD(&device->client_data_list);
        INIT_LIST_HEAD(&device->port_list);
-       refcount_set(&device->refcount, 1);
        init_completion(&device->unreg_completion);
 
        return device;
@@ -620,6 +626,7 @@ int ib_register_device(struct ib_device *device, const char *name,
                goto cg_cleanup;
        }
 
+       refcount_set(&device->refcount, 1);
        device->reg_state = IB_DEV_REGISTERED;
 
        list_for_each_entry(client, &client_list, list)
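Moving refcount_set(..., 1) from ib_alloc_device() to ib_register_device() turns the refcount into a registration gate: it stays 0 for an allocated-but-unregistered device, so ib_device_try_get(), which is refcount_inc_not_zero() underneath, rejects devices both before registration and once unregistration has begun. Caller shape:

        if (!ib_device_try_get(device))         /* fails while refcount == 0 */
                device = NULL;

The last ib_device_put() then signals unreg_completion so the unregistration path can finish tearing the device down.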
index a4ec430..acb882f 100644 (file)
@@ -352,6 +352,8 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
        umem->writable   = 1;
        umem->is_odp = 1;
        odp_data->per_mm = per_mm;
+       umem->owning_mm  = per_mm->mm;
+       mmgrab(umem->owning_mm);
 
        mutex_init(&odp_data->umem_mutex);
        init_completion(&odp_data->notifier_completion);
@@ -384,6 +386,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
 out_page_list:
        vfree(odp_data->page_list);
 out_odp_data:
+       mmdrop(umem->owning_mm);
        kfree(odp_data);
        return ERR_PTR(ret);
 }
index 2890a77..5f36683 100644 (file)
@@ -204,6 +204,9 @@ void ib_uverbs_release_file(struct kref *ref)
        if (atomic_dec_and_test(&file->device->refcount))
                ib_uverbs_comp_dev(file->device);
 
+       if (file->async_file)
+               kref_put(&file->async_file->ref,
+                        ib_uverbs_release_async_event_file);
        put_device(&file->device->dev);
        kfree(file);
 }
@@ -964,11 +967,19 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 
                /* Get an arbitrary mm pointer that hasn't been cleaned yet */
                mutex_lock(&ufile->umap_lock);
-               if (!list_empty(&ufile->umaps)) {
-                       mm = list_first_entry(&ufile->umaps,
-                                             struct rdma_umap_priv, list)
-                                    ->vma->vm_mm;
-                       mmget(mm);
+               while (!list_empty(&ufile->umaps)) {
+                       int ret;
+
+                       priv = list_first_entry(&ufile->umaps,
+                                               struct rdma_umap_priv, list);
+                       mm = priv->vma->vm_mm;
+                       ret = mmget_not_zero(mm);
+                       if (!ret) {
+                               list_del_init(&priv->list);
+                               mm = NULL;
+                               continue;
+                       }
+                       break;
                }
                mutex_unlock(&ufile->umap_lock);
                if (!mm)
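
The single mmget() becomes a scan because the first umap entry may belong to a process that is already exiting: mmget_not_zero() only succeeds while mm_users is still positive. The decision in isolation:

        if (!mmget_not_zero(mm)) {           /* mm already on its way out */
                list_del_init(&priv->list);  /* can never be pinned again */
                mm = NULL;
                continue;                    /* try the next entry */
        }
        break;  /* pinned: mm stays valid after umap_lock is dropped */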
@@ -1096,10 +1107,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
        list_del_init(&file->list);
        mutex_unlock(&file->device->lists_mutex);
 
-       if (file->async_file)
-               kref_put(&file->async_file->ref,
-                        ib_uverbs_release_async_event_file);
-
        kref_put(&file->ref, ib_uverbs_release_file);
 
        return 0;
index 5030ec4..2a3f2f0 100644 (file)
@@ -168,12 +168,18 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr,
 static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
        struct uverbs_attr_bundle *attrs)
 {
-       struct ib_device *ib_dev = attrs->ufile->device->ib_dev;
+       struct ib_device *ib_dev;
        struct ib_port_attr attr = {};
        struct ib_uverbs_query_port_resp_ex resp = {};
+       struct ib_ucontext *ucontext;
        int ret;
        u8 port_num;
 
+       ucontext = ib_uverbs_get_ucontext(attrs);
+       if (IS_ERR(ucontext))
+               return PTR_ERR(ucontext);
+       ib_dev = ucontext->device;
+
        /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
        if (!ib_dev->ops.query_port)
                return -EOPNOTSUPP;
index c22ebc7..f9a7e9d 100644 (file)
@@ -488,7 +488,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
                vmf = 1;
                break;
        case STATUS:
-               if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
+               if (flags & VM_WRITE) {
                        ret = -EPERM;
                        goto done;
                }
index 88242fe..bf96067 100644 (file)
@@ -987,7 +987,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
            opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
                wc.ex.imm_data = packet->ohdr->u.ud.imm_data;
                wc.wc_flags = IB_WC_WITH_IMM;
-               tlen -= sizeof(u32);
        } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
                wc.ex.imm_data = 0;
                wc.wc_flags = 0;
index 960b194..12deacf 100644 (file)
@@ -210,6 +210,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
                                   struct ib_udata *udata)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+       struct hns_roce_ib_create_srq_resp resp = {};
        struct hns_roce_srq *srq;
        int srq_desc_size;
        int srq_buf_size;
@@ -378,16 +379,21 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
 
        srq->event = hns_roce_ib_srq_event;
        srq->ibsrq.ext.xrc.srq_num = srq->srqn;
+       resp.srqn = srq->srqn;
 
        if (udata) {
-               if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
+               if (ib_copy_to_udata(udata, &resp,
+                                    min(udata->outlen, sizeof(resp)))) {
                        ret = -EFAULT;
-                       goto err_wrid;
+                       goto err_srqc_alloc;
                }
        }
 
        return &srq->ibsrq;
 
+err_srqc_alloc:
+       hns_roce_srq_free(hr_dev, srq);
+
 err_wrid:
        kvfree(srq->wrid);
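
The copy above follows the usual uverbs extensibility idiom: copying min(udata->outlen, sizeof(resp)) keeps an older userspace with a shorter response struct working, and the zero-initialized resp guarantees any unread tail is well defined. Condensed (the error return here is illustrative; the real path unwinds via err_srqc_alloc):

        struct hns_roce_ib_create_srq_resp resp = {};  /* tail stays zero */

        resp.srqn = srq->srqn;
        if (udata &&
            ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))))
                return ERR_PTR(-EFAULT);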
 
index 25439da..936ee13 100644 (file)
@@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 
        sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
        if (sqp->tx_ring[wire_tx_ix].ah)
-               rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0);
+               mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0);
        sqp->tx_ring[wire_tx_ix].ah = ah;
        ib_dma_sync_single_for_cpu(&dev->ib_dev,
                                   sqp->tx_ring[wire_tx_ix].buf.map,
@@ -1902,7 +1902,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
                if (wc.status == IB_WC_SUCCESS) {
                        switch (wc.opcode) {
                        case IB_WC_SEND:
-                               rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
+                               mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id &
                                              (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
                                sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
                                        = NULL;
@@ -1931,7 +1931,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
                                 " status = %d, wrid = 0x%llx\n",
                                 ctx->slave, wc.status, wc.wr_id);
                        if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
-                               rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
+                               mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id &
                                              (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
                                sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
                                        = NULL;
index 356bccc..6bcc63a 100644 (file)
@@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
                                       counter_set_id);
        return err;
 }
+
+int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+                    u16 opmod, u8 port)
+{
+       int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+       int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+       int err = -ENOMEM;
+       void *data;
+       void *resp;
+       u32 *out;
+       u32 *in;
+
+       in = kzalloc(inlen, GFP_KERNEL);
+       out = kzalloc(outlen, GFP_KERNEL);
+       if (!in || !out)
+               goto out;
+
+       MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+       MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+       MLX5_SET(mad_ifc_in, in, port, port);
+
+       data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+       memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
+
+       err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+       if (err)
+               goto out;
+
+       resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+       memcpy(outb, resp,
+              MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
+
+out:
+       kfree(out);
+       kfree(in);
+       return err;
+}
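
mlx5_cmd_mad_ifc() is the MAD_IFC mailbox command moved into the IB driver. Worth noting is the joint-allocation idiom it uses: both mailboxes are allocated up front, checked together, and freed through one exit label, which is safe only because kfree(NULL) is a no-op:

        in = kzalloc(inlen, GFP_KERNEL);
        out = kzalloc(outlen, GFP_KERNEL);
        if (!in || !out)
                goto out;        /* err was preset to -ENOMEM */
        /* ... fill the mailbox and mlx5_cmd_exec() ... */
out:
        kfree(out);              /* kfree(NULL) is harmless */
        kfree(in);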
index 1e76dc6..923a7b9 100644 (file)
@@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
 int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
 int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
                             u16 uid);
+int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+                    u16 opmod, u8 port);
 #endif /* MLX5_IB_CMD_H */
index e8a1e44..798591a 100644 (file)
@@ -630,8 +630,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = {
                UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)),
        UAPI_DEF_CHAIN_OBJ_TREE(
                UVERBS_OBJECT_FLOW,
-               &mlx5_ib_fs,
-               UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)),
+               &mlx5_ib_fs),
        UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
                                &mlx5_ib_flow_actions),
        {},
index 46a9ddc..4700cff 100644 (file)
@@ -3,10 +3,11 @@
  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
  */
 
+#include <linux/mlx5/vport.h>
 #include "ib_rep.h"
 #include "srq.h"
 
-static const struct mlx5_ib_profile rep_profile = {
+static const struct mlx5_ib_profile vf_rep_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_INIT,
                     mlx5_ib_stage_init_init,
                     mlx5_ib_stage_init_cleanup),
@@ -46,30 +47,16 @@ static const struct mlx5_ib_profile rep_profile = {
 };
 
 static int
-mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
-{
-       struct mlx5_ib_dev *ibdev;
-
-       ibdev = mlx5_ib_rep_to_dev(rep);
-       if (!__mlx5_ib_add(ibdev, ibdev->profile))
-               return -EINVAL;
-       return 0;
-}
-
-static void
-mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
-{
-       struct mlx5_ib_dev *ibdev;
-
-       ibdev = mlx5_ib_rep_to_dev(rep);
-       __mlx5_ib_remove(ibdev, ibdev->profile, MLX5_IB_STAGE_MAX);
-}
-
-static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
+       const struct mlx5_ib_profile *profile;
        struct mlx5_ib_dev *ibdev;
 
+       if (rep->vport == MLX5_VPORT_UPLINK)
+               profile = &uplink_rep_profile;
+       else
+               profile = &vf_rep_profile;
+
        ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
        if (!ibdev)
                return -ENOMEM;
@@ -78,7 +65,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        ibdev->mdev = dev;
        ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
                               MLX5_CAP_GEN(dev, num_vhca_ports));
-       if (!__mlx5_ib_add(ibdev, &rep_profile))
+       if (!__mlx5_ib_add(ibdev, profile))
                return -EINVAL;
 
        rep->rep_if[REP_IB].priv = ibdev;
@@ -105,53 +92,23 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
        return mlx5_ib_rep_to_dev(rep);
 }
 
-static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
-{
-       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
-       int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
-       int vport;
-
-       for (vport = 1; vport < total_vfs; vport++) {
-               struct mlx5_eswitch_rep_if rep_if = {};
-
-               rep_if.load = mlx5_ib_vport_rep_load;
-               rep_if.unload = mlx5_ib_vport_rep_unload;
-               rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
-               mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
-       }
-}
-
-static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev)
 {
-       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
-       int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
-       int vport;
-
-       for (vport = 1; vport < total_vfs; vport++)
-               mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
-}
-
-void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
-{
-       struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+       struct mlx5_eswitch *esw = mdev->priv.eswitch;
        struct mlx5_eswitch_rep_if rep_if = {};
 
-       rep_if.load = mlx5_ib_nic_rep_load;
-       rep_if.unload = mlx5_ib_nic_rep_unload;
+       rep_if.load = mlx5_ib_vport_rep_load;
+       rep_if.unload = mlx5_ib_vport_rep_unload;
        rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
-       rep_if.priv = dev;
-
-       mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
 
-       mlx5_ib_rep_register_vf_vports(dev);
+       mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB);
 }
 
-void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev)
 {
-       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+       struct mlx5_eswitch *esw = mdev->priv.eswitch;
 
-       mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
-       mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
+       mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
 }
 
 u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
index 2ba7363..798d41e 100644 (file)
 #include "mlx5_ib.h"
 
 #ifdef CONFIG_MLX5_ESWITCH
+extern const struct mlx5_ib_profile uplink_rep_profile;
+
 u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
 struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
                                          int vport_index);
 struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
 struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
                                           int vport_index);
-void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
-void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev);
 int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
                              struct mlx5_ib_sq *sq);
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
@@ -48,8 +50,8 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
        return NULL;
 }
 
-static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
-static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {}
 static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
                                            struct mlx5_ib_sq *sq)
 {
index 5586384..6c529e6 100644 (file)
@@ -36,6 +36,7 @@
 #include <rdma/ib_smi.h>
 #include <rdma/ib_pma.h>
 #include "mlx5_ib.h"
+#include "cmd.h"
 
 enum {
        MLX5_IB_VENDOR_CLASS1 = 0x9,
@@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
        return dev->mdev->port_caps[port_num - 1].has_smi;
 }
 
-int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
-                u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                const void *in_mad, void *response_mad)
+static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
+                       int ignore_bkey, u8 port, const struct ib_wc *in_wc,
+                       const struct ib_grh *in_grh, const void *in_mad,
+                       void *response_mad)
 {
        u8 op_modifier = 0;
 
@@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
        if (ignore_bkey || !in_wc)
                op_modifier |= 0x2;
 
-       return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port);
+       return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier,
+                               port);
 }
 
 static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
index 94fe253..581ae11 100644 (file)
@@ -331,8 +331,8 @@ out:
        spin_unlock(&port->mp.mpi_lock);
 }
 
-static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
-                                   u8 *active_width)
+static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+                                          u8 *active_width)
 {
        switch (eth_proto_oper) {
        case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
@@ -389,10 +389,66 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
        return 0;
 }
 
+static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+                                       u8 *active_width)
+{
+       switch (eth_proto_oper) {
+       case MLX5E_PROT_MASK(MLX5E_SGMII_100M):
+       case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII):
+               *active_width = IB_WIDTH_1X;
+               *active_speed = IB_SPEED_SDR;
+               break;
+       case MLX5E_PROT_MASK(MLX5E_5GBASE_R):
+               *active_width = IB_WIDTH_1X;
+               *active_speed = IB_SPEED_DDR;
+               break;
+       case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1):
+               *active_width = IB_WIDTH_1X;
+               *active_speed = IB_SPEED_QDR;
+               break;
+       case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4):
+               *active_width = IB_WIDTH_4X;
+               *active_speed = IB_SPEED_QDR;
+               break;
+       case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR):
+               *active_width = IB_WIDTH_1X;
+               *active_speed = IB_SPEED_EDR;
+               break;
+       case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2):
+       case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR):
+               *active_width = IB_WIDTH_1X;
+               *active_speed = IB_SPEED_HDR;
+               break;
+       case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2):
+               *active_width = IB_WIDTH_2X;
+               *active_speed = IB_SPEED_HDR;
+               break;
+       case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4):
+               *active_width = IB_WIDTH_4X;
+               *active_speed = IB_SPEED_HDR;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+                                   u8 *active_width, bool ext)
+{
+       return ext ?
+               translate_eth_ext_proto_oper(eth_proto_oper, active_speed,
+                                            active_width) :
+               translate_eth_legacy_proto_oper(eth_proto_oper, active_speed,
+                                               active_width);
+}
+
 static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
                                struct ib_port_attr *props)
 {
        struct mlx5_ib_dev *dev = to_mdev(device);
+       u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
        struct mlx5_core_dev *mdev;
        struct net_device *ndev, *upper;
        enum ib_mtu ndev_ib_mtu;
@@ -400,6 +456,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
        u16 qkey_viol_cntr;
        u32 eth_prot_oper;
        u8 mdev_port_num;
+       bool ext;
        int err;
 
        mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
@@ -416,16 +473,18 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
        /* Possible bad flows are checked before filling out props so in case
         * of an error it will still be zeroed out.
         */
-       err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper,
-                                            mdev_port_num);
+       err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
+                                  mdev_port_num);
        if (err)
                goto out;
+       ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet);
+       eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
 
        props->active_width     = IB_WIDTH_4X;
        props->active_speed     = IB_SPEED_QDR;
 
        translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
-                                &props->active_width);
+                                &props->active_width, ext);
 
        props->port_cap_flags |= IB_PORT_CM_SUP;
        props->ip_gids = true;
@@ -6386,7 +6445,7 @@ static const struct mlx5_ib_profile pf_profile = {
                     mlx5_ib_stage_delay_drop_cleanup),
 };
 
-static const struct mlx5_ib_profile nic_rep_profile = {
+const struct mlx5_ib_profile uplink_rep_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_INIT,
                     mlx5_ib_stage_init_init,
                     mlx5_ib_stage_init_cleanup),
@@ -6479,6 +6538,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 
        printk_once(KERN_INFO "%s", mlx5_version);
 
+       if (MLX5_ESWITCH_MANAGER(mdev) &&
+           mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+               mlx5_ib_register_vport_reps(mdev);
+               return mdev;
+       }
+
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
@@ -6493,14 +6558,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
                             MLX5_CAP_GEN(mdev, num_vhca_ports));
 
-       if (MLX5_ESWITCH_MANAGER(mdev) &&
-           mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
-               dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
-               dev->profile = &nic_rep_profile;
-               mlx5_ib_register_vport_reps(dev);
-               return dev;
-       }
-
        return __mlx5_ib_add(dev, &pf_profile);
 }
 
@@ -6509,6 +6566,11 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
        struct mlx5_ib_multiport_info *mpi;
        struct mlx5_ib_dev *dev;
 
+       if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) {
+               mlx5_ib_unregister_vport_reps(mdev);
+               return;
+       }
+
        if (mlx5_core_is_mp_slave(mdev)) {
                mpi = context;
                mutex_lock(&mlx5_ib_multiport_mutex);
@@ -6520,10 +6582,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
        }
 
        dev = context;
-       if (dev->profile == &nic_rep_profile)
-               mlx5_ib_unregister_vport_reps(dev);
-       else
-               __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+       __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
 
        ib_dealloc_device((struct ib_device *)dev);
 }
index b06d3b1..eedba0d 100644 (file)
@@ -587,6 +587,7 @@ struct mlx5_ib_mr {
        struct mlx5_ib_mr      *parent;
        atomic_t                num_leaf_free;
        wait_queue_head_t       q_leaf_free;
+       struct mlx5_async_work  cb_work;
 };
 
 struct mlx5_ib_mw {
@@ -944,6 +945,7 @@ struct mlx5_ib_dev {
        struct mlx5_memic       memic;
        u16                     devx_whitelist_uid;
        struct mlx5_srq_table   srq_table;
+       struct mlx5_async_ctx   async_ctx;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1038,9 +1040,6 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
 void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
 void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
-int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
-                u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-                const void *in_mad, void *response_mad);
 struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
                                u32 flags, struct ib_udata *udata);
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
index fd6ea1f..bf2b6ea 100644 (file)
@@ -123,9 +123,10 @@ static void update_odp_mr(struct mlx5_ib_mr *mr)
 }
 #endif
 
-static void reg_mr_callback(int status, void *context)
+static void reg_mr_callback(int status, struct mlx5_async_work *context)
 {
-       struct mlx5_ib_mr *mr = context;
+       struct mlx5_ib_mr *mr =
+               container_of(context, struct mlx5_ib_mr, cb_work);
        struct mlx5_ib_dev *dev = mr->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int c = order2idx(dev, mr->order);
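
The void * cookie becomes an embedded struct mlx5_async_work, and container_of() recovers the owner, so the work item's lifetime is tied to the object it completes for. A minimal sketch of the pattern with hypothetical names (struct foo, foo_done):

        struct foo {
                struct mlx5_async_work cb_work;  /* embedded cookie */
                int result;
        };

        static void foo_done(int status, struct mlx5_async_work *context)
        {
                struct foo *f = container_of(context, struct foo, cb_work);

                f->result = status;   /* type-safe, no void * round trip */
        }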
@@ -216,9 +217,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
                ent->pending++;
                spin_unlock_irq(&ent->lock);
                err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
-                                              in, inlen,
+                                              &dev->async_ctx, in, inlen,
                                               mr->out, sizeof(mr->out),
-                                              reg_mr_callback, mr);
+                                              reg_mr_callback, &mr->cb_work);
                if (err) {
                        spin_lock_irq(&ent->lock);
                        ent->pending--;
@@ -679,6 +680,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                return -ENOMEM;
        }
 
+       mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
        timer_setup(&dev->delay_timer, delay_time_func, 0);
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];
@@ -725,33 +727,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
-static void wait_for_async_commands(struct mlx5_ib_dev *dev)
-{
-       struct mlx5_mr_cache *cache = &dev->cache;
-       struct mlx5_cache_ent *ent;
-       int total = 0;
-       int i;
-       int j;
-
-       for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-               ent = &cache->ent[i];
-               for (j = 0 ; j < 1000; j++) {
-                       if (!ent->pending)
-                               break;
-                       msleep(50);
-               }
-       }
-       for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-               ent = &cache->ent[i];
-               total += ent->pending;
-       }
-
-       if (total)
-               mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
-       else
-               mlx5_ib_warn(dev, "done with all pending requests\n");
-}
-
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 {
        int i;
@@ -763,12 +738,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
        flush_workqueue(dev->cache.wq);
 
        mlx5_mr_cache_debugfs_cleanup(dev);
+       mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
 
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
                clean_keys(dev, i);
 
        destroy_workqueue(dev->cache.wq);
-       wait_for_async_commands(dev);
        del_timer_sync(&dev->delay_timer);
 
        return 0;
index 01e0f62..4ee3296 100644 (file)
@@ -1595,10 +1595,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
        struct prefetch_mr_work *w =
                container_of(work, struct prefetch_mr_work, work);
 
-       if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED)
+       if (ib_device_try_get(&w->dev->ib_dev)) {
                mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list,
                                         w->num_sge);
-
+               ib_device_put(&w->dev->ib_dev);
+       }
+       put_device(&w->dev->ib_dev.dev);
        kfree(w);
 }
 
@@ -1617,15 +1619,13 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
                return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list,
                                                num_sge);
 
-       if (dev->ib_dev.reg_state != IB_DEV_REGISTERED)
-               return -ENODEV;
-
        work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL);
        if (!work)
                return -ENOMEM;
 
        memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge));
 
+       get_device(&dev->ib_dev.dev);
        work->dev = dev;
        work->pf_flags = pf_flags;
        work->num_sge = num_sge;
index dd2ae64..7db778d 100644 (file)
@@ -1912,14 +1912,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                }
 
                if (!check_flags_mask(ucmd.flags,
+                                     MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
+                                     MLX5_QP_FLAG_BFREG_INDEX |
+                                     MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE |
+                                     MLX5_QP_FLAG_SCATTER_CQE |
                                      MLX5_QP_FLAG_SIGNATURE |
-                                             MLX5_QP_FLAG_SCATTER_CQE |
-                                             MLX5_QP_FLAG_TUNNEL_OFFLOADS |
-                                             MLX5_QP_FLAG_BFREG_INDEX |
-                                             MLX5_QP_FLAG_TYPE_DCT |
-                                             MLX5_QP_FLAG_TYPE_DCI |
-                                             MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
-                                             MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE))
+                                     MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC |
+                                     MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+                                     MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+                                     MLX5_QP_FLAG_TYPE_DCI |
+                                     MLX5_QP_FLAG_TYPE_DCT))
                        return -EINVAL;
 
                err = get_qp_user_index(to_mucontext(pd->uobject->context),
index 868da0e..445ea19 100644 (file)
@@ -512,7 +512,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
            opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
                wc.ex.imm_data = ohdr->u.ud.imm_data;
                wc.wc_flags = IB_WC_WITH_IMM;
-               tlen -= sizeof(u32);
        } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
                wc.ex.imm_data = 0;
                wc.wc_flags = 0;
index a1bd8cf..c6cc3e4 100644 (file)
@@ -2910,6 +2910,8 @@ send:
                        goto op_err;
                if (!ret)
                        goto rnr_nak;
+               if (wqe->length > qp->r_len)
+                       goto inv_err;
                break;
 
        case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -3078,7 +3080,10 @@ op_err:
        goto err;
 
 inv_err:
-       send_status = IB_WC_REM_INV_REQ_ERR;
+       send_status =
+               sqp->ibqp.qp_type == IB_QPT_RC ?
+                       IB_WC_REM_INV_REQ_ERR :
+                       IB_WC_SUCCESS;
        wc.status = IB_WC_LOC_QP_OP_ERR;
        goto err;
 
index 1da119d..73e808c 100644 (file)
@@ -248,7 +248,6 @@ struct ipoib_cm_tx {
        struct list_head     list;
        struct net_device   *dev;
        struct ipoib_neigh  *neigh;
-       struct ipoib_path   *path;
        struct ipoib_tx_buf *tx_ring;
        unsigned int         tx_head;
        unsigned int         tx_tail;
index 0428e01..aa9dcfc 100644 (file)
@@ -1312,7 +1312,6 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
 
        neigh->cm = tx;
        tx->neigh = neigh;
-       tx->path = path;
        tx->dev = dev;
        list_add(&tx->list, &priv->cm.start_list);
        set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
@@ -1371,7 +1370,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
                                neigh->daddr + QPN_AND_OPTIONS_OFFSET);
                        goto free_neigh;
                }
-               memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec));
+               memcpy(&pathrec, &path->pathrec, sizeof(pathrec));
 
                spin_unlock_irqrestore(&priv->lock, flags);
                netif_tx_unlock_bh(dev);
index bae0822..a7cfab3 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
-#include <linux/clk.h>
 
 /*
  * The OLPC XO-1.75 and XO-4 laptops do not have a hardware PS/2 controller.
@@ -75,7 +74,6 @@ struct olpc_apsp {
        struct serio *kbio;
        struct serio *padio;
        void __iomem *base;
-       struct clk *clk;
        int open_count;
        int irq;
 };
@@ -148,17 +146,11 @@ static int olpc_apsp_open(struct serio *port)
        struct olpc_apsp *priv = port->port_data;
        unsigned int tmp;
        unsigned long l;
-       int error;
 
        if (priv->open_count++ == 0) {
-               error = clk_prepare_enable(priv->clk);
-               if (error)
-                       return error;
-
                l = readl(priv->base + COMMAND_FIFO_STATUS);
                if (!(l & CMD_STS_MASK)) {
                        dev_err(priv->dev, "SP cannot accept commands.\n");
-                       clk_disable_unprepare(priv->clk);
                        return -EIO;
                }
 
@@ -179,8 +171,6 @@ static void olpc_apsp_close(struct serio *port)
                /* Disable interrupt 0 */
                tmp = readl(priv->base + PJ_INTERRUPT_MASK);
                writel(tmp | INT_0, priv->base + PJ_INTERRUPT_MASK);
-
-               clk_disable_unprepare(priv->clk);
        }
 }
 
@@ -208,10 +198,6 @@ static int olpc_apsp_probe(struct platform_device *pdev)
        if (priv->irq < 0)
                return priv->irq;
 
-       priv->clk = devm_clk_get(&pdev->dev, "sp");
-       if (IS_ERR(priv->clk))
-               return PTR_ERR(priv->clk);
-
        /* KEYBOARD */
        kb_serio = kzalloc(sizeof(struct serio), GFP_KERNEL);
        if (!kb_serio)
index 87ba23a..2a7b78b 100644 (file)
@@ -1991,16 +1991,13 @@ static void do_attach(struct iommu_dev_data *dev_data,
 
 static void do_detach(struct iommu_dev_data *dev_data)
 {
+       struct protection_domain *domain = dev_data->domain;
        struct amd_iommu *iommu;
        u16 alias;
 
        iommu = amd_iommu_rlookup_table[dev_data->devid];
        alias = dev_data->alias;
 
-       /* decrease reference counters */
-       dev_data->domain->dev_iommu[iommu->index] -= 1;
-       dev_data->domain->dev_cnt                 -= 1;
-
        /* Update data structures */
        dev_data->domain = NULL;
        list_del(&dev_data->list);
@@ -2010,6 +2007,16 @@ static void do_detach(struct iommu_dev_data *dev_data)
 
        /* Flush the DTE entry */
        device_flush_dte(dev_data);
+
+       /* Flush IOTLB */
+       domain_flush_tlb_pde(domain);
+
+       /* Wait for the flushes to finish */
+       domain_flush_complete(domain);
+
+       /* decrease reference counters - needs to happen after the flushes */
+       domain->dev_iommu[iommu->index] -= 1;
+       domain->dev_cnt                 -= 1;
 }
 
 /*
@@ -2617,13 +2624,13 @@ out_unmap:
                        bus_addr  = address + s->dma_address + (j << PAGE_SHIFT);
                        iommu_unmap_page(domain, bus_addr, PAGE_SIZE);
 
-                       if (--mapped_pages)
+                       if (--mapped_pages == 0)
                                goto out_free_iova;
                }
        }
 
 out_free_iova:
-       free_iova_fast(&dma_dom->iovad, address, npages);
+       free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages);
 
 out_err:
        return 0;
index 2bd9ac2..78188bf 100644 (file)
@@ -363,7 +363,7 @@ static int dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
-static int intel_iommu_sm = 1;
+static int intel_iommu_sm;
 static int iommu_identity_mapping;
 
 #define IDENTMAP_ALL           1
@@ -456,9 +456,9 @@ static int __init intel_iommu_setup(char *str)
                } else if (!strncmp(str, "sp_off", 6)) {
                        pr_info("Disable supported super page\n");
                        intel_iommu_superpage = 0;
-               } else if (!strncmp(str, "sm_off", 6)) {
-                       pr_info("Intel-IOMMU: disable scalable mode support\n");
-                       intel_iommu_sm = 0;
+               } else if (!strncmp(str, "sm_on", 5)) {
+                       pr_info("Intel-IOMMU: scalable mode supported\n");
+                       intel_iommu_sm = 1;
                } else if (!strncmp(str, "tboot_noforce", 13)) {
                        printk(KERN_INFO
                                "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
@@ -5294,7 +5294,7 @@ static void intel_iommu_put_resv_regions(struct device *dev,
        struct iommu_resv_region *entry, *next;
 
        list_for_each_entry_safe(entry, next, head, list) {
-               if (entry->type == IOMMU_RESV_RESERVED)
+               if (entry->type == IOMMU_RESV_MSI)
                        kfree(entry);
        }
 }
index 730f7da..7e0df67 100644 (file)
@@ -441,6 +441,10 @@ static int mtk_iommu_add_device(struct device *dev)
                iommu_spec.args_count = count;
 
                mtk_iommu_create_mapping(dev, &iommu_spec);
+
+               /* dev->iommu_fwspec might have changed */
+               fwspec = dev_iommu_fwspec_get(dev);
+
                of_node_put(iommu_spec.np);
        }
 
index 7f2a454..c3aba3f 100644 (file)
@@ -97,9 +97,14 @@ struct its_device;
  * The ITS structure - contains most of the infrastructure, with the
  * top-level MSI domain, the command queue, the collections, and the
  * list of devices writing to it.
+ *
+ * dev_alloc_lock has to be taken for device allocations, while the
+ * spinlock must be taken to parse data structures such as the device
+ * list.
  */
 struct its_node {
        raw_spinlock_t          lock;
+       struct mutex            dev_alloc_lock;
        struct list_head        entry;
        void __iomem            *base;
        phys_addr_t             phys_base;
@@ -156,6 +161,7 @@ struct its_device {
        void                    *itt;
        u32                     nr_ites;
        u32                     device_id;
+       bool                    shared;
 };
 
 static struct {
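
The comment added above the struct spells out the split: the new mutex covers the sleepable allocate/free side, while the existing raw spinlock stays for short, atomic walks of the data structures. A sketch of that discipline, using the helpers as the later hunks call them:

        /* Allocation side: may sleep, so take the mutex. */
        mutex_lock(&its->dev_alloc_lock);
        its_dev = its_find_device(its, dev_id);
        if (!its_dev)
                its_dev = its_create_device(its, dev_id, nvec, true);
        mutex_unlock(&its->dev_alloc_lock);

        /* Parsing side: short walks stay under the raw spinlock. */
        raw_spin_lock_irqsave(&its->lock, flags);
        /* ... walk its->its_device_list ... */
        raw_spin_unlock_irqrestore(&its->lock, flags);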
@@ -1580,6 +1586,9 @@ static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids)
                nr_irqs /= 2;
        } while (nr_irqs > 0);
 
+       if (!nr_irqs)
+               err = -ENOSPC;
+
        if (err)
                goto out;
 
@@ -2059,6 +2068,29 @@ static int __init allocate_lpi_tables(void)
        return 0;
 }
 
+static u64 its_clear_vpend_valid(void __iomem *vlpi_base)
+{
+       u32 count = 1000000;    /* 1s! */
+       bool clean;
+       u64 val;
+
+       val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
+       val &= ~GICR_VPENDBASER_Valid;
+       gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
+
+       do {
+               val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
+               clean = !(val & GICR_VPENDBASER_Dirty);
+               if (!clean) {
+                       count--;
+                       cpu_relax();
+                       udelay(1);
+               }
+       } while (!clean && count);
+
+       return val;
+}
+
 static void its_cpu_init_lpis(void)
 {
        void __iomem *rbase = gic_data_rdist_rd_base();
@@ -2144,6 +2176,30 @@ static void its_cpu_init_lpis(void)
        val |= GICR_CTLR_ENABLE_LPIS;
        writel_relaxed(val, rbase + GICR_CTLR);
 
+       if (gic_rdists->has_vlpis) {
+               void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
+
+               /*
+                * It's possible for a CPU to receive VLPIs before it is
+                * scheduled as a vPE, especially for the first CPU, and a
+                * VLPI with an INTID larger than 2^(IDbits+1) will be
+                * considered out of range and dropped by the GIC.
+                * So we initialize IDbits to a known value to avoid VLPI
+                * drops.
+                */
+               val = (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK;
+               pr_debug("GICv4: CPU%d: Init IDbits to 0x%llx for GICR_VPROPBASER\n",
+                       smp_processor_id(), val);
+               gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
+
+               /*
+                * Also clear Valid bit of GICR_VPENDBASER, in case some
+                * ancient programming gets left in and has the possibility
+                * of corrupting memory.
+                */
+               val = its_clear_vpend_valid(vlpi_base);
+               WARN_ON(val & GICR_VPENDBASER_Dirty);
+       }
+
        /* Make sure the GIC has seen the above */
        dsb(sy);
 out:
@@ -2422,6 +2478,7 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
        struct its_device *its_dev;
        struct msi_domain_info *msi_info;
        u32 dev_id;
+       int err = 0;
 
        /*
         * We ignore "dev" entierely, and rely on the dev_id that has
@@ -2444,6 +2501,7 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
                return -EINVAL;
        }
 
+       mutex_lock(&its->dev_alloc_lock);
        its_dev = its_find_device(its, dev_id);
        if (its_dev) {
                /*
@@ -2451,18 +2509,22 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
                 * another alias (PCI bridge of some sort). No need to
                 * create the device.
                 */
+               its_dev->shared = true;
                pr_debug("Reusing ITT for devID %x\n", dev_id);
                goto out;
        }
 
        its_dev = its_create_device(its, dev_id, nvec, true);
-       if (!its_dev)
-               return -ENOMEM;
+       if (!its_dev) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec));
 out:
+       mutex_unlock(&its->dev_alloc_lock);
        info->scratchpad[0].ptr = its_dev;
-       return 0;
+       return err;
 }
 
 static struct msi_domain_ops its_msi_domain_ops = {
@@ -2566,6 +2628,7 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
 {
        struct irq_data *d = irq_domain_get_irq_data(domain, virq);
        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+       struct its_node *its = its_dev->its;
        int i;
 
        for (i = 0; i < nr_irqs; i++) {
@@ -2580,8 +2643,14 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
                irq_domain_reset_irq_data(data);
        }
 
-       /* If all interrupts have been freed, start mopping the floor */
-       if (bitmap_empty(its_dev->event_map.lpi_map,
+       mutex_lock(&its->dev_alloc_lock);
+
+       /*
+        * If all interrupts have been freed, start mopping the
+        * floor. This is conditioned on the device not being shared.
+        */
+       if (!its_dev->shared &&
+           bitmap_empty(its_dev->event_map.lpi_map,
                         its_dev->event_map.nr_lpis)) {
                its_lpi_free(its_dev->event_map.lpi_map,
                             its_dev->event_map.lpi_base,
@@ -2593,6 +2662,8 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
                its_free_device(its_dev);
        }
 
+       mutex_unlock(&its->dev_alloc_lock);
+
        irq_domain_free_irqs_parent(domain, virq, nr_irqs);
 }
 
@@ -2755,26 +2826,11 @@ static void its_vpe_schedule(struct its_vpe *vpe)
 static void its_vpe_deschedule(struct its_vpe *vpe)
 {
        void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
-       u32 count = 1000000;    /* 1s! */
-       bool clean;
        u64 val;
 
-       /* We're being scheduled out */
-       val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
-       val &= ~GICR_VPENDBASER_Valid;
-       gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
-
-       do {
-               val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
-               clean = !(val & GICR_VPENDBASER_Dirty);
-               if (!clean) {
-                       count--;
-                       cpu_relax();
-                       udelay(1);
-               }
-       } while (!clean && count);
+       val = its_clear_vpend_valid(vlpi_base);
 
-       if (unlikely(!clean && !count)) {
+       if (unlikely(val & GICR_VPENDBASER_Dirty)) {
                pr_err_ratelimited("ITS virtual pending table not cleaning\n");
                vpe->idai = false;
                vpe->pending_last = true;
@@ -3517,6 +3573,7 @@ static int __init its_probe_one(struct resource *res,
        }
 
        raw_spin_lock_init(&its->lock);
+       mutex_init(&its->dev_alloc_lock);
        INIT_LIST_HEAD(&its->entry);
        INIT_LIST_HEAD(&its->its_device_list);
        typer = gic_read_typer(its_base + GITS_TYPER);
index 25f32e1..3496b61 100644 (file)
@@ -34,6 +34,9 @@
 #define SEL_INT_PENDING                (1 << 6)
 #define SEL_INT_NUM_MASK       0x3f
 
+#define MMP2_ICU_INT_ROUTE_PJ4_IRQ     (1 << 5)
+#define MMP2_ICU_INT_ROUTE_PJ4_FIQ     (1 << 6)
+
 struct icu_chip_data {
        int                     nr_irqs;
        unsigned int            virq_base;
@@ -190,7 +193,8 @@ static const struct mmp_intc_conf mmp_conf = {
 static const struct mmp_intc_conf mmp2_conf = {
        .conf_enable    = 0x20,
        .conf_disable   = 0x0,
-       .conf_mask      = 0x7f,
+       .conf_mask      = MMP2_ICU_INT_ROUTE_PJ4_IRQ |
+                         MMP2_ICU_INT_ROUTE_PJ4_FIQ,
 };
 
 static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs)
index 5385f57..2793333 100644 (file)
@@ -71,14 +71,17 @@ static void xtensa_mx_irq_mask(struct irq_data *d)
        unsigned int mask = 1u << d->hwirq;
 
        if (mask & (XCHAL_INTTYPE_MASK_EXTERN_EDGE |
-                               XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) {
-               set_er(1u << (xtensa_get_ext_irq_no(d->hwirq) -
-                                       HW_IRQ_MX_BASE), MIENG);
-       } else {
-               mask = __this_cpu_read(cached_irq_mask) & ~mask;
-               __this_cpu_write(cached_irq_mask, mask);
-               xtensa_set_sr(mask, intenable);
+                   XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) {
+               unsigned int ext_irq = xtensa_get_ext_irq_no(d->hwirq);
+
+               if (ext_irq >= HW_IRQ_MX_BASE) {
+                       set_er(1u << (ext_irq - HW_IRQ_MX_BASE), MIENG);
+                       return;
+               }
        }
+       mask = __this_cpu_read(cached_irq_mask) & ~mask;
+       __this_cpu_write(cached_irq_mask, mask);
+       xtensa_set_sr(mask, intenable);
 }
 
 static void xtensa_mx_irq_unmask(struct irq_data *d)
@@ -86,14 +89,17 @@ static void xtensa_mx_irq_unmask(struct irq_data *d)
        unsigned int mask = 1u << d->hwirq;
 
        if (mask & (XCHAL_INTTYPE_MASK_EXTERN_EDGE |
-                               XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) {
-               set_er(1u << (xtensa_get_ext_irq_no(d->hwirq) -
-                                       HW_IRQ_MX_BASE), MIENGSET);
-       } else {
-               mask |= __this_cpu_read(cached_irq_mask);
-               __this_cpu_write(cached_irq_mask, mask);
-               xtensa_set_sr(mask, intenable);
+                   XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) {
+               unsigned int ext_irq = xtensa_get_ext_irq_no(d->hwirq);
+
+               if (ext_irq >= HW_IRQ_MX_BASE) {
+                       set_er(1u << (ext_irq - HW_IRQ_MX_BASE), MIENGSET);
+                       return;
+               }
        }
+       mask |= __this_cpu_read(cached_irq_mask);
+       __this_cpu_write(cached_irq_mask, mask);
+       xtensa_set_sr(mask, intenable);
 }
 
 static void xtensa_mx_irq_enable(struct irq_data *d)
@@ -113,7 +119,11 @@ static void xtensa_mx_irq_ack(struct irq_data *d)
 
 static int xtensa_mx_irq_retrigger(struct irq_data *d)
 {
-       xtensa_set_sr(1 << d->hwirq, intset);
+       unsigned int mask = 1u << d->hwirq;
+
+       if (WARN_ON(mask & ~XCHAL_INTTYPE_MASK_SOFTWARE))
+               return 0;
+       xtensa_set_sr(mask, intset);
        return 1;
 }
 
index c200234..ab12328 100644 (file)
@@ -70,7 +70,11 @@ static void xtensa_irq_ack(struct irq_data *d)
 
 static int xtensa_irq_retrigger(struct irq_data *d)
 {
-       xtensa_set_sr(1 << d->hwirq, intset);
+       unsigned int mask = 1u << d->hwirq;
+
+       if (WARN_ON(mask & ~XCHAL_INTTYPE_MASK_SOFTWARE))
+               return 0;
+       xtensa_set_sr(mask, intset);
        return 1;
 }
 
index ab0b63a..e1de8b1 100644 (file)
@@ -633,7 +633,7 @@ gigaset_tty_ioctl(struct tty_struct *tty, struct file *file,
                        flush_send_queue(cs);
                        break;
                }
-               /* Pass through */
+               /* fall through */
 
        default:
                /* pass through to underlying serial device */
index 81dd465..71a8312 100644 (file)
@@ -656,7 +656,7 @@ hfcpci_fill_fifo(struct BCState *bcs)
                                schedule_event(bcs, B_ACKPENDING);
                        }
 
-                       dev_kfree_skb_any(bcs->tx_skb);
+                       dev_consume_skb_any(bcs->tx_skb);
                        bcs->tx_skb = skb_dequeue(&bcs->squeue);        /* fetch next data */
                }
                test_and_clear_bit(BC_FLG_BUSY, &bcs->Flag);
index dc1cded..43700fc 100644 (file)
@@ -3642,7 +3642,7 @@ isdn_tty_edit_at(const char *p, int count, modem_info *info)
                                                break;
                                        } else
                                                m->mdmcmdl = 0;
-                                       /* Fall through, check for 'A' */
+                                       /* Fall through - check for 'A' */
                                case 0:
                                        if (c == 'A') {
                                                m->mdmcmd[m->mdmcmdl] = c;
index 2a5f666..d11fe76 100644 (file)
@@ -354,7 +354,7 @@ EncodeMatrix(unsigned char *buf, int len, unsigned char *m, int mlen)
                                printk(KERN_WARNING "isdn_v110 (EncodeMatrix): buffer full!\n");
                                return line;
                        }
-                       /* else: fall through */
+                       /* fall through */
                case 128:
                        m[line] = 128;  /* leftmost -> set byte to 1000000 */
                        mbit = 64;      /* current bit in the matrix line */
index 211ed6c..5789787 100644 (file)
@@ -170,8 +170,8 @@ dev_expire_timer(struct timer_list *t)
        spin_lock_irqsave(&timer->dev->lock, flags);
        if (timer->id >= 0)
                list_move_tail(&timer->list, &timer->dev->expired);
-       spin_unlock_irqrestore(&timer->dev->lock, flags);
        wake_up_interruptible(&timer->dev->wait);
+       spin_unlock_irqrestore(&timer->dev->lock, flags);
 }
 
 static int
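
The one-line move is the whole fix: issuing the wakeup while dev->lock is still held means a reader that inspects the expired list under the same lock cannot miss it, nor race with the timer being torn down in between. An illustrative reader it pairs with (not the exact mISDN read path):

        wait_event_interruptible(dev->wait, !list_empty(&dev->expired));

        spin_lock_irqsave(&dev->lock, flags);
        /* ... pop an entry from dev->expired ... */
        spin_unlock_irqrestore(&dev->lock, flags);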
index 47d4e0d..dd538e6 100644 (file)
@@ -932,7 +932,7 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio)
        if (IS_ERR(bip))
                return PTR_ERR(bip);
 
-       tag_len = io->cc->on_disk_tag_size * bio_sectors(bio);
+       tag_len = io->cc->on_disk_tag_size * (bio_sectors(bio) >> io->cc->sector_shift);
 
        bip->bip_iter.bi_size = tag_len;
        bip->bip_iter.bi_sector = io->cc->start + io->sector;
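
bio_sectors() counts 512-byte sectors, but on_disk_tag_size is per crypt sector, so with crypt sectors larger than 512 bytes the old product oversized the integrity tag area. A worked example with 4 KiB crypt sectors (sector_shift == 3) and hypothetical 16-byte tags:

        /* One 4 KiB bio: bio_sectors() == 8, but only one crypt sector. */
        unsigned int tag_len_old = 16 * 8;         /* 128 bytes: too large */
        unsigned int tag_len_new = 16 * (8 >> 3);  /*  16 bytes: correct   */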
index 4eb5f8c..a20531e 100644 (file)
@@ -131,7 +131,7 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
 static void rq_completed(struct mapped_device *md)
 {
        /* nudge anyone waiting on suspend queue */
-       if (unlikely(waitqueue_active(&md->wait)))
+       if (unlikely(wq_has_sleeper(&md->wait)))
                wake_up(&md->wait);
 
        /*
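
waitqueue_active() is a lockless peek and can race against a task that is between queueing itself and re-checking its condition; wq_has_sleeper() issues the smp_mb() that pairs with the waiter's prepare_to_wait(), so either the waker sees the sleeper or the sleeper sees the new state. The two sides, with md->pending standing in for the real suspend bookkeeping:

        /* Waker: change state first, then look for sleepers. */
        atomic_dec(&md->pending);
        if (unlikely(wq_has_sleeper(&md->wait)))   /* smp_mb() inside */
                wake_up(&md->wait);

        /* Waiter: queue first, then re-check the state. */
        prepare_to_wait(&md->wait, &wait, TASK_UNINTERRUPTIBLE);
        if (atomic_read(&md->pending))
                io_schedule();
        finish_wait(&md->wait, &wait);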
index ca8af21..e83b636 100644 (file)
@@ -257,6 +257,7 @@ struct pool {
 
        spinlock_t lock;
        struct bio_list deferred_flush_bios;
+       struct bio_list deferred_flush_completions;
        struct list_head prepared_mappings;
        struct list_head prepared_discards;
        struct list_head prepared_discards_pt2;
@@ -956,6 +957,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
        mempool_free(m, &m->tc->pool->mapping_pool);
 }
 
+static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
+{
+       struct pool *pool = tc->pool;
+       unsigned long flags;
+
+       /*
+        * If the bio has the REQ_FUA flag set we must commit the metadata
+        * before signaling its completion.
+        */
+       if (!bio_triggers_commit(tc, bio)) {
+               bio_endio(bio);
+               return;
+       }
+
+       /*
+        * Complete bio with an error if earlier I/O caused changes to the
+        * metadata that can't be committed, e.g., due to I/O errors on the
+        * metadata device.
+        */
+       if (dm_thin_aborted_changes(tc->td)) {
+               bio_io_error(bio);
+               return;
+       }
+
+       /*
+        * Batch together any bios that trigger commits and then issue a
+        * single commit for them in process_deferred_bios().
+        */
+       spin_lock_irqsave(&pool->lock, flags);
+       bio_list_add(&pool->deferred_flush_completions, bio);
+       spin_unlock_irqrestore(&pool->lock, flags);
+}
+
 static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 {
        struct thin_c *tc = m->tc;
@@ -988,7 +1022,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
         */
        if (bio) {
                inc_remap_and_issue_cell(tc, m->cell, m->data_block);
-               bio_endio(bio);
+               complete_overwrite_bio(tc, bio);
        } else {
                inc_all_io_entry(tc->pool, m->cell->holder);
                remap_and_issue(tc, m->cell->holder, m->data_block);
@@ -2317,7 +2351,7 @@ static void process_deferred_bios(struct pool *pool)
 {
        unsigned long flags;
        struct bio *bio;
-       struct bio_list bios;
+       struct bio_list bios, bio_completions;
        struct thin_c *tc;
 
        tc = get_first_thin(pool);
@@ -2328,26 +2362,36 @@ static void process_deferred_bios(struct pool *pool)
        }
 
        /*
-        * If there are any deferred flush bios, we must commit
-        * the metadata before issuing them.
+        * If there are any deferred flush bios, we must commit the metadata
+        * before issuing them or signaling their completion.
         */
        bio_list_init(&bios);
+       bio_list_init(&bio_completions);
+
        spin_lock_irqsave(&pool->lock, flags);
        bio_list_merge(&bios, &pool->deferred_flush_bios);
        bio_list_init(&pool->deferred_flush_bios);
+
+       bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
+       bio_list_init(&pool->deferred_flush_completions);
        spin_unlock_irqrestore(&pool->lock, flags);
 
-       if (bio_list_empty(&bios) &&
+       if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
            !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
                return;
 
        if (commit(pool)) {
+               bio_list_merge(&bios, &bio_completions);
+
                while ((bio = bio_list_pop(&bios)))
                        bio_io_error(bio);
                return;
        }
        pool->last_commit_jiffies = jiffies;
 
+       while ((bio = bio_list_pop(&bio_completions)))
+               bio_endio(bio);
+
        while ((bio = bio_list_pop(&bios)))
                generic_make_request(bio);
 }
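
With deferred_flush_completions in place, a REQ_FUA overwrite is never completed ahead of the metadata it depends on: its endio is parked and only replayed once a single commit() has covered the whole batch. The two sides, condensed (list manipulation happens under pool->lock in the real paths):

        /* Completion side, complete_overwrite_bio(): */
        if (bio_triggers_commit(tc, bio))
                bio_list_add(&pool->deferred_flush_completions, bio);
        else
                bio_endio(bio);

        /* Worker side, process_deferred_bios(): one commit, then drain. */
        if (!commit(pool))
                while ((bio = bio_list_pop(&bio_completions)))
                        bio_endio(bio);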
@@ -2954,6 +2998,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
        INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
        spin_lock_init(&pool->lock);
        bio_list_init(&pool->deferred_flush_bios);
+       bio_list_init(&pool->deferred_flush_completions);
        INIT_LIST_HEAD(&pool->prepared_mappings);
        INIT_LIST_HEAD(&pool->prepared_discards);
        INIT_LIST_HEAD(&pool->prepared_discards_pt2);
index 2b53c38..515e6af 100644 (file)
@@ -699,7 +699,7 @@ static void end_io_acct(struct dm_io *io)
                                    true, duration, &io->stats_aux);
 
        /* nudge anyone waiting on suspend queue */
-       if (unlikely(waitqueue_active(&md->wait)))
+       if (unlikely(wq_has_sleeper(&md->wait)))
                wake_up(&md->wait);
 }
 
@@ -1336,7 +1336,11 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
                        return r;
        }
 
-       bio_trim(clone, sector - clone->bi_iter.bi_sector, len);
+       bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
+       clone->bi_iter.bi_size = to_bytes(len);
+
+       if (bio_integrity(bio))
+               bio_integrity_trim(clone);
 
        return 0;
 }
index 1d54109..fa47249 100644 (file)
@@ -1863,6 +1863,20 @@ static void end_sync_read(struct bio *bio)
                reschedule_retry(r1_bio);
 }
 
+static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio)
+{
+       sector_t sync_blocks = 0;
+       sector_t s = r1_bio->sector;
+       long sectors_to_go = r1_bio->sectors;
+
+       /* make sure these bits don't get cleared. */
+       do {
+               md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1);
+               s += sync_blocks;
+               sectors_to_go -= sync_blocks;
+       } while (sectors_to_go > 0);
+}
+
 static void end_sync_write(struct bio *bio)
 {
        int uptodate = !bio->bi_status;
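
The new abort_sync_write() helper walks the sync range in whatever chunk size the bitmap code reports back, so the loop advances by the callee's answer rather than a fixed stride. A compact sketch of that loop shape (end_sync_chunk() is a made-up stand-in for md_bitmap_end_sync()):

#include <stdio.h>

/* Stand-in for md_bitmap_end_sync(): covers up to 8 sectors per call
 * and reports how many it actually handled via *covered. */
static void end_sync_chunk(unsigned long start, unsigned long *covered)
{
    *covered = 8;
    printf("kept bits for sectors %lu..%lu\n", start, start + *covered - 1);
}

int main(void)
{
    unsigned long s = 0, chunk = 0;
    long to_go = 20;    /* r1_bio->sectors in the patch */

    do {
        end_sync_chunk(s, &chunk);
        s += chunk;
        to_go -= chunk;
    } while (to_go > 0);
    return 0;
}
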
@@ -1874,15 +1888,7 @@ static void end_sync_write(struct bio *bio)
        struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev;
 
        if (!uptodate) {
-               sector_t sync_blocks = 0;
-               sector_t s = r1_bio->sector;
-               long sectors_to_go = r1_bio->sectors;
-               /* make sure these bits doesn't get cleared. */
-               do {
-                       md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1);
-                       s += sync_blocks;
-                       sectors_to_go -= sync_blocks;
-               } while (sectors_to_go > 0);
+               abort_sync_write(mddev, r1_bio);
                set_bit(WriteErrorSeen, &rdev->flags);
                if (!test_and_set_bit(WantReplacement, &rdev->flags))
                        set_bit(MD_RECOVERY_NEEDED, &
@@ -2172,8 +2178,10 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
                     (i == r1_bio->read_disk ||
                      !test_bit(MD_RECOVERY_SYNC, &mddev->recovery))))
                        continue;
-               if (test_bit(Faulty, &conf->mirrors[i].rdev->flags))
+               if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) {
+                       abort_sync_write(mddev, r1_bio);
                        continue;
+               }
 
                bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
                if (test_bit(FailFast, &conf->mirrors[i].rdev->flags))
index ec3a5ef..cbbe6b6 100644
@@ -1935,12 +1935,14 @@ out:
 }
 
 static struct stripe_head *
-r5c_recovery_alloc_stripe(struct r5conf *conf,
-                         sector_t stripe_sect)
+r5c_recovery_alloc_stripe(
+               struct r5conf *conf,
+               sector_t stripe_sect,
+               int noblock)
 {
        struct stripe_head *sh;
 
-       sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0);
+       sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
        if (!sh)
                return NULL;  /* no more stripe available */
 
@@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
                                                stripe_sect);
 
                if (!sh) {
-                       sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
+                       sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1);
                        /*
                         * cannot get stripe from raid5_get_active_stripe
                         * try replay some stripes
@@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
                                r5c_recovery_replay_stripes(
                                        cached_stripe_list, ctx);
                                sh = r5c_recovery_alloc_stripe(
-                                       conf, stripe_sect);
+                                       conf, stripe_sect, 1);
                        }
                        if (!sh) {
+                               int new_size = conf->min_nr_stripes * 2;
                                pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
                                        mdname(mddev),
-                                       conf->min_nr_stripes * 2);
-                               raid5_set_cache_size(mddev,
-                                                    conf->min_nr_stripes * 2);
-                               sh = r5c_recovery_alloc_stripe(conf,
-                                                              stripe_sect);
+                                       new_size);
+                               ret = raid5_set_cache_size(mddev, new_size);
+                               if (conf->min_nr_stripes <= new_size / 2) {
+                                       pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n",
+                                               mdname(mddev),
+                                               ret,
+                                               new_size,
+                                               conf->min_nr_stripes,
+                                               conf->max_nr_stripes);
+                                       return -ENOMEM;
+                               }
+                               sh = r5c_recovery_alloc_stripe(
+                                       conf, stripe_sect, 0);
                        }
                        if (!sh) {
                                pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
-                                      mdname(mddev));
+                                       mdname(mddev));
                                return -ENOMEM;
                        }
                        list_add_tail(&sh->lru, cached_stripe_list);
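
The recovery path above escalates in three steps: a non-blocking stripe allocation, a retry after replaying cached stripes, and finally a blocking attempt after doubling the stripe cache; it only gives up with -ENOMEM when the cache could not actually grow. A userspace sketch of that escalation (alloc_stripe() and grow_cache() are stand-ins with made-up failure behavior, not the raid5 API):

#include <stdio.h>
#include <stdlib.h>

static int cache_size = 256;

/* Stand-in for raid5_get_active_stripe(): pretend the first two
 * attempts fail under memory pressure. */
static void *alloc_stripe(int noblock)
{
    static int calls;

    (void)noblock;
    return ++calls > 2 ? malloc(64) : NULL;
}

/* Stand-in for raid5_set_cache_size(); may partially fail in the kernel. */
static int grow_cache(int new_size)
{
    cache_size = new_size;
    return 0;
}

static void *get_stripe_for_recovery(void)
{
    void *sh = alloc_stripe(1);     /* first try: don't block */

    if (!sh)
        sh = alloc_stripe(1);       /* retry after replaying stripes */
    if (!sh) {
        int new_size = cache_size * 2;
        int ret = grow_cache(new_size);

        if (cache_size <= new_size / 2) {
            /* cache did not grow at all: give up with -ENOMEM */
            fprintf(stderr, "cannot grow cache, ret=%d\n", ret);
            return NULL;
        }
        sh = alloc_stripe(0);       /* last try: allowed to block */
    }
    return sh;
}

int main(void)
{
    void *sh = get_stripe_for_recovery();

    printf("stripe %s\n", sh ? "allocated" : "unavailable");
    free(sh);
    return 0;
}
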
index 4990f03..cecea90 100644
@@ -6369,6 +6369,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
 int
 raid5_set_cache_size(struct mddev *mddev, int size)
 {
+       int result = 0;
        struct r5conf *conf = mddev->private;
 
        if (size <= 16 || size > 32768)
@@ -6385,11 +6386,14 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 
        mutex_lock(&conf->cache_size_mutex);
        while (size > conf->max_nr_stripes)
-               if (!grow_one_stripe(conf, GFP_KERNEL))
+               if (!grow_one_stripe(conf, GFP_KERNEL)) {
+                       conf->min_nr_stripes = conf->max_nr_stripes;
+                       result = -ENOMEM;
                        break;
+               }
        mutex_unlock(&conf->cache_size_mutex);
 
-       return 0;
+       return result;
 }
 EXPORT_SYMBOL(raid5_set_cache_size);
 
index f461460..76f9909 100644
@@ -1419,7 +1419,7 @@ config MFD_TPS65217
 
 config MFD_TPS68470
        bool "TI TPS68470 Power Management / LED chips"
-       depends on ACPI && I2C=y
+       depends on ACPI && PCI && I2C=y
        select MFD_CORE
        select REGMAP_I2C
        select I2C_DESIGNWARE_PLATFORM
index 1fc8ea0..ca4c9cc 100644
@@ -401,8 +401,11 @@ static void mei_io_list_flush_cl(struct list_head *head,
        struct mei_cl_cb *cb, *next;
 
        list_for_each_entry_safe(cb, next, head, list) {
-               if (cl == cb->cl)
+               if (cl == cb->cl) {
                        list_del_init(&cb->list);
+                       if (cb->fop_type == MEI_FOP_READ)
+                               mei_io_cb_free(cb);
+               }
        }
 }
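
The mei fix frees MEI_FOP_READ control blocks while flushing, using the classic safe-iteration pattern: remember the next node before unlinking, and only then decide whether the node is also freed. A self-contained sketch of the same walk over a singly linked list (the cb layout here is invented for illustration):

#include <stdio.h>
#include <stdlib.h>

enum fop { FOP_READ, FOP_WRITE };

struct cb {
    struct cb *next;
    int owner;
    enum fop type;
};

/* Unlink every cb owned by 'owner'; READ cbs are also freed, as in the
 * mei fix. 'next' is saved before the node is touched, which is what
 * list_for_each_entry_safe() buys the kernel version. */
static void flush(struct cb **head, int owner)
{
    struct cb **pp = head;

    while (*pp) {
        struct cb *cb = *pp;
        struct cb *next = cb->next;

        if (cb->owner == owner) {
            *pp = next;                 /* list_del_init() analogue */
            if (cb->type == FOP_READ)
                free(cb);               /* mei_io_cb_free() analogue */
            /* non-READ cbs stay owned elsewhere, as in the driver */
        } else {
            pp = &cb->next;
        }
    }
}

int main(void)
{
    struct cb *head = malloc(sizeof(*head));

    if (!head)
        return 1;
    head->next = NULL;
    head->owner = 1;
    head->type = FOP_READ;

    flush(&head, 1);
    printf("list %s\n", head ? "non-empty" : "empty");
    return 0;
}
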
 
index 23739a6..bb1ee98 100644
 #define MEI_DEV_ID_CNP_H      0xA360  /* Cannon Point H */
 #define MEI_DEV_ID_CNP_H_4    0xA364  /* Cannon Point H 4 (iTouch) */
 
+#define MEI_DEV_ID_ICP_LP     0x34E0  /* Ice Lake Point LP */
+
 /*
  * MEI HW Section
  */
index e89497f..3ab946a 100644
@@ -105,6 +105,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
        {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)},
 
+       {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},
+
        /* required last entry */
        {0, }
 };
index 2bfa3a9..744757f 100644
@@ -47,7 +47,8 @@
  * @dc: Virtio device control
  * @vpdev: VOP device which is the parent for this virtio device
  * @vr: Buffer for accessing the VRING
- * @used: Buffer for used
+ * @used_virt: Virtual address of used ring
+ * @used: DMA address of used ring
  * @used_size: Size of the used buffer
  * @reset_done: Track whether VOP reset is complete
  * @virtio_cookie: Cookie returned upon requesting an interrupt
@@ -61,6 +62,7 @@ struct _vop_vdev {
        struct mic_device_ctrl __iomem *dc;
        struct vop_device *vpdev;
        void __iomem *vr[VOP_MAX_VRINGS];
+       void *used_virt[VOP_MAX_VRINGS];
        dma_addr_t used[VOP_MAX_VRINGS];
        int used_size[VOP_MAX_VRINGS];
        struct completion reset_done;
@@ -260,12 +262,12 @@ static bool vop_notify(struct virtqueue *vq)
 static void vop_del_vq(struct virtqueue *vq, int n)
 {
        struct _vop_vdev *vdev = to_vopvdev(vq->vdev);
-       struct vring *vr = (struct vring *)(vq + 1);
        struct vop_device *vpdev = vdev->vpdev;
 
        dma_unmap_single(&vpdev->dev, vdev->used[n],
                         vdev->used_size[n], DMA_BIDIRECTIONAL);
-       free_pages((unsigned long)vr->used, get_order(vdev->used_size[n]));
+       free_pages((unsigned long)vdev->used_virt[n],
+                  get_order(vdev->used_size[n]));
        vring_del_virtqueue(vq);
        vpdev->hw_ops->iounmap(vpdev, vdev->vr[n]);
        vdev->vr[n] = NULL;
@@ -283,6 +285,26 @@ static void vop_del_vqs(struct virtio_device *dev)
                vop_del_vq(vq, idx++);
 }
 
+static struct virtqueue *vop_new_virtqueue(unsigned int index,
+                                     unsigned int num,
+                                     struct virtio_device *vdev,
+                                     bool context,
+                                     void *pages,
+                                     bool (*notify)(struct virtqueue *vq),
+                                     void (*callback)(struct virtqueue *vq),
+                                     const char *name,
+                                     void *used)
+{
+       bool weak_barriers = false;
+       struct vring vring;
+
+       vring_init(&vring, num, pages, MIC_VIRTIO_RING_ALIGN);
+       vring.used = used;
+
+       return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
+                                    notify, callback, name);
+}
+
 /*
  * This routine will assign vrings allocated in host/io memory. Code in
  * virtio_ring.c however continues to access this io memory as if it were local
@@ -302,7 +324,6 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
        struct _mic_vring_info __iomem *info;
        void *used;
        int vr_size, _vr_size, err, magic;
-       struct vring *vr;
        u8 type = ioread8(&vdev->desc->type);
 
        if (index >= ioread8(&vdev->desc->num_vq))
@@ -322,17 +343,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
                return ERR_PTR(-ENOMEM);
        vdev->vr[index] = va;
        memset_io(va, 0x0, _vr_size);
-       vq = vring_new_virtqueue(
-                               index,
-                               le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN,
-                               dev,
-                               false,
-                               ctx,
-                               (void __force *)va, vop_notify, callback, name);
-       if (!vq) {
-               err = -ENOMEM;
-               goto unmap;
-       }
+
        info = va + _vr_size;
        magic = ioread32(&info->magic);
 
@@ -341,18 +352,27 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
                goto unmap;
        }
 
-       /* Allocate and reassign used ring now */
        vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 +
                                             sizeof(struct vring_used_elem) *
                                             le16_to_cpu(config.num));
        used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                        get_order(vdev->used_size[index]));
+       vdev->used_virt[index] = used;
        if (!used) {
                err = -ENOMEM;
                dev_err(_vop_dev(vdev), "%s %d err %d\n",
                        __func__, __LINE__, err);
-               goto del_vq;
+               goto unmap;
+       }
+
+       vq = vop_new_virtqueue(index, le16_to_cpu(config.num), dev, ctx,
+                              (void __force *)va, vop_notify, callback,
+                              name, used);
+       if (!vq) {
+               err = -ENOMEM;
+               goto free_used;
        }
+
        vdev->used[index] = dma_map_single(&vpdev->dev, used,
                                            vdev->used_size[index],
                                            DMA_BIDIRECTIONAL);
@@ -360,26 +380,17 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
                err = -ENOMEM;
                dev_err(_vop_dev(vdev), "%s %d err %d\n",
                        __func__, __LINE__, err);
-               goto free_used;
+               goto del_vq;
        }
        writeq(vdev->used[index], &vqconfig->used_address);
-       /*
-        * To reassign the used ring here we are directly accessing
-        * struct vring_virtqueue which is a private data structure
-        * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in
-        * vring_new_virtqueue() would ensure that
-        *  (&vq->vring == (struct vring *) (&vq->vq + 1));
-        */
-       vr = (struct vring *)(vq + 1);
-       vr->used = used;
 
        vq->priv = vdev;
        return vq;
+del_vq:
+       vring_del_virtqueue(vq);
 free_used:
        free_pages((unsigned long)used,
                   get_order(vdev->used_size[index]));
-del_vq:
-       vring_del_virtqueue(vq);
 unmap:
        vpdev->hw_ops->iounmap(vpdev, vdev->vr[index]);
        return ERR_PTR(err);
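
The reordered labels above restore the invariant that error unwinding happens in strict reverse order of setup: once the virtqueue is created after the used-ring buffer, `del_vq` must come before `free_used`. A generic sketch of that goto ladder (malloc/free stand in for the map, allocate and virtqueue-create steps; the step 4 failure is a placeholder):

#include <stdio.h>
#include <stdlib.h>

/* Each label undoes exactly the steps completed before the jump,
 * newest first: the order the vop patch restores. */
static int setup(void)
{
    void *ring_mem, *used = NULL, *vq = NULL;
    int err = -1;

    ring_mem = malloc(64);          /* step 1: map the vring */
    if (!ring_mem)
        return -1;

    used = malloc(64);              /* step 2: used-ring buffer */
    if (!used)
        goto unmap;

    vq = malloc(64);                /* step 3: create the virtqueue */
    if (!vq)
        goto free_used;

    if (0 /* step 4: dma_map_single() failed */)
        goto del_vq;                /* undo step 3 first... */

    printf("setup ok\n");
    free(vq);
    free(used);
    free(ring_mem);
    return 0;

del_vq:
    free(vq);                       /* ...then step 2... */
free_used:
    free(used);
unmap:
    free(ring_mem);                 /* ...then step 1 */
    return err;
}

int main(void)
{
    return setup();
}
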
@@ -581,6 +592,8 @@ static int _vop_remove_device(struct mic_device_desc __iomem *d,
        int ret = -1;
 
        if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) {
+               struct device *dev = get_device(&vdev->vdev.dev);
+
                dev_dbg(&vpdev->dev,
                        "%s %d config_change %d type %d vdev %p\n",
                        __func__, __LINE__,
@@ -592,7 +605,7 @@ static int _vop_remove_device(struct mic_device_desc __iomem *d,
                iowrite8(-1, &dc->h2c_vdev_db);
                if (status & VIRTIO_CONFIG_S_DRIVER_OK)
                        wait_for_completion(&vdev->reset_done);
-               put_device(&vdev->vdev.dev);
+               put_device(dev);
                iowrite8(1, &dc->guest_ack);
                dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n",
                        __func__, __LINE__, ioread8(&dc->guest_ack));
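
Taking a reference with get_device() before tearing the device down keeps the final put from freeing the object while the remove path still waits on reset_done. A tiny refcount sketch of holding a local reference across the teardown window (struct dev and its helpers are stand-ins, not the driver-core API):

#include <stdio.h>
#include <stdlib.h>

struct dev { int refs; };

static struct dev *get_device(struct dev *d)
{
    d->refs++;
    return d;
}

static void put_device(struct dev *d)
{
    if (--d->refs == 0) {
        printf("released\n");
        free(d);
    }
}

int main(void)
{
    struct dev *vdev = malloc(sizeof(*vdev));
    struct dev *dev;

    if (!vdev)
        return 1;
    vdev->refs = 1;

    /* Hold a local reference across the teardown window, as the patch
     * does with get_device(&vdev->vdev.dev). */
    dev = get_device(vdev);

    put_device(vdev);   /* the unregister path may drop the last ref */
    /* ... wait_for_completion(&vdev->reset_done) happens here ... */
    put_device(dev);    /* our reference keeps vdev alive until now */
    return 0;
}
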
index aef1185..14f3fdb 100644
@@ -2112,7 +2112,7 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
                if (waiting)
                        wake_up(&mq->wait);
                else
-                       kblockd_schedule_work(&mq->complete_work);
+                       queue_work(mq->card->complete_wq, &mq->complete_work);
 
                return;
        }
@@ -2924,6 +2924,13 @@ static int mmc_blk_probe(struct mmc_card *card)
 
        mmc_fixup_device(card, mmc_blk_fixups);
 
+       card->complete_wq = alloc_workqueue("mmc_complete",
+                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+       if (unlikely(!card->complete_wq)) {
+               pr_err("Failed to create mmc completion workqueue\n");
+               return -ENOMEM;
+       }
+
        md = mmc_blk_alloc(card);
        if (IS_ERR(md))
                return PTR_ERR(md);
@@ -2987,6 +2994,7 @@ static void mmc_blk_remove(struct mmc_card *card)
        pm_runtime_put_noidle(&card->dev);
        mmc_blk_remove_req(md);
        dev_set_drvdata(&card->dev, NULL);
+       destroy_workqueue(card->complete_wq);
 }
 
 static int _mmc_blk_suspend(struct mmc_card *card)
index 5029352..c9e7aa5 100644
@@ -1431,6 +1431,8 @@ static int bcm2835_probe(struct platform_device *pdev)
 
 err:
        dev_dbg(dev, "%s -> err %d\n", __func__, ret);
+       if (host->dma_chan_rxtx)
+               dma_release_channel(host->dma_chan_rxtx);
        mmc_free_host(mmc);
 
        return ret;
index f19ec60..2eba507 100644
@@ -1338,7 +1338,8 @@ static int meson_mmc_probe(struct platform_device *pdev)
               host->regs + SD_EMMC_IRQ_EN);
 
        ret = request_threaded_irq(host->irq, meson_mmc_irq,
-                       meson_mmc_irq_thread, IRQF_SHARED, NULL, host);
+                                  meson_mmc_irq_thread, IRQF_SHARED,
+                                  dev_name(&pdev->dev), host);
        if (ret)
                goto err_init_clk;
 
index 8afeaf8..833ef05 100644
@@ -846,7 +846,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
 
        if (timing == MMC_TIMING_MMC_HS400 &&
            host->dev_comp->hs400_tune)
-               sdr_set_field(host->base + PAD_CMD_TUNE,
+               sdr_set_field(host->base + tune_reg,
                              MSDC_PAD_TUNE_CMDRRDLY,
                              host->hs400_cmd_int_delay);
        dev_dbg(host->dev, "sclk: %d, timing: %d\n", host->mmc->actual_clock,
index 279e326..70fadc9 100644
@@ -1399,13 +1399,37 @@ static int sunxi_mmc_probe(struct platform_device *pdev)
        mmc->caps              |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
                                  MMC_CAP_ERASE | MMC_CAP_SDIO_IRQ;
 
-       if (host->cfg->clk_delays || host->use_new_timings)
+       /*
+        * Some H5 devices do not have signal traces precise enough to
+        * use HS DDR mode for their eMMC chips.
+        *
+        * We still enable HS DDR modes for all the other controller
+        * variants that support them.
+        */
+       if ((host->cfg->clk_delays || host->use_new_timings) &&
+           !of_device_is_compatible(pdev->dev.of_node,
+                                    "allwinner,sun50i-h5-emmc"))
                mmc->caps      |= MMC_CAP_1_8V_DDR | MMC_CAP_3_3V_DDR;
 
        ret = mmc_of_parse(mmc);
        if (ret)
                goto error_free_dma;
 
+       /*
+        * If we don't support delay chains in the SoC, we can't use any
+        * of the higher speed modes. Mask them out in case the device
+        * tree specifies the properties for them, which gets added to
+        * the caps by mmc_of_parse() above.
+        */
+       if (!(host->cfg->clk_delays || host->use_new_timings)) {
+               mmc->caps &= ~(MMC_CAP_3_3V_DDR | MMC_CAP_1_8V_DDR |
+                              MMC_CAP_1_2V_DDR | MMC_CAP_UHS);
+               mmc->caps2 &= ~MMC_CAP2_HS200;
+       }
+
+       /* TODO: This driver doesn't support HS400 mode yet */
+       mmc->caps2 &= ~MMC_CAP2_HS400;
+
        ret = sunxi_mmc_init_host(host);
        if (ret)
                goto error_free_dma;
index 60104e1..37f174c 100644
@@ -480,6 +480,10 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent,
                /* let's register it anyway to preserve ordering */
                slave->offset = 0;
                slave->mtd.size = 0;
+
+               /* Initialize ->erasesize to make add_mtd_device() happy. */
+               slave->mtd.erasesize = parent->erasesize;
+
                printk(KERN_ERR"mtd: partition \"%s\" is out of reach -- disabled\n",
                        part->name);
                goto out_register;
@@ -632,7 +636,6 @@ err_remove_part:
        mutex_unlock(&mtd_partitions_mutex);
 
        free_partition(new);
-       pr_info("%s:%i\n", __func__, __LINE__);
 
        return ret;
 }
index bd4cfac..a4768df 100644
@@ -155,9 +155,10 @@ int gpmi_init(struct gpmi_nand_data *this)
 
        /*
         * Reset BCH here, too. We got failures otherwise :(
-        * See later BCH reset for explanation of MX23 handling
+        * See later BCH reset for explanation of MX23 and MX28 handling
         */
-       ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this));
+       ret = gpmi_reset_block(r->bch_regs,
+                              GPMI_IS_MX23(this) || GPMI_IS_MX28(this));
        if (ret)
                goto err_out;
 
@@ -263,12 +264,10 @@ int bch_set_geometry(struct gpmi_nand_data *this)
        /*
        * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this
        * chip, otherwise it will lock up. So we skip resetting BCH on the MX23.
-       * On the other hand, the MX28 needs the reset, because one case has been
-       * seen where the BCH produced ECC errors constantly after 10000
-       * consecutive reboots. The latter case has not been seen on the MX23
-       * yet, still we don't know if it could happen there as well.
+       * and MX28.
        */
-       ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this));
+       ret = gpmi_reset_block(r->bch_regs,
+                              GPMI_IS_MX23(this) || GPMI_IS_MX28(this));
        if (ret)
                goto err_out;
 
index cca4b24..839494a 100644
@@ -410,6 +410,7 @@ static int nand_check_wp(struct nand_chip *chip)
 
 /**
  * nand_fill_oob - [INTERN] Transfer client buffer to oob
+ * @chip: NAND chip object
  * @oob: oob data buffer
  * @len: oob data write length
  * @ops: oob ops structure
index 1b722fe..19a2b56 100644
@@ -158,7 +158,7 @@ static u32 add_marker_len(struct nand_bbt_descr *td)
 
 /**
  * read_bbt - [GENERIC] Read the bad block table starting from page
- * @chip: NAND chip object
+ * @this: NAND chip object
  * @buf: temporary buffer
  * @page: the starting page
  * @num: the number of bbt descriptors to read
index 479c2f2..fa87ae2 100644
@@ -304,24 +304,30 @@ static int spinand_write_to_cache_op(struct spinand_device *spinand,
        struct nand_device *nand = spinand_to_nand(spinand);
        struct mtd_info *mtd = nanddev_to_mtd(nand);
        struct nand_page_io_req adjreq = *req;
-       unsigned int nbytes = 0;
-       void *buf = NULL;
+       void *buf = spinand->databuf;
+       unsigned int nbytes;
        u16 column = 0;
        int ret;
 
-       memset(spinand->databuf, 0xff,
-              nanddev_page_size(nand) +
-              nanddev_per_page_oobsize(nand));
+       /*
+        * Looks like PROGRAM LOAD (AKA write cache) does not necessarily reset
+        * the cache content to 0xFF (depends on vendor implementation), so we
+        * must fill the page cache entirely even if we only want to program
+        * the data portion of the page, otherwise we might corrupt the BBM or
+        * user data previously programmed in OOB area.
+        */
+       nbytes = nanddev_page_size(nand) + nanddev_per_page_oobsize(nand);
+       memset(spinand->databuf, 0xff, nbytes);
+       adjreq.dataoffs = 0;
+       adjreq.datalen = nanddev_page_size(nand);
+       adjreq.databuf.out = spinand->databuf;
+       adjreq.ooblen = nanddev_per_page_oobsize(nand);
+       adjreq.ooboffs = 0;
+       adjreq.oobbuf.out = spinand->oobbuf;
 
-       if (req->datalen) {
+       if (req->datalen)
                memcpy(spinand->databuf + req->dataoffs, req->databuf.out,
                       req->datalen);
-               adjreq.dataoffs = 0;
-               adjreq.datalen = nanddev_page_size(nand);
-               adjreq.databuf.out = spinand->databuf;
-               nbytes = adjreq.datalen;
-               buf = spinand->databuf;
-       }
 
        if (req->ooblen) {
                if (req->mode == MTD_OPS_AUTO_OOB)
@@ -332,14 +338,6 @@ static int spinand_write_to_cache_op(struct spinand_device *spinand,
                else
                        memcpy(spinand->oobbuf + req->ooboffs, req->oobbuf.out,
                               req->ooblen);
-
-               adjreq.ooblen = nanddev_per_page_oobsize(nand);
-               adjreq.ooboffs = 0;
-               nbytes += nanddev_per_page_oobsize(nand);
-               if (!buf) {
-                       buf = spinand->oobbuf;
-                       column = nanddev_page_size(nand);
-               }
        }
 
        spinand_cache_op_adjust_colum(spinand, &adjreq, &column);
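
The spinand rewrite always builds a full page-plus-OOB image, filled with 0xff first, because PROGRAM LOAD is not guaranteed to reset the on-die cache; untouched 0xff bytes then program as "no change". A sketch of that buffer assembly with toy sizes (PAGE_SZ/OOB_SZ are invented, not real NAND geometry):

#include <stdio.h>
#include <string.h>

#define PAGE_SZ 16
#define OOB_SZ   4

/* Build the full cache image for a partial write, mirroring the fix:
 * everything is 0xff first, so bytes the caller did not supply (BBM,
 * other OOB users) program as "no change". */
static void build_cache(unsigned char *cache,
                        const unsigned char *data, size_t dataoffs,
                        size_t datalen,
                        const unsigned char *oob, size_t ooboffs,
                        size_t ooblen)
{
    memset(cache, 0xff, PAGE_SZ + OOB_SZ);
    if (datalen)
        memcpy(cache + dataoffs, data, datalen);
    if (ooblen)
        memcpy(cache + PAGE_SZ + ooboffs, oob, ooblen);
}

int main(void)
{
    unsigned char cache[PAGE_SZ + OOB_SZ];
    unsigned char d[4] = { 1, 2, 3, 4 };
    size_t i;

    build_cache(cache, d, 8, sizeof(d), NULL, 0, 0);
    for (i = 0; i < sizeof(cache); i++)
        printf("%02x ", (unsigned)cache[i]);
    printf("\n");
    return 0;
}
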
@@ -370,8 +368,8 @@ static int spinand_write_to_cache_op(struct spinand_device *spinand,
 
                /*
                 * We need to use the RANDOM LOAD CACHE operation if there's
-                * more than one iteration, because the LOAD operation resets
-                * the cache to 0xff.
+                * more than one iteration, because the LOAD operation might
+                * reset the cache to 0xff.
                 */
                if (nbytes) {
                        column = op.addr.val;
@@ -1018,11 +1016,11 @@ static int spinand_init(struct spinand_device *spinand)
        for (i = 0; i < nand->memorg.ntargets; i++) {
                ret = spinand_select_target(spinand, i);
                if (ret)
-                       goto err_free_bufs;
+                       goto err_manuf_cleanup;
 
                ret = spinand_lock_block(spinand, BL_ALL_UNLOCKED);
                if (ret)
-                       goto err_free_bufs;
+                       goto err_manuf_cleanup;
        }
 
        ret = nanddev_init(nand, &spinand_ops, THIS_MODULE);
index edb1c02..6210757 100644
@@ -145,13 +145,16 @@ config MACVTAP
          To compile this driver as a module, choose M here: the module
          will be called macvtap.
 
+config IPVLAN_L3S
+       depends on NETFILTER
+       depends on IPVLAN
+       def_bool y
+       select NET_L3_MASTER_DEV
 
 config IPVLAN
     tristate "IP-VLAN support"
     depends on INET
     depends on IPV6 || !IPV6
-    depends on NETFILTER
-    select NET_L3_MASTER_DEV
     ---help---
       This allows one to create virtual devices off of a main interface
       and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
@@ -197,9 +200,9 @@ config VXLAN
 
 config GENEVE
        tristate "Generic Network Virtualization Encapsulation"
-       depends on INET && NET_UDP_TUNNEL
+       depends on INET
        depends on IPV6 || !IPV6
-       select NET_IP_TUNNEL
+       select NET_UDP_TUNNEL
        select GRO_CELLS
        ---help---
          This allows one to create geneve virtual interfaces that provide
index f90bb72..b3c63d2 100644
@@ -301,7 +301,7 @@ static int __init cops_probe1(struct net_device *dev, int ioaddr)
                        dev->irq = cops_irq(ioaddr, board);
                        if (dev->irq)
                                break;
-                       /* No IRQ found on this port, fallthrough */
+                       /* fall through - once no IRQ is found on this port. */
                case 1:
                        retval = -EINVAL;
                        goto err_out;
index 4d5d01c..da1fc17 100644
@@ -1375,6 +1375,7 @@ static int bond_option_slaves_set(struct bonding *bond,
        sscanf(newval->string, "%16s", command); /* IFNAMSIZ*/
        ifname = command + 1;
        if ((strlen(command) <= 1) ||
+           (command[0] != '+' && command[0] != '-') ||
            !dev_valid_name(ifname))
                goto err_no_cmd;
 
@@ -1398,6 +1399,7 @@ static int bond_option_slaves_set(struct bonding *bond,
                break;
 
        default:
+               /* should never be reached. */
                goto err_no_cmd;
        }
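
The bonding fix rejects a slaves command unless it starts with an explicit '+' or '-'; previously any leading character fell through to the add path. A sketch of the tightened parse (parse_slave_cmd() is an invented name; the checks mirror the patch):

#include <stdio.h>
#include <string.h>

/* Validate "+ifname" / "-ifname" the way the fixed option handler
 * does: a non-empty name and an explicit '+' or '-' prefix. */
static int parse_slave_cmd(const char *cmd)
{
    const char *ifname = cmd + 1;

    if (strlen(cmd) <= 1 || (cmd[0] != '+' && cmd[0] != '-'))
        return -1; /* err_no_cmd in the driver */

    printf("%s slave %s\n", cmd[0] == '+' ? "enslave" : "release", ifname);
    return 0;
}

int main(void)
{
    parse_slave_cmd("+eth0"); /* accepted */
    parse_slave_cmd("eth0");  /* now rejected: no +/- prefix */
    return 0;
}
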
 
index d28a139..7608bc3 100644
@@ -73,35 +73,37 @@ MODULE_PARM_DESC(spi_down_tail_align, "SPI downlink tail alignment.");
 #define LOW_WATER_MARK   100
 #define HIGH_WATER_MARK  (LOW_WATER_MARK*5)
 
-#ifdef CONFIG_UML
+#ifndef CONFIG_HAS_DMA
 
 /*
  * We sometimes use UML for debugging, but it cannot handle
  * dma_alloc_coherent so we have to wrap it.
  */
-static inline void *dma_alloc(dma_addr_t *daddr)
+static inline void *dma_alloc(struct cfspi *cfspi, dma_addr_t *daddr)
 {
        return kmalloc(SPI_DMA_BUF_LEN, GFP_KERNEL);
 }
 
-static inline void dma_free(void *cpu_addr, dma_addr_t handle)
+static inline void dma_free(struct cfspi *cfspi, void *cpu_addr,
+               dma_addr_t handle)
 {
        kfree(cpu_addr);
 }
 
 #else
 
-static inline void *dma_alloc(dma_addr_t *daddr)
+static inline void *dma_alloc(struct cfspi *cfspi, dma_addr_t *daddr)
 {
-       return dma_alloc_coherent(NULL, SPI_DMA_BUF_LEN, daddr,
+       return dma_alloc_coherent(&cfspi->pdev->dev, SPI_DMA_BUF_LEN, daddr,
                                GFP_KERNEL);
 }
 
-static inline void dma_free(void *cpu_addr, dma_addr_t handle)
+static inline void dma_free(struct cfspi *cfspi, void *cpu_addr,
+               dma_addr_t handle)
 {
-       dma_free_coherent(NULL, SPI_DMA_BUF_LEN, cpu_addr, handle);
+       dma_free_coherent(&cfspi->pdev->dev, SPI_DMA_BUF_LEN, cpu_addr, handle);
 }
-#endif /* CONFIG_UML */
+#endif /* CONFIG_HAS_DMA */
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -610,13 +612,13 @@ static int cfspi_init(struct net_device *dev)
        }
 
        /* Allocate DMA buffers. */
-       cfspi->xfer.va_tx[0] = dma_alloc(&cfspi->xfer.pa_tx[0]);
+       cfspi->xfer.va_tx[0] = dma_alloc(cfspi, &cfspi->xfer.pa_tx[0]);
        if (!cfspi->xfer.va_tx[0]) {
                res = -ENODEV;
                goto err_dma_alloc_tx_0;
        }
 
-       cfspi->xfer.va_rx = dma_alloc(&cfspi->xfer.pa_rx);
+       cfspi->xfer.va_rx = dma_alloc(cfspi, &cfspi->xfer.pa_rx);
 
        if (!cfspi->xfer.va_rx) {
                res = -ENODEV;
@@ -665,9 +667,9 @@ static int cfspi_init(struct net_device *dev)
        return 0;
 
  err_create_wq:
-       dma_free(cfspi->xfer.va_rx, cfspi->xfer.pa_rx);
+       dma_free(cfspi, cfspi->xfer.va_rx, cfspi->xfer.pa_rx);
  err_dma_alloc_rx:
-       dma_free(cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]);
+       dma_free(cfspi, cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]);
  err_dma_alloc_tx_0:
        return res;
 }
@@ -683,8 +685,8 @@ static void cfspi_uninit(struct net_device *dev)
 
        cfspi->ndev = NULL;
        /* Free DMA buffers. */
-       dma_free(cfspi->xfer.va_rx, cfspi->xfer.pa_rx);
-       dma_free(cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]);
+       dma_free(cfspi, cfspi->xfer.va_rx, cfspi->xfer.pa_rx);
+       dma_free(cfspi, cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]);
        set_bit(SPI_TERMINATE, &cfspi->state);
        wake_up_interruptible(&cfspi->wait);
        destroy_workqueue(cfspi->wq);
index 90f5142..d9c56a7 100644
@@ -511,9 +511,6 @@ static void b53_srab_prepare_irq(struct platform_device *pdev)
        /* Clear all pending interrupts */
        writel(0xffffffff, priv->regs + B53_SRAB_INTR);
 
-       if (dev->pdata && dev->pdata->chip_id != BCM58XX_DEVICE_ID)
-               return;
-
        for (i = 0; i < B53_N_PORTS; i++) {
                port = &priv->port_intrs[i];
 
index 361fbde..98696a8 100644
@@ -690,7 +690,7 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds)
         * port, the other ones have already been disabled during
         * bcm_sf2_sw_setup
         */
-       for (port = 0; port < DSA_MAX_PORTS; port++) {
+       for (port = 0; port < ds->num_ports; port++) {
                if (dsa_is_user_port(ds, port) || dsa_is_cpu_port(ds, port))
                        bcm_sf2_port_disable(ds, port, NULL);
        }
@@ -894,12 +894,44 @@ static const struct b53_io_ops bcm_sf2_io_ops = {
        .write64 = bcm_sf2_core_write64,
 };
 
+static void bcm_sf2_sw_get_strings(struct dsa_switch *ds, int port,
+                                  u32 stringset, uint8_t *data)
+{
+       int cnt = b53_get_sset_count(ds, port, stringset);
+
+       b53_get_strings(ds, port, stringset, data);
+       bcm_sf2_cfp_get_strings(ds, port, stringset,
+                               data + cnt * ETH_GSTRING_LEN);
+}
+
+static void bcm_sf2_sw_get_ethtool_stats(struct dsa_switch *ds, int port,
+                                        uint64_t *data)
+{
+       int cnt = b53_get_sset_count(ds, port, ETH_SS_STATS);
+
+       b53_get_ethtool_stats(ds, port, data);
+       bcm_sf2_cfp_get_ethtool_stats(ds, port, data + cnt);
+}
+
+static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds, int port,
+                                    int sset)
+{
+       int cnt = b53_get_sset_count(ds, port, sset);
+
+       if (cnt < 0)
+               return cnt;
+
+       cnt += bcm_sf2_cfp_get_sset_count(ds, port, sset);
+
+       return cnt;
+}
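
The new wrappers compose ethtool results from two providers: the b53 core counters first, then the CFP counters appended at the offset the core count reports. A sketch of that composition pattern (the providers and their values are stand-ins, not the b53/CFP code):

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the b53 core counters and the CFP counters. */
static int base_count(void) { return 2; }
static void base_stats(uint64_t *data) { data[0] = 10; data[1] = 20; }

static int cfp_count(void) { return 3; }

static void cfp_stats(uint64_t *data)
{
    int i;

    for (i = 0; i < 3; i++)
        data[i] = 100 + i;
}

/* Composite getter, shaped like bcm_sf2_sw_get_ethtool_stats(): fill
 * the base stats, then append the extra ones right after them. */
static int get_stats(uint64_t *data)
{
    int cnt = base_count();

    base_stats(data);
    cfp_stats(data + cnt);
    return cnt + cfp_count();
}

int main(void)
{
    uint64_t data[8];
    int i, n = get_stats(data);

    for (i = 0; i < n; i++)
        printf("stat[%d] = %llu\n", i, (unsigned long long)data[i]);
    return 0;
}
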
+
 static const struct dsa_switch_ops bcm_sf2_ops = {
        .get_tag_protocol       = b53_get_tag_protocol,
        .setup                  = bcm_sf2_sw_setup,
-       .get_strings            = b53_get_strings,
-       .get_ethtool_stats      = b53_get_ethtool_stats,
-       .get_sset_count         = b53_get_sset_count,
+       .get_strings            = bcm_sf2_sw_get_strings,
+       .get_ethtool_stats      = bcm_sf2_sw_get_ethtool_stats,
+       .get_sset_count         = bcm_sf2_sw_get_sset_count,
        .get_ethtool_phy_stats  = b53_get_ethtool_phy_stats,
        .get_phy_flags          = bcm_sf2_sw_get_phy_flags,
        .phylink_validate       = bcm_sf2_sw_validate,
@@ -1062,7 +1094,6 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
        dev_set_drvdata(&pdev->dev, priv);
 
        spin_lock_init(&priv->indir_lock);
-       mutex_init(&priv->stats_mutex);
        mutex_init(&priv->cfp.lock);
        INIT_LIST_HEAD(&priv->cfp.rules_list);
 
index faaef32..eb3655b 100644
@@ -87,9 +87,6 @@ struct bcm_sf2_priv {
        /* Backing b53_device */
        struct b53_device               *dev;
 
-       /* Mutex protecting access to the MIB counters */
-       struct mutex                    stats_mutex;
-
        struct bcm_sf2_hw_params        hw_params;
 
        struct bcm_sf2_port_status      port_sts[DSA_MAX_PORTS];
@@ -216,5 +213,10 @@ int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port,
 int bcm_sf2_cfp_rst(struct bcm_sf2_priv *priv);
 void bcm_sf2_cfp_exit(struct dsa_switch *ds);
 int bcm_sf2_cfp_resume(struct dsa_switch *ds);
+void bcm_sf2_cfp_get_strings(struct dsa_switch *ds, int port,
+                            u32 stringset, uint8_t *data);
+void bcm_sf2_cfp_get_ethtool_stats(struct dsa_switch *ds, int port,
+                                  uint64_t *data);
+int bcm_sf2_cfp_get_sset_count(struct dsa_switch *ds, int port, int sset);
 
 #endif /* __BCM_SF2_H */
index 6d8059d..0b9ca4b 100644
@@ -213,6 +213,7 @@ static inline unsigned int bcm_sf2_cfp_rule_size(struct bcm_sf2_priv *priv)
 
 static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv,
                                   unsigned int rule_index,
+                                  int src_port,
                                   unsigned int port_num,
                                   unsigned int queue_num,
                                   bool fwd_map_change)
@@ -230,6 +231,10 @@ static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv,
        else
                reg = 0;
 
+       /* Enable looping back to the original port */
+       if (src_port == port_num)
+               reg |= LOOP_BK_EN;
+
        core_writel(priv, reg, CORE_ACT_POL_DATA0);
 
        /* Set classification ID that needs to be put in Broadcom tag */
@@ -443,7 +448,7 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port,
        }
 
        /* Insert into Action and policer RAMs now */
-       ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port_num,
+       ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port, port_num,
                                      queue_num, true);
        if (ret)
                goto out_err_flow_rule;
@@ -733,7 +738,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
        }
 
        /* Insert into Action and policer RAMs now */
-       ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num,
+       ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port, port_num,
                                      queue_num, false);
        if (ret)
                goto out_err_flow_rule;
@@ -795,7 +800,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
        /* Insert into Action and policer RAMs now, set chain ID to
         * the one we are chained to
         */
-       ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port_num,
+       ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port, port_num,
                                      queue_num, true);
        if (ret)
                goto out_err_flow_rule;
@@ -1201,3 +1206,91 @@ int bcm_sf2_cfp_resume(struct dsa_switch *ds)
 
        return ret;
 }
+
+static const struct bcm_sf2_cfp_stat {
+       unsigned int offset;
+       unsigned int ram_loc;
+       const char *name;
+} bcm_sf2_cfp_stats[] = {
+       {
+               .offset = CORE_STAT_GREEN_CNTR,
+               .ram_loc = GREEN_STAT_RAM,
+               .name = "Green"
+       },
+       {
+               .offset = CORE_STAT_YELLOW_CNTR,
+               .ram_loc = YELLOW_STAT_RAM,
+               .name = "Yellow"
+       },
+       {
+               .offset = CORE_STAT_RED_CNTR,
+               .ram_loc = RED_STAT_RAM,
+               .name = "Red"
+       },
+};
+
+void bcm_sf2_cfp_get_strings(struct dsa_switch *ds, int port,
+                            u32 stringset, uint8_t *data)
+{
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+       unsigned int s = ARRAY_SIZE(bcm_sf2_cfp_stats);
+       char buf[ETH_GSTRING_LEN];
+       unsigned int i, j, iter;
+
+       if (stringset != ETH_SS_STATS)
+               return;
+
+       for (i = 1; i < priv->num_cfp_rules; i++) {
+               for (j = 0; j < s; j++) {
+                       snprintf(buf, sizeof(buf),
+                                "CFP%03d_%sCntr",
+                                i, bcm_sf2_cfp_stats[j].name);
+                       iter = (i - 1) * s + j;
+                       strlcpy(data + iter * ETH_GSTRING_LEN,
+                               buf, ETH_GSTRING_LEN);
+               }
+       }
+}
+
+void bcm_sf2_cfp_get_ethtool_stats(struct dsa_switch *ds, int port,
+                                  uint64_t *data)
+{
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+       unsigned int s = ARRAY_SIZE(bcm_sf2_cfp_stats);
+       const struct bcm_sf2_cfp_stat *stat;
+       unsigned int i, j, iter;
+       struct cfp_rule *rule;
+       int ret;
+
+       mutex_lock(&priv->cfp.lock);
+       for (i = 1; i < priv->num_cfp_rules; i++) {
+               rule = bcm_sf2_cfp_rule_find(priv, port, i);
+               if (!rule)
+                       continue;
+
+               for (j = 0; j < s; j++) {
+                       stat = &bcm_sf2_cfp_stats[j];
+
+                       bcm_sf2_cfp_rule_addr_set(priv, i);
+                       ret = bcm_sf2_cfp_op(priv, stat->ram_loc | OP_SEL_READ);
+                       if (ret)
+                               continue;
+
+                       iter = (i - 1) * s + j;
+                       data[iter] = core_readl(priv, stat->offset);
+               }
+
+       }
+       mutex_unlock(&priv->cfp.lock);
+}
+
+int bcm_sf2_cfp_get_sset_count(struct dsa_switch *ds, int port, int sset)
+{
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+
+       if (sset != ETH_SS_STATS)
+               return 0;
+
+       /* 3 counters per CFP rule */
+       return (priv->num_cfp_rules - 1) * ARRAY_SIZE(bcm_sf2_cfp_stats);
+}
index 0a1e530..67f0562 100644
@@ -400,6 +400,10 @@ enum bcm_sf2_reg_offs {
 #define CORE_RATE_METER6               0x281e0
 #define  CIR_REF_CNT_MASK              0x7ffff
 
+#define CORE_STAT_GREEN_CNTR           0x28200
+#define CORE_STAT_YELLOW_CNTR          0x28210
+#define CORE_STAT_RED_CNTR             0x28220
+
 #define CORE_CFP_CTL_REG               0x28400
 #define  CFP_EN_MAP_MASK               0x1ff
 
index 89ed059..674d77e 100644
@@ -397,6 +397,7 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
        struct ksz_port *p = &dev->ports[port];
        u8 data;
        int member = -1;
+       int forward = dev->member;
 
        ksz_pread8(dev, port, P_STP_CTRL, &data);
        data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
@@ -464,10 +465,10 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
        }
 
        /* When topology has changed the function ksz_update_port_member
-        * should be called to modify port forwarding behavior.  However
-        * as the offload_fwd_mark indication cannot be reported here
-        * the switch forwarding function is not enabled.
+        * should be called to modify port forwarding behavior.
         */
+       if (forward != dev->member)
+               ksz_update_port_member(dev, port);
 }
 
 static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
index 669a7f1..32e7af5 100644
@@ -261,6 +261,7 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
        unsigned int sub_irq;
        unsigned int n;
        u16 reg;
+       u16 ctl1;
        int err;
 
        mutex_lock(&chip->reg_lock);
@@ -270,13 +271,28 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
        if (err)
                goto out;
 
-       for (n = 0; n < chip->g1_irq.nirqs; ++n) {
-               if (reg & (1 << n)) {
-                       sub_irq = irq_find_mapping(chip->g1_irq.domain, n);
-                       handle_nested_irq(sub_irq);
-                       ++nhandled;
+       do {
+               for (n = 0; n < chip->g1_irq.nirqs; ++n) {
+                       if (reg & (1 << n)) {
+                               sub_irq = irq_find_mapping(chip->g1_irq.domain,
+                                                          n);
+                               handle_nested_irq(sub_irq);
+                               ++nhandled;
+                       }
                }
-       }
+
+               mutex_lock(&chip->reg_lock);
+               err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &ctl1);
+               if (err)
+                       goto unlock;
+               err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &reg);
+unlock:
+               mutex_unlock(&chip->reg_lock);
+               if (err)
+                       goto out;
+               ctl1 &= GENMASK(chip->g1_irq.nirqs, 0);
+       } while (reg & ctl1);
+
 out:
        return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
 }
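
Wrapping the dispatch loop in do/while and re-reading the status register masked by CTL1 makes the threaded handler keep going until no enabled source is still asserted, so an event arriving mid-loop is not lost. A sketch of that retest loop (the registers are modeled as plain variables, not MDIO reads):

#include <stdio.h>

static unsigned int pending = 0x5;  /* stand-in for the G1 status register */
static unsigned int enabled = 0x7;  /* stand-in for G1 CTL1 (the irq mask) */

static unsigned int read_status(void)
{
    unsigned int s = pending;

    pending = 0;    /* dispatching the handlers clears the sources here */
    return s;
}

int main(void)
{
    unsigned int reg = read_status();
    unsigned int n;

    /* Keep dispatching until no enabled source is still asserted,
     * mirroring the do/while the patch wraps around the handler loop. */
    do {
        for (n = 0; n < 8; n++)
            if (reg & (1u << n))
                printf("handle sub-irq %u\n", n);
        reg = read_status() & enabled;
    } while (reg);
    return 0;
}
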
@@ -647,8 +663,10 @@ static void mv88e6390_phylink_validate(struct mv88e6xxx_chip *chip, int port,
                                       unsigned long *mask,
                                       struct phylink_link_state *state)
 {
-       if (port >= 9)
+       if (port >= 9) {
                phylink_set(mask, 2500baseX_Full);
+               phylink_set(mask, 2500baseT_Full);
+       }
 
        /* No ethtool bits for 200Mbps */
        phylink_set(mask, 1000baseT_Full);
index 5200e4b..ea24384 100644
@@ -314,6 +314,7 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
 {
        struct mv88e6xxx_chip *chip = dev_id;
        struct mv88e6xxx_atu_entry entry;
+       int spid;
        int err;
        u16 val;
 
@@ -336,6 +337,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
        if (err)
                goto out;
 
+       spid = entry.state;
+
        if (val & MV88E6XXX_G1_ATU_OP_AGE_OUT_VIOLATION) {
                dev_err_ratelimited(chip->dev,
                                    "ATU age out violation for %pM\n",
@@ -344,23 +347,23 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
 
        if (val & MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION) {
                dev_err_ratelimited(chip->dev,
-                                   "ATU member violation for %pM portvec %x\n",
-                                   entry.mac, entry.portvec);
-               chip->ports[entry.portvec].atu_member_violation++;
+                                   "ATU member violation for %pM portvec %x spid %d\n",
+                                   entry.mac, entry.portvec, spid);
+               chip->ports[spid].atu_member_violation++;
        }
 
        if (val & MV88E6XXX_G1_ATU_OP_MISS_VIOLATION) {
                dev_err_ratelimited(chip->dev,
-                                   "ATU miss violation for %pM portvec %x\n",
-                                   entry.mac, entry.portvec);
-               chip->ports[entry.portvec].atu_miss_violation++;
+                                   "ATU miss violation for %pM portvec %x spid %d\n",
+                                   entry.mac, entry.portvec, spid);
+               chip->ports[spid].atu_miss_violation++;
        }
 
        if (val & MV88E6XXX_G1_ATU_OP_FULL_VIOLATION) {
                dev_err_ratelimited(chip->dev,
-                                   "ATU full violation for %pM portvec %x\n",
-                                   entry.mac, entry.portvec);
-               chip->ports[entry.portvec].atu_full_violation++;
+                                   "ATU full violation for %pM portvec %x spid %d\n",
+                                   entry.mac, entry.portvec, spid);
+               chip->ports[spid].atu_full_violation++;
        }
        mutex_unlock(&chip->reg_lock);
 
index b648e3f..808abb6 100644
@@ -1177,7 +1177,7 @@ static irqreturn_t corkscrew_interrupt(int irq, void *dev_id)
                                if (inl(ioaddr + DownListPtr) == isa_virt_to_bus(&lp->tx_ring[entry]))
                                        break;  /* It still hasn't been processed. */
                                if (lp->tx_skbuff[entry]) {
-                                       dev_kfree_skb_irq(lp->tx_skbuff[entry]);
+                                       dev_consume_skb_irq(lp->tx_skbuff[entry]);
                                        lp->tx_skbuff[entry] = NULL;
                                }
                                dirty_tx++;
@@ -1192,7 +1192,7 @@ static irqreturn_t corkscrew_interrupt(int irq, void *dev_id)
 #ifdef VORTEX_BUS_MASTER
                if (status & DMADone) {
                        outw(0x1000, ioaddr + Wn7_MasterStatus);        /* Ack the event. */
-                       dev_kfree_skb_irq(lp->tx_skb);  /* Release the transferred buffer */
+                       dev_consume_skb_irq(lp->tx_skb);        /* Release the transferred buffer */
                        netif_wake_queue(dev);
                }
 #endif
index 40f421d..1470514 100644
@@ -2307,7 +2307,7 @@ _vortex_interrupt(int irq, struct net_device *dev)
                                dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE);
                                pkts_compl++;
                                bytes_compl += vp->tx_skb->len;
-                               dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
+                               dev_consume_skb_irq(vp->tx_skb); /* Release the transferred buffer */
                                if (ioread16(ioaddr + TxFree) > 1536) {
                                        /*
                                         * AKPM: FIXME: I don't think we need this.  If the queue was stopped due to
@@ -2449,7 +2449,7 @@ _boomerang_interrupt(int irq, struct net_device *dev)
 #endif
                                        pkts_compl++;
                                        bytes_compl += skb->len;
-                                       dev_kfree_skb_irq(skb);
+                                       dev_consume_skb_irq(skb);
                                        vp->tx_skbuff[entry] = NULL;
                                } else {
                                        pr_debug("boomerang_interrupt: no skb!\n");
index 097467f..816540e 100644
@@ -1390,7 +1390,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
                                        }
                                }
 
-                               dev_kfree_skb_irq(skb);
+                               dev_consume_skb_irq(skb);
                        }
                        np->tx_done_q[np->tx_done].status = 0;
                        np->tx_done = (np->tx_done + 1) % DONE_Q_SIZE;
index a70bb1b..a6eacf2 100644
@@ -2663,11 +2663,6 @@ static int ena_restore_device(struct ena_adapter *adapter)
                goto err_device_destroy;
        }
 
-       clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
-       /* Make sure we don't have a race with AENQ Links state handler */
-       if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
-               netif_carrier_on(adapter->netdev);
-
        rc = ena_enable_msix_and_set_admin_interrupts(adapter,
                                                      adapter->num_queues);
        if (rc) {
@@ -2684,6 +2679,11 @@ static int ena_restore_device(struct ena_adapter *adapter)
        }
 
        set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
+       clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
+       if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
+               netif_carrier_on(adapter->netdev);
+
        mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
        dev_err(&pdev->dev,
                "Device reset completed successfully, Driver info: %s\n",
index dc8b617..6387007 100644
@@ -45,7 +45,7 @@
 
 #define DRV_MODULE_VER_MAJOR   2
 #define DRV_MODULE_VER_MINOR   0
-#define DRV_MODULE_VER_SUBMINOR 2
+#define DRV_MODULE_VER_SUBMINOR 3
 
 #define DRV_MODULE_NAME                "ena"
 #ifndef DRV_MODULE_VERSION
index e833d1b..e5073ae 100644
@@ -1167,7 +1167,7 @@ static int au1000_probe(struct platform_device *pdev)
        /* Allocate the data buffers
         * Snooping works fine with eth on all au1xxx
         */
-       aup->vaddr = (u32)dma_alloc_attrs(NULL, MAX_BUF_SIZE *
+       aup->vaddr = (u32)dma_alloc_attrs(&pdev->dev, MAX_BUF_SIZE *
                                          (NUM_TX_BUFFS + NUM_RX_BUFFS),
                                          &aup->dma_addr, 0,
                                          DMA_ATTR_NON_CONSISTENT);
@@ -1349,7 +1349,7 @@ err_remap3:
 err_remap2:
        iounmap(aup->mac);
 err_remap1:
-       dma_free_attrs(NULL, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
+       dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
                        (void *)aup->vaddr, aup->dma_addr,
                        DMA_ATTR_NON_CONSISTENT);
 err_vaddr:
@@ -1383,7 +1383,7 @@ static int au1000_remove(struct platform_device *pdev)
                if (aup->tx_db_inuse[i])
                        au1000_ReleaseDB(aup, aup->tx_db_inuse[i]);
 
-       dma_free_attrs(NULL, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
+       dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
                        (void *)aup->vaddr, aup->dma_addr,
                        DMA_ATTR_NON_CONSISTENT);
 
index b56d84c..f90b454 100644
@@ -1084,7 +1084,7 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id)
                                /* We must free the original skb if it's not a data-only copy
                                   in the bounce buffer. */
                                if (lp->tx_skbuff[entry]) {
-                                       dev_kfree_skb_irq(lp->tx_skbuff[entry]);
+                                       dev_consume_skb_irq(lp->tx_skbuff[entry]);
                                        lp->tx_skbuff[entry] = NULL;
                                }
                                dirty_tx++;
index 8931ce6..87ff5d6 100644
@@ -1028,7 +1028,7 @@ static void ni65_xmit_intr(struct net_device *dev,int csr0)
 
 #ifdef XMT_VIA_SKB
                if(p->tmd_skb[p->tmdlast]) {
-                        dev_kfree_skb_irq(p->tmd_skb[p->tmdlast]);
+                        dev_consume_skb_irq(p->tmd_skb[p->tmdlast]);
                         p->tmd_skb[p->tmdlast] = NULL;
                }
 #endif
index 68b9ee4..4d9819d 100644
@@ -764,7 +764,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
            dev->stats.tx_bytes += mp->tx_bufs[i]->len;
            ++dev->stats.tx_packets;
        }
-       dev_kfree_skb_irq(mp->tx_bufs[i]);
+       dev_consume_skb_irq(mp->tx_bufs[i]);
        --mp->tx_active;
        if (++i >= N_TX_RING)
            i = 0;
index 4406325..ff3d685 100644
@@ -148,7 +148,7 @@ static void arc_emac_tx_clean(struct net_device *ndev)
                                 dma_unmap_len(tx_buff, len), DMA_TO_DEVICE);
 
                /* return the sk_buff to system */
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
 
                txbd->data = 0;
                txbd->info = 0;
index 3164aad..9dfe6a9 100644
@@ -1259,7 +1259,7 @@ static bool atl1e_clean_tx_irq(struct atl1e_adapter *adapter)
                }
 
                if (tx_buffer->skb) {
-                       dev_kfree_skb_irq(tx_buffer->skb);
+                       dev_consume_skb_irq(tx_buffer->skb);
                        tx_buffer->skb = NULL;
                }
 
index 63edc57..9e07b46 100644
@@ -2088,7 +2088,7 @@ static int atl1_intr_tx(struct atl1_adapter *adapter)
                }
 
                if (buffer_info->skb) {
-                       dev_kfree_skb_irq(buffer_info->skb);
+                       dev_consume_skb_irq(buffer_info->skb);
                        buffer_info->skb = NULL;
                }
 
index f9521d0..28c9b0b 100644
@@ -520,7 +520,6 @@ static void bcm_sysport_get_wol(struct net_device *dev,
                                struct ethtool_wolinfo *wol)
 {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
-       u32 reg;
 
        wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
        wol->wolopts = priv->wolopts;
@@ -528,11 +527,7 @@ static void bcm_sysport_get_wol(struct net_device *dev,
        if (!(priv->wolopts & WAKE_MAGICSECURE))
                return;
 
-       /* Return the programmed SecureOn password */
-       reg = umac_readl(priv, UMAC_PSW_MS);
-       put_unaligned_be16(reg, &wol->sopass[0]);
-       reg = umac_readl(priv, UMAC_PSW_LS);
-       put_unaligned_be32(reg, &wol->sopass[2]);
+       memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass));
 }
 
 static int bcm_sysport_set_wol(struct net_device *dev,
@@ -548,13 +543,8 @@ static int bcm_sysport_set_wol(struct net_device *dev,
        if (wol->wolopts & ~supported)
                return -EINVAL;
 
-       /* Program the SecureOn password */
-       if (wol->wolopts & WAKE_MAGICSECURE) {
-               umac_writel(priv, get_unaligned_be16(&wol->sopass[0]),
-                           UMAC_PSW_MS);
-               umac_writel(priv, get_unaligned_be32(&wol->sopass[2]),
-                           UMAC_PSW_LS);
-       }
+       if (wol->wolopts & WAKE_MAGICSECURE)
+               memcpy(priv->sopass, wol->sopass, sizeof(priv->sopass));
 
        /* Flag the device and relevant IRQ as wakeup capable */
        if (wol->wolopts) {
@@ -2649,13 +2639,18 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv)
        unsigned int index, i = 0;
        u32 reg;
 
-       /* Password has already been programmed */
        reg = umac_readl(priv, UMAC_MPD_CTRL);
        if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE))
                reg |= MPD_EN;
        reg &= ~PSW_EN;
-       if (priv->wolopts & WAKE_MAGICSECURE)
+       if (priv->wolopts & WAKE_MAGICSECURE) {
+               /* Program the SecureOn password */
+               umac_writel(priv, get_unaligned_be16(&priv->sopass[0]),
+                           UMAC_PSW_MS);
+               umac_writel(priv, get_unaligned_be32(&priv->sopass[2]),
+                           UMAC_PSW_LS);
                reg |= PSW_EN;
+       }
        umac_writel(priv, reg, UMAC_MPD_CTRL);
 
        if (priv->wolopts & WAKE_FILTER) {
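
The SYSTEMPORT change stops writing the SecureOn password to the UMAC at set time; it caches the password in software and programs the registers only on suspend, when the block is guaranteed to be powered. A sketch of that defer-to-suspend split (register writes are modeled as a printf, not the UMAC accessors):

#include <stdio.h>
#include <string.h>

#define SOPASS_MAX 6

static unsigned char sopass[SOPASS_MAX];    /* priv->sopass analogue */

static void set_wol(const unsigned char *pw)
{
    /* Only cache the password here; no register writes yet. */
    memcpy(sopass, pw, SOPASS_MAX);
}

static void get_wol(unsigned char *pw)
{
    /* Served from the cache, so it works with the block powered down. */
    memcpy(pw, sopass, SOPASS_MAX);
}

static void suspend_to_wol(void)
{
    /* Program the hardware only now, right before arming Wake-on-LAN. */
    printf("UMAC_PSW <- %02x%02x%02x%02x%02x%02x\n",
           sopass[0], sopass[1], sopass[2],
           sopass[3], sopass[4], sopass[5]);
}

int main(void)
{
    unsigned char pw[SOPASS_MAX] = { 1, 2, 3, 4, 5, 6 };
    unsigned char out[SOPASS_MAX];

    set_wol(pw);
    get_wol(out);
    suspend_to_wol();
    return 0;
}
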
index 0887e63..0b192fe 100644
@@ -12,6 +12,7 @@
 #define __BCM_SYSPORT_H
 
 #include <linux/bitmap.h>
+#include <linux/ethtool.h>
 #include <linux/if_vlan.h>
 #include <linux/net_dim.h>
 
@@ -778,6 +779,7 @@ struct bcm_sysport_priv {
        unsigned int            crc_fwd:1;
        u16                     rev;
        u32                     wolopts;
+       u8                      sopass[SOPASS_MAX];
        unsigned int            wol_irq_disabled:1;
 
        /* MIB related fields */
index 8e0a317..a9bdc21 100644
@@ -1654,13 +1654,9 @@ static int bnx2x_vf_mbx_macvlan_list(struct bnx2x *bp,
 {
        int i, j;
        struct bnx2x_vf_mac_vlan_filters *fl = NULL;
-       size_t fsz;
 
-       fsz = tlv->n_mac_vlan_filters *
-             sizeof(struct bnx2x_vf_mac_vlan_filter) +
-             sizeof(struct bnx2x_vf_mac_vlan_filters);
-
-       fl = kzalloc(fsz, GFP_KERNEL);
+       fl = kzalloc(struct_size(fl, filters, tlv->n_mac_vlan_filters),
+                    GFP_KERNEL);
        if (!fl)
                return -ENOMEM;
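
struct_size(fl, filters, n) replaces the open-coded header-plus-n-elements arithmetic and adds overflow checking. A plain C equivalent of what it computes for a flexible-array struct (minus the overflow saturation the kernel macro provides; the struct names are invented):

#include <stdio.h>
#include <stdlib.h>

struct filter { int mac; int vlan; };

struct filters {
    int count;
    struct filter entries[];   /* flexible array member */
};

int main(void)
{
    int n = 4;

    /* Equivalent of struct_size(fl, entries, n): the header plus n
     * trailing elements. */
    struct filters *fl = calloc(1, sizeof(*fl) + n * sizeof(fl->entries[0]));

    if (!fl)
        return 1;

    fl->count = n;
    printf("allocated %zu bytes\n",
           sizeof(*fl) + n * sizeof(fl->entries[0]));
    free(fl);
    return 0;
}
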
 
index 1c2987c..92d7345 100644
@@ -4973,12 +4973,18 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
                struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
                struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
                u32 map_idx = ring->map_idx;
+               unsigned int vector;
 
+               vector = bp->irq_tbl[map_idx].vector;
+               disable_irq_nosync(vector);
                rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx);
-               if (rc)
+               if (rc) {
+                       enable_irq(vector);
                        goto err_out;
+               }
                bnxt_set_db(bp, &cpr->cp_db, type, map_idx, ring->fw_ring_id);
                bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons);
+               enable_irq(vector);
                bp->grp_info[i].cp_fw_ring_id = ring->fw_ring_id;
 
                if (!i) {
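
The bnxt hunk above brackets the firmware ring-allocation call with
disable_irq_nosync()/enable_irq() so the completion-ring interrupt cannot
fire in the window before bnxt_set_db()/bnxt_db_nq() have initialized the
doorbell.  Note that enable_irq() is issued on both the error and the
success path, keeping the IRQ disable depth balanced.  The shape of the
pattern, with a hypothetical setup_ring() standing in for the firmware call
plus doorbell initialization:

    static int sketch_ring_alloc(unsigned int vector)
    {
            int rc;

            disable_irq_nosync(vector);  /* mask; don't wait for handlers */
            rc = setup_ring();           /* hypothetical fw alloc + doorbell */
            enable_irq(vector);          /* balanced on every exit path */
            return rc;
    }
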
index a6abfa4..e1feb97 100644
@@ -37,8 +37,6 @@ static const struct bnxt_dl_nvm_param nvm_params[] = {
         NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7},
        {BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK,
         BNXT_NVM_SHARED_CFG, 1},
-
-       {DEVLINK_PARAM_GENERIC_ID_WOL, NVM_OFF_WOL, BNXT_NVM_PORT_CFG, 1},
 };
 
 static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
@@ -72,8 +70,7 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
        bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE;
        switch (bytesize) {
        case 1:
-               if (nvm_param.num_bits == 1 &&
-                   nvm_param.id != DEVLINK_PARAM_GENERIC_ID_WOL)
+               if (nvm_param.num_bits == 1)
                        buf = &val->vbool;
                else
                        buf = &val->vu8;
@@ -167,17 +164,6 @@ static int bnxt_dl_msix_validate(struct devlink *dl, u32 id,
        return 0;
 }
 
-static int bnxt_dl_wol_validate(struct devlink *dl, u32 id,
-                               union devlink_param_value val,
-                               struct netlink_ext_ack *extack)
-{
-       if (val.vu8 && val.vu8 != DEVLINK_PARAM_WAKE_MAGIC) {
-               NL_SET_ERR_MSG_MOD(extack, "WOL type is not supported");
-               return -EINVAL;
-       }
-       return 0;
-}
-
 static const struct devlink_param bnxt_dl_params[] = {
        DEVLINK_PARAM_GENERIC(ENABLE_SRIOV,
                              BIT(DEVLINK_PARAM_CMODE_PERMANENT),
@@ -203,9 +189,6 @@ static const struct devlink_param bnxt_dl_params[] = {
 };
 
 static const struct devlink_param bnxt_dl_port_params[] = {
-       DEVLINK_PARAM_GENERIC(WOL, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
-                             bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set,
-                             bnxt_dl_wol_validate),
 };
 
 int bnxt_dl_register(struct bnxt *bp)
@@ -258,6 +241,9 @@ int bnxt_dl_register(struct bnxt *bp)
                netdev_err(bp->dev, "devlink_port_params_register failed");
                goto err_dl_port_unreg;
        }
+
+       devlink_params_publish(dl);
+
        return 0;
 
 err_dl_port_unreg:
index da065ca..5b6b2c7 100644
@@ -35,7 +35,6 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
 
 #define NVM_OFF_MSIX_VEC_PER_PF_MAX    108
 #define NVM_OFF_MSIX_VEC_PER_PF_MIN    114
-#define NVM_OFF_WOL                    152
 #define NVM_OFF_IGNORE_ARI             164
 #define NVM_OFF_DIS_GRE_VER_CHECK      171
 #define NVM_OFF_ENABLE_SRIOV           401
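
Besides removing the WoL devlink parameter (its NVM offset, validator, and
table entries), the bnxt_dl_register() hunk adds a devlink_params_publish()
call: with the devlink publish/unpublish split introduced in this cycle,
registered parameters only become visible to userspace once the driver
publishes them.  A sketch of the resulting registration flow (example_params
is hypothetical):

    static int sketch_dl_register(struct devlink *dl)
    {
            int err;

            err = devlink_params_register(dl, example_params,
                                          ARRAY_SIZE(example_params));
            if (err)
                    return err;

            /* Expose the registered parameters to userspace. */
            devlink_params_publish(dl);
            return 0;
    }
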
index 5db9f41..134ae28 100644
@@ -1288,7 +1288,7 @@ static void sbdma_tx_process(struct sbmac_softc *sc, struct sbmacdma *d,
                 * for transmits, we just free buffers.
                 */
 
-               dev_kfree_skb_irq(sb);
+               dev_consume_skb_irq(sb);
 
                /*
                 * .. and advance to the next buffer.
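
This and several conversions below swap dev_kfree_skb_irq() for
dev_consume_skb_irq() on successful-transmit paths.  Both free the skb the
same way; the difference is tracing semantics: kfree_skb is reported as a
packet drop by the skb:kfree_skb tracepoint and tools such as dropwatch,
while consume_skb marks a normal end of life.  The rule of thumb, as a
sketch:

    static void sketch_tx_complete(struct sk_buff *skb, bool ok)
    {
            if (ok)
                    dev_consume_skb_irq(skb);  /* normal completion, not a drop */
            else
                    dev_kfree_skb_irq(skb);    /* error path, reported as a drop */
    }
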
index 2b28826..f2915f2 100644
@@ -1734,7 +1734,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
                if (!nskb)
                        return -ENOMEM;
 
-               dev_kfree_skb_any(*skb);
+               dev_consume_skb_any(*skb);
                *skb = nskb;
        }
 
@@ -3673,9 +3673,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
                /* Store packet information (to free when Tx completed) */
                lp->skb = skb;
                lp->skb_length = skb->len;
-               lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len,
-                                                       DMA_TO_DEVICE);
-               if (dma_mapping_error(NULL, lp->skb_physaddr)) {
+               lp->skb_physaddr = dma_map_single(&lp->pdev->dev, skb->data,
+                                                 skb->len, DMA_TO_DEVICE);
+               if (dma_mapping_error(&lp->pdev->dev, lp->skb_physaddr)) {
                        dev_kfree_skb_any(skb);
                        dev->stats.tx_dropped++;
                        netdev_err(dev, "%s: DMA mapping error\n", __func__);
@@ -3763,9 +3763,9 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
                        dev->stats.tx_errors++;
 
                if (lp->skb) {
-                       dev_kfree_skb_irq(lp->skb);
+                       dev_consume_skb_irq(lp->skb);
                        lp->skb = NULL;
-                       dma_unmap_single(NULL, lp->skb_physaddr,
+                       dma_unmap_single(&lp->pdev->dev, lp->skb_physaddr,
                                         lp->skb_length, DMA_TO_DEVICE);
                        dev->stats.tx_packets++;
                        dev->stats.tx_bytes += lp->skb_length;
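
The at91ether hunks above replace the DMA API's long-deprecated NULL device
argument with the real platform device; as of this kernel, mapping and
unmapping must use the same non-NULL struct device.  A minimal sketch of the
checked-mapping pattern:

    static int sketch_map_tx(struct device *dev, struct sk_buff *skb,
                             dma_addr_t *mapping)
    {
            *mapping = dma_map_single(dev, skb->data, skb->len,
                                      DMA_TO_DEVICE);
            if (dma_mapping_error(dev, *mapping))
                    return -ENOMEM;  /* never use an unchecked mapping */
            return 0;
    }
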
@@ -3943,6 +3943,7 @@ static const struct of_device_id macb_dt_ids[] = {
        { .compatible = "cdns,np4-macb", .data = &np4_config },
        { .compatible = "cdns,pc302-gem", .data = &pc302gem_config },
        { .compatible = "cdns,gem", .data = &pc302gem_config },
+       { .compatible = "cdns,sam9x60-macb", .data = &at91sam9260_config },
        { .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config },
        { .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config },
        { .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config },
index 5f03199..05f4a3b 100644
@@ -54,7 +54,6 @@ config CAVIUM_PTP
        tristate "Cavium PTP coprocessor as PTP clock"
        depends on 64BIT && PCI
        imply PTP_1588_CLOCK
-       default y
        ---help---
          This driver adds support for the Precision Time Protocol Clocks and
          Timestamping coprocessor (PTP) found on Cavium processors.
index 568715a..b7b0eb1 100644
@@ -617,6 +617,7 @@ enum {                                 /* adapter flags */
        FW_OFLD_CONN       = (1 << 9),
        ROOT_NO_RELAXED_ORDERING = (1 << 10),
        SHUTTING_DOWN      = (1 << 11),
+       SGE_DBQ_TIMER      = (1 << 12),
 };
 
 enum {
@@ -756,6 +757,8 @@ struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
 #ifdef CONFIG_CHELSIO_T4_DCB
        u8 dcb_prio;                /* DCB Priority bound to queue */
 #endif
+       u8 dbqt;                    /* SGE Doorbell Queue Timer in use */
+       unsigned int dbqtimerix;    /* SGE Doorbell Queue Timer Index */
        unsigned long tso;          /* # of TSO requests */
        unsigned long tx_cso;       /* # of Tx checksum offloads */
        unsigned long vlan_ins;     /* # of Tx VLAN insertions */
@@ -816,6 +819,8 @@ struct sge {
        u16 nqs_per_uld;            /* # of Rx queues per ULD */
        u16 timer_val[SGE_NTIMERS];
        u8 counter_val[SGE_NCOUNTERS];
+       u16 dbqtimer_tick;
+       u16 dbqtimer_val[SGE_NDBQTIMERS];
        u32 fl_pg_order;            /* large page allocation size */
        u32 stat_len;               /* length of status page at ring end */
        u32 pktshift;               /* padding between CPL & packet data */
@@ -1402,7 +1407,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
                     rspq_flush_handler_t flush_handler, int cong);
 int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
                         struct net_device *dev, struct netdev_queue *netdevq,
-                        unsigned int iqid);
+                        unsigned int iqid, u8 dbqt);
 int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
                          struct net_device *dev, unsigned int iqid,
                          unsigned int cmplqid);
@@ -1415,6 +1420,8 @@ irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
 int t4_sge_init(struct adapter *adap);
 void t4_sge_start(struct adapter *adap);
 void t4_sge_stop(struct adapter *adap);
+int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *q,
+                                int maxreclaim);
 void cxgb4_set_ethtool_ops(struct net_device *netdev);
 int cxgb4_write_rss(const struct port_info *pi, const u16 *queues);
 enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb);
@@ -1821,6 +1828,8 @@ int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
 int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
                    unsigned int vf, unsigned int eqid);
 int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type);
+int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers,
+                         u16 *dbqtimers);
 void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl);
 int t4_update_port_info(struct port_info *pi);
 int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
index 7960435..65b8dc7 100644
@@ -932,11 +932,190 @@ static int get_adaptive_rx_setting(struct net_device *dev)
        return q->rspq.adaptive_rx;
 }
 
-static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+/* Return the current global Adapter SGE Doorbell Queue Timer Tick for all
+ * Ethernet TX Queues.
+ */
+static int get_dbqtimer_tick(struct net_device *dev)
+{
+       struct port_info *pi = netdev_priv(dev);
+       struct adapter *adap = pi->adapter;
+
+       if (!(adap->flags & SGE_DBQ_TIMER))
+               return 0;
+
+       return adap->sge.dbqtimer_tick;
+}
+
+/* Return the SGE Doorbell Queue Timer Value for the Ethernet TX Queues
+ * associated with a Network Device.
+ */
+static int get_dbqtimer(struct net_device *dev)
+{
+       struct port_info *pi = netdev_priv(dev);
+       struct adapter *adap = pi->adapter;
+       struct sge_eth_txq *txq;
+
+       txq = &adap->sge.ethtxq[pi->first_qset];
+
+       if (!(adap->flags & SGE_DBQ_TIMER))
+               return 0;
+
+       /* all of the TX Queues use the same Timer Index */
+       return adap->sge.dbqtimer_val[txq->dbqtimerix];
+}
+
+/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX
+ * Queues.  This is the fundamental "Tick" that sets the scale of values which
+ * can be used.  Individual Ethernet TX Queues index into a relatively small
+ * array of Tick Multipliers.  Changing the base Tick will thus change all of
+ * the resulting Timer Values associated with those multipliers for all
+ * Ethernet TX Queues.
+ */
+static int set_dbqtimer_tick(struct net_device *dev, int usecs)
+{
+       struct port_info *pi = netdev_priv(dev);
+       struct adapter *adap = pi->adapter;
+       struct sge *s = &adap->sge;
+       u32 param, val;
+       int ret;
+
+       if (!(adap->flags & SGE_DBQ_TIMER))
+               return 0;
+
+       /* return early if it's the same Timer Tick we're already using */
+       if (s->dbqtimer_tick == usecs)
+               return 0;
+
+       /* attempt to set the new Timer Tick value */
+       param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+                FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK));
+       val = usecs;
+       ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
+       if (ret)
+               return ret;
+       s->dbqtimer_tick = usecs;
+
+       /* if successful, reread resulting dependent Timer values */
+       ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(s->dbqtimer_val),
+                                   s->dbqtimer_val);
+       return ret;
+}
+
+/* Set the SGE Doorbell Queue Timer Value for the Ethernet TX Queues
+ * associated with a Network Device.  There is a relatively small array of
+ * possible Timer Values so we need to pick the closest value available.
+ */
+static int set_dbqtimer(struct net_device *dev, int usecs)
+{
+       int qix, timerix, min_timerix, delta, min_delta;
+       struct port_info *pi = netdev_priv(dev);
+       struct adapter *adap = pi->adapter;
+       struct sge *s = &adap->sge;
+       struct sge_eth_txq *txq;
+       u32 param, val;
+       int ret;
+
+       if (!(adap->flags & SGE_DBQ_TIMER))
+               return 0;
+
+       /* Find the SGE Doorbell Timer Value that's closest to the requested
+        * value.
+        */
+       min_delta = INT_MAX;
+       min_timerix = 0;
+       for (timerix = 0; timerix < ARRAY_SIZE(s->dbqtimer_val); timerix++) {
+               delta = s->dbqtimer_val[timerix] - usecs;
+               if (delta < 0)
+                       delta = -delta;
+               if (delta < min_delta) {
+                       min_delta = delta;
+                       min_timerix = timerix;
+               }
+       }
+
+       /* Return early if it's the same Timer Index we're already using.
+        * We use the same Timer Index for all of the TX Queues for an
+        * interface so it's only necessary to check the first one.
+        */
+       txq = &s->ethtxq[pi->first_qset];
+       if (txq->dbqtimerix == min_timerix)
+               return 0;
+
+       for (qix = 0; qix < pi->nqsets; qix++, txq++) {
+               if (adap->flags & FULL_INIT_DONE) {
+                       param =
+                        (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+                         FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX) |
+                         FW_PARAMS_PARAM_YZ_V(txq->q.cntxt_id));
+                       val = min_timerix;
+                       ret = t4_set_params(adap, adap->mbox, adap->pf, 0,
+                                           1, &param, &val);
+                       if (ret)
+                               return ret;
+               }
+               txq->dbqtimerix = min_timerix;
+       }
+       return 0;
+}
+
+/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX
+ * Queues and the Timer Value for the Ethernet TX Queues associated with a
+ * Network Device.  Since changing the global Tick changes all of the
+ * available Timer Values, we need to do this first before selecting the
+ * resulting closest Timer Value.  Moreover, since the Tick is global,
+ * changing it affects the Timer Values for all Network Devices on the
+ * adapter.  So, before changing the Tick, we grab all of the current Timer
+ * Values for other Network Devices on this Adapter and then attempt to select
+ * new Timer Values which are close to the old values ...
+ */
+static int set_dbqtimer_tickval(struct net_device *dev,
+                               int tick_usecs, int timer_usecs)
+{
+       struct port_info *pi = netdev_priv(dev);
+       struct adapter *adap = pi->adapter;
+       int timer[MAX_NPORTS];
+       unsigned int port;
+       int ret;
+
+       /* Grab the current Timer Values of the adapter's other Network
+        * Interfaces and fill in the new one for this Network Interface.
+        */
+       for_each_port(adap, port)
+               if (port == pi->port_id)
+                       timer[port] = timer_usecs;
+               else
+                       timer[port] = get_dbqtimer(adap->port[port]);
+
+       /* Change the global Tick first ... */
+       ret = set_dbqtimer_tick(dev, tick_usecs);
+       if (ret)
+               return ret;
+
+       /* ... and then set all of the Network Interface Timer Values ... */
+       for_each_port(adap, port) {
+               ret = set_dbqtimer(adap->port[port], timer[port]);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int set_coalesce(struct net_device *dev,
+                       struct ethtool_coalesce *coalesce)
 {
-       set_adaptive_rx_setting(dev, c->use_adaptive_rx_coalesce);
-       return set_rx_intr_params(dev, c->rx_coalesce_usecs,
-                                 c->rx_max_coalesced_frames);
+       int ret;
+
+       set_adaptive_rx_setting(dev, coalesce->use_adaptive_rx_coalesce);
+
+       ret = set_rx_intr_params(dev, coalesce->rx_coalesce_usecs,
+                                coalesce->rx_max_coalesced_frames);
+       if (ret)
+               return ret;
+
+       return set_dbqtimer_tickval(dev,
+                                   coalesce->tx_coalesce_usecs_irq,
+                                   coalesce->tx_coalesce_usecs);
 }
 
 static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
@@ -949,6 +1128,8 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
        c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN_F) ?
                adap->sge.counter_val[rq->pktcnt_idx] : 0;
        c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev);
+       c->tx_coalesce_usecs_irq = get_dbqtimer_tick(dev);
+       c->tx_coalesce_usecs = get_dbqtimer(dev);
        return 0;
 }
 
index adf75d1..bcbac24 100644
@@ -575,7 +575,7 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
                        struct sge_eth_txq *eq;
 
                        eq = container_of(txq, struct sge_eth_txq, q);
-                       netif_tx_wake_queue(eq->txq);
+                       t4_sge_eth_txq_egress_update(q->adap, eq, -1);
                } else {
                        struct sge_uld_txq *oq;
 
@@ -933,10 +933,13 @@ static int setup_sge_queues(struct adapter *adap)
                        q->rspq.idx = j;
                        memset(&q->stats, 0, sizeof(q->stats));
                }
-               for (j = 0; j < pi->nqsets; j++, t++) {
+
+               q = &s->ethrxq[pi->first_qset];
+               for (j = 0; j < pi->nqsets; j++, t++, q++) {
                        err = t4_sge_alloc_eth_txq(adap, t, dev,
                                        netdev_get_tx_queue(dev, j),
-                                       s->fw_evtq.cntxt_id);
+                                       q->rspq.cntxt_id,
+                                       !!(adap->flags & SGE_DBQ_TIMER));
                        if (err)
                                goto freeout;
                }
@@ -958,7 +961,7 @@ static int setup_sge_queues(struct adapter *adap)
        if (!is_t4(adap->params.chip)) {
                err = t4_sge_alloc_eth_txq(adap, &s->ptptxq, adap->port[0],
                                           netdev_get_tx_queue(adap->port[0], 0)
-                                          , s->fw_evtq.cntxt_id);
+                                          , s->fw_evtq.cntxt_id, false);
                if (err)
                        goto freeout;
        }
@@ -4325,6 +4328,24 @@ static int adap_init0(struct adapter *adap)
        if (ret < 0)
                goto bye;
 
+       /* Grab the SGE Doorbell Queue Timer values.  If successful, that
+        * indicates that the Firmware and Hardware support this.
+        */
+       params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+                   FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK));
+       ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+                             1, params, val);
+
+       if (!ret) {
+               adap->sge.dbqtimer_tick = val[0];
+               ret = t4_read_sge_dbqtimers(adap,
+                                           ARRAY_SIZE(adap->sge.dbqtimer_val),
+                                           adap->sge.dbqtimer_val);
+       }
+
+       if (!ret)
+               adap->flags |= SGE_DBQ_TIMER;
+
        if (is_bypass_device(adap->pdev->device))
                adap->params.bypass = 1;
 
index fc0bc64..f18493f 100644
  * Max number of Tx descriptors we clean up at a time.  Should be modest as
  * freeing skbs isn't cheap and it happens while holding locks.  We just need
  * to free packets faster than they arrive, we eventually catch up and keep
- * the amortized cost reasonable.  Must be >= 2 * TXQ_STOP_THRES.
+ * the amortized cost reasonable.  Must be >= 2 * TXQ_STOP_THRES.  It should
+ * also match the CIDX Flush Threshold.
  */
-#define MAX_TX_RECLAIM 16
+#define MAX_TX_RECLAIM 32
 
 /*
  * Max number of Rx buffers we replenish at a time.  Again keep this modest,
@@ -401,31 +402,52 @@ static inline int reclaimable(const struct sge_txq *q)
 }
 
 /**
- *     cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
+ *     reclaim_completed_tx - reclaims completed TX Descriptors
  *     @adap: the adapter
  *     @q: the Tx queue to reclaim completed descriptors from
+ *     @maxreclaim: the maximum number of TX Descriptors to reclaim or -1
  *     @unmap: whether the buffers should be unmapped for DMA
  *
- *     Reclaims Tx descriptors that the SGE has indicated it has processed,
- *     and frees the associated buffers if possible.  Called with the Tx
- *     queue locked.
+ *     Reclaims Tx Descriptors that the SGE has indicated it has processed,
+ *     and frees the associated buffers if possible.  If @maxreclaim == -1,
+ *     then we'll use a default maximum.  Called with the TX Queue locked.
  */
-inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
-                                       bool unmap)
+static inline int reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+                                      int maxreclaim, bool unmap)
 {
-       int avail = reclaimable(q);
+       int reclaim = reclaimable(q);
 
-       if (avail) {
+       if (reclaim) {
                /*
                 * Limit the amount of clean up work we do at a time to keep
                 * the Tx lock hold time O(1).
                 */
-               if (avail > MAX_TX_RECLAIM)
-                       avail = MAX_TX_RECLAIM;
+               if (maxreclaim < 0)
+                       maxreclaim = MAX_TX_RECLAIM;
+               if (reclaim > maxreclaim)
+                       reclaim = maxreclaim;
 
-               free_tx_desc(adap, q, avail, unmap);
-               q->in_use -= avail;
+               free_tx_desc(adap, q, reclaim, unmap);
+               q->in_use -= reclaim;
        }
+
+       return reclaim;
+}
+
+/**
+ *     cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
+ *     @adap: the adapter
+ *     @q: the Tx queue to reclaim completed descriptors from
+ *     @unmap: whether the buffers should be unmapped for DMA
+ *
+ *     Reclaims Tx descriptors that the SGE has indicated it has processed,
+ *     and frees the associated buffers if possible.  Called with the Tx
+ *     queue locked.
+ */
+void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+                               bool unmap)
+{
+       (void)reclaim_completed_tx(adap, q, -1, unmap);
 }
 EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
 
@@ -1288,6 +1310,44 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
 }
 
 /**
+ *     t4_sge_eth_txq_egress_update - handle Ethernet TX Queue update
+ *     @adap: the adapter
+ *     @eq: the Ethernet TX Queue
+ *     @maxreclaim: the maximum number of TX Descriptors to reclaim or -1
+ *
+ *     We're typically called here to update the state of an Ethernet TX
+ *     Queue with respect to the hardware's progress in consuming the TX
+ *     Work Requests that we've put on that Egress Queue.  This happens
+ *     when we get Egress Queue Update messages and also prophylactically
+ *     in regular timer-based Ethernet TX Queue maintenance.
+ */
+int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
+                                int maxreclaim)
+{
+       struct sge_txq *q = &eq->q;
+       unsigned int reclaimed;
+
+       if (!q->in_use || !__netif_tx_trylock(eq->txq))
+               return 0;
+
+       /* Reclaim pending completed TX Descriptors. */
+       reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true);
+
+       /* If the TX Queue is currently stopped and there's now more than half
+        * the queue available, restart it.  Otherwise bail out: the rest of
+        * what we want to do here deals with the possibility of shipping any
+        * currently buffered Coalesced TX Work Request.
+        */
+       if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) {
+               netif_tx_wake_queue(eq->txq);
+               eq->q.restarts++;
+       }
+
+       __netif_tx_unlock(eq->txq);
+       return reclaimed;
+}
+
+/**
  *     cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
  *     @skb: the packet
  *     @dev: the egress net device
@@ -1357,7 +1417,7 @@ out_free: dev_kfree_skb_any(skb);
        }
        skb_tx_timestamp(skb);
 
-       cxgb4_reclaim_completed_tx(adap, &q->q, true);
+       reclaim_completed_tx(adap, &q->q, -1, true);
        cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
 
 #ifdef CONFIG_CHELSIO_T4_FCOE
@@ -1400,8 +1460,25 @@ out_free:        dev_kfree_skb_any(skb);
 
        wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
        if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+               /* After we're done injecting the Work Request for this
+                * packet, we'll be below our "stop threshold" so stop the TX
+                * Queue now and schedule a request for an SGE Egress Queue
+                * Update message. The queue will get started later on when
+                * the firmware processes this Work Request and sends us an
+                * Egress Queue Status Update message indicating that space
+                * has opened up.
+                */
                eth_txq_stop(q);
-               wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+
+               /* If we're using the SGE Doorbell Queue Timer facility, we
+                * don't need to ask the Firmware to send us Egress Queue CIDX
+                * Updates: the Hardware will do this automatically.  And
+                * since we send the Ingress Queue CIDX Updates to the
+                * corresponding Ethernet Response Queue, we'll get them very
+                * quickly.
+                */
+               if (!q->dbqt)
+                       wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
        }
 
        wr = (void *)&q->q.desc[q->q.pidx];
@@ -1671,7 +1748,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
        /* Take this opportunity to reclaim any TX Descriptors whose DMA
         * transfers have completed.
         */
-       cxgb4_reclaim_completed_tx(adapter, &txq->q, true);
+       reclaim_completed_tx(adapter, &txq->q, -1, true);
 
        /* Calculate the number of flits and TX Descriptors we're going to
         * need along with how many TX Descriptors will be left over after
@@ -1715,7 +1792,16 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
                 * has opened up.
                 */
                eth_txq_stop(txq);
-               wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+
+               /* If we're using the SGE Doorbell Queue Timer facility, we
+                * don't need to ask the Firmware to send us Egress Queue CIDX
+                * Updates: the Hardware will do this automatically.  And
+                * since we send the Ingress Queue CIDX Updates to the
+                * corresponding Ethernet Response Queue, we'll get them very
+                * quickly.
+                */
+               if (!txq->dbqt)
+                       wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
        }
 
        /* Start filling in our Work Request.  Note that we do _not_ handle
@@ -2794,6 +2880,74 @@ static int t4_tx_hststamp(struct adapter *adapter, struct sk_buff *skb,
 }
 
 /**
+ *     t4_tx_completion_handler - handle CPL_SGE_EGR_UPDATE messages
+ *     @rspq: Ethernet RX Response Queue associated with Ethernet TX Queue
+ *     @rsp: Response Entry pointer into Response Queue
+ *     @gl: Gather List pointer
+ *
+ *     For adapters which support the SGE Doorbell Queue Timer facility,
+ *     we configure the Ethernet TX Queues to send CIDX Updates to the
+ *     Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE
+ *     messages.  This adds a small load to PCIe Link RX bandwidth and,
+ *     potentially, higher CPU Interrupt load, but allows us to respond
+ *     much more quickly to the CIDX Updates.  This is important for
+ *     Upper Layer Software which isn't willing to have a large amount
+ *     of TX Data outstanding before receiving DMA Completions.
+ */
+static void t4_tx_completion_handler(struct sge_rspq *rspq,
+                                    const __be64 *rsp,
+                                    const struct pkt_gl *gl)
+{
+       u8 opcode = ((const struct rss_header *)rsp)->opcode;
+       struct port_info *pi = netdev_priv(rspq->netdev);
+       struct adapter *adapter = rspq->adap;
+       struct sge *s = &adapter->sge;
+       struct sge_eth_txq *txq;
+
+       /* skip RSS header */
+       rsp++;
+
+       /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
+        */
+       if (unlikely(opcode == CPL_FW4_MSG &&
+                    ((const struct cpl_fw4_msg *)rsp)->type ==
+                                                       FW_TYPE_RSSCPL)) {
+               rsp++;
+               opcode = ((const struct rss_header *)rsp)->opcode;
+               rsp++;
+       }
+
+       if (unlikely(opcode != CPL_SGE_EGR_UPDATE)) {
+               pr_info("%s: unexpected FW4/CPL %#x on Rx queue\n",
+                       __func__, opcode);
+               return;
+       }
+
+       txq = &s->ethtxq[pi->first_qset + rspq->idx];
+
+       /* We've got the Hardware Consumer Index Update in the Egress Update
+        * message.  If we're using the SGE Doorbell Queue Timer mechanism,
+        * these Egress Update messages will be our sole CIDX Updates we get
+        * since we don't want to chew up PCIe bandwidth for both Ingress
+        * Messages and Status Page writes.  However, the code which manages
+        * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value
+        * stored in the Status Page at the end of the TX Queue.  It's easiest
+        * to simply copy the CIDX Update value from the Egress Update message
+        * to the Status Page.  Also note that no Endian issues need to be
+        * considered here since both are Big Endian and we're just copying
+        * bytes consistently ...
+        */
+       if (txq->dbqt) {
+               struct cpl_sge_egr_update *egr;
+
+               egr = (struct cpl_sge_egr_update *)rsp;
+               WRITE_ONCE(txq->q.stat->cidx, egr->cidx);
+       }
+
+       t4_sge_eth_txq_egress_update(adapter, txq, -1);
+}
+
+/**
  *     t4_ethrx_handler - process an ingress ethernet packet
  *     @q: the response queue that received the packet
  *     @rsp: the response queue descriptor holding the RX_PKT message
@@ -2816,6 +2970,15 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
        struct port_info *pi;
        int ret = 0;
 
+       /* If we're looking at a TX Queue CIDX Update, handle that separately
+        * and return.
+        */
+       if (unlikely((*(u8 *)rsp == CPL_FW4_MSG) ||
+                    (*(u8 *)rsp == CPL_SGE_EGR_UPDATE))) {
+               t4_tx_completion_handler(q, rsp, si);
+               return 0;
+       }
+
        if (unlikely(*(u8 *)rsp == cpl_trace_pkt))
                return handle_trace_pkt(q->adap, si);
 
@@ -3289,10 +3452,10 @@ done:
 
 static void sge_tx_timer_cb(struct timer_list *t)
 {
-       unsigned long m;
-       unsigned int i, budget;
        struct adapter *adap = from_timer(adap, t, sge.tx_timer);
        struct sge *s = &adap->sge;
+       unsigned long m, period;
+       unsigned int i, budget;
 
        for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
                for (m = s->txq_maperr[i]; m; m &= m - 1) {
@@ -3320,29 +3483,29 @@ static void sge_tx_timer_cb(struct timer_list *t)
        budget = MAX_TIMER_TX_RECLAIM;
        i = s->ethtxq_rover;
        do {
-               struct sge_eth_txq *q = &s->ethtxq[i];
-
-               if (q->q.in_use &&
-                   time_after_eq(jiffies, q->txq->trans_start + HZ / 100) &&
-                   __netif_tx_trylock(q->txq)) {
-                       int avail = reclaimable(&q->q);
-
-                       if (avail) {
-                               if (avail > budget)
-                                       avail = budget;
-
-                               free_tx_desc(adap, &q->q, avail, true);
-                               q->q.in_use -= avail;
-                               budget -= avail;
-                       }
-                       __netif_tx_unlock(q->txq);
-               }
+               budget -= t4_sge_eth_txq_egress_update(adap, &s->ethtxq[i],
+                                                      budget);
+               if (!budget)
+                       break;
 
                if (++i >= s->ethqsets)
                        i = 0;
-       } while (budget && i != s->ethtxq_rover);
+       } while (i != s->ethtxq_rover);
        s->ethtxq_rover = i;
-       mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
+
+       if (budget == 0) {
+               /* If we found too many reclaimable packets schedule a timer
+                * in the near future to continue where we left off.
+                */
+               period = 2;
+       } else {
+               /* We reclaimed all reclaimable TX Descriptors, so reschedule
+                * at the normal period.
+                */
+               period = TX_QCHECK_PERIOD;
+       }
+
+       mod_timer(&s->tx_timer, jiffies + period);
 }
 
 /**
@@ -3421,7 +3584,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
                                                        :  FW_IQ_IQTYPE_OFLD));
 
        if (fl) {
-               enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
+               unsigned int chip_ver =
+                       CHELSIO_CHIP_VERSION(adap->params.chip);
 
                /* Allocate the ring for the hardware free list (with space
                 * for its status page) along with the associated software
@@ -3459,10 +3623,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
                 * the smaller 64-byte value there).
                 */
                c.fl0dcaen_to_fl0cidxfthresh =
-                       htons(FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ?
+                       htons(FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5 ?
                                                   FETCHBURSTMIN_128B_X :
-                                                  FETCHBURSTMIN_64B_X) |
-                             FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ?
+                                                  FETCHBURSTMIN_64B_T6_X) |
+                             FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ?
                                                   FETCHBURSTMAX_512B_X :
                                                   FETCHBURSTMAX_256B_X));
                c.fl0size = htons(flsz);
@@ -3584,14 +3748,24 @@ static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
        adap->sge.egr_map[id - adap->sge.egr_start] = q;
 }
 
+/**
+ *     t4_sge_alloc_eth_txq - allocate an Ethernet TX Queue
+ *     @adap: the adapter
+ *     @txq: the SGE Ethernet TX Queue to initialize
+ *     @dev: the Linux Network Device
+ *     @netdevq: the corresponding Linux TX Queue
+ *     @iqid: the Ingress Queue to which to deliver CIDX Update messages
+ *     @dbqt: whether this TX Queue will use the SGE Doorbell Queue Timers
+ */
 int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
                         struct net_device *dev, struct netdev_queue *netdevq,
-                        unsigned int iqid)
+                        unsigned int iqid, u8 dbqt)
 {
-       int ret, nentries;
-       struct fw_eq_eth_cmd c;
-       struct sge *s = &adap->sge;
+       unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
        struct port_info *pi = netdev_priv(dev);
+       struct sge *s = &adap->sge;
+       struct fw_eq_eth_cmd c;
+       int ret, nentries;
 
        /* Add status entries */
        nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
@@ -3610,19 +3784,47 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
                            FW_EQ_ETH_CMD_VFN_V(0));
        c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F |
                                 FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c));
-       c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
-                          FW_EQ_ETH_CMD_VIID_V(pi->viid));
+
+       /* For TX Ethernet Queues using the SGE Doorbell Queue Timer
+        * mechanism, we use Ingress Queue messages for Hardware Consumer
+        * Index Updates on the TX Queue.  Otherwise we have the Hardware
+        * write the CIDX Updates into the Status Page at the end of the
+        * TX Queue.
+        */
+       c.autoequiqe_to_viid = htonl((dbqt
+                                     ? FW_EQ_ETH_CMD_AUTOEQUIQE_F
+                                     : FW_EQ_ETH_CMD_AUTOEQUEQE_F) |
+                                    FW_EQ_ETH_CMD_VIID_V(pi->viid));
+
        c.fetchszm_to_iqid =
-               htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
+               htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt
+                                                ? HOSTFCMODE_INGRESS_QUEUE_X
+                                                : HOSTFCMODE_STATUS_PAGE_X) |
                      FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
                      FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
+
+       /* Note that the CIDX Flush Threshold should match MAX_TX_RECLAIM. */
        c.dcaen_to_eqsize =
-               htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
+               htonl(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+                                           ? FETCHBURSTMIN_64B_X
+                                           : FETCHBURSTMIN_64B_T6_X) |
                      FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
                      FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
                      FW_EQ_ETH_CMD_EQSIZE_V(nentries));
+
        c.eqaddr = cpu_to_be64(txq->q.phys_addr);
 
+       /* If we're using the SGE Doorbell Queue Timer mechanism, pass in the
+        * currently configured Timer Index.  This can be changed later via an
+        * ethtool -C tx-usecs {Timer Val} command.  Note that the SGE
+        * Doorbell Queue mode is currently automatically enabled in the
+        * Firmware by setting either AUTOEQUEQE or AUTOEQUIQE ...
+        */
+       if (dbqt)
+               c.timeren_timerix =
+                       cpu_to_be32(FW_EQ_ETH_CMD_TIMEREN_F |
+                                   FW_EQ_ETH_CMD_TIMERIX_V(txq->dbqtimerix));
+
        ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
        if (ret) {
                kfree(txq->q.sdesc);
@@ -3639,6 +3841,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
        txq->txq = netdevq;
        txq->tso = txq->tx_cso = txq->vlan_ins = 0;
        txq->mapping_err = 0;
+       txq->dbqt = dbqt;
+
        return 0;
 }
 
@@ -3646,10 +3850,11 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
                          struct net_device *dev, unsigned int iqid,
                          unsigned int cmplqid)
 {
-       int ret, nentries;
-       struct fw_eq_ctrl_cmd c;
-       struct sge *s = &adap->sge;
+       unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
        struct port_info *pi = netdev_priv(dev);
+       struct sge *s = &adap->sge;
+       struct fw_eq_ctrl_cmd c;
+       int ret, nentries;
 
        /* Add status entries */
        nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
@@ -3673,7 +3878,9 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
                      FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) |
                      FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid));
        c.dcaen_to_eqsize =
-               htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
+               htonl(FW_EQ_CTRL_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+                                            ? FETCHBURSTMIN_64B_X
+                                            : FETCHBURSTMIN_64B_T6_X) |
                      FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
                      FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
                      FW_EQ_CTRL_CMD_EQSIZE_V(nentries));
@@ -3713,6 +3920,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
                         struct net_device *dev, unsigned int iqid,
                         unsigned int uld_type)
 {
+       unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
        int ret, nentries;
        struct fw_eq_ofld_cmd c;
        struct sge *s = &adap->sge;
@@ -3743,7 +3951,9 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
                      FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) |
                      FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid));
        c.dcaen_to_eqsize =
-               htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
+               htonl(FW_EQ_OFLD_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+                                            ? FETCHBURSTMIN_64B_X
+                                            : FETCHBURSTMIN_64B_T6_X) |
                      FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
                      FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
                      FW_EQ_OFLD_CMD_EQSIZE_V(nentries));
index c5e5466..27af347 100644
@@ -6713,6 +6713,47 @@ int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type)
 }
 
 /**
+ *     t4_read_sge_dbqtimers - read SGE Doorbell Queue Timer values
+ *     @adap: the adapter
+ *     @ndbqtimers: size of the provided SGE Doorbell Queue Timer table
+ *     @dbqtimers: SGE Doorbell Queue Timer table
+ *
+ *     Reads the SGE Doorbell Queue Timer values into the provided table.
+ *     Returns 0 on success (Firmware and Hardware support this feature),
+ *     an error on failure.
+ */
+int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers,
+                         u16 *dbqtimers)
+{
+       int ret, dbqtimerix;
+
+       ret = 0;
+       dbqtimerix = 0;
+       while (dbqtimerix < ndbqtimers) {
+               int nparams, param;
+               u32 params[7], vals[7];
+
+               nparams = ndbqtimers - dbqtimerix;
+               if (nparams > ARRAY_SIZE(params))
+                       nparams = ARRAY_SIZE(params);
+
+               for (param = 0; param < nparams; param++)
+                       params[param] =
+                         (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+                          FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMER) |
+                          FW_PARAMS_PARAM_Y_V(dbqtimerix + param));
+               ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+                                     nparams, params, vals);
+               if (ret)
+                       break;
+
+               for (param = 0; param < nparams; param++)
+                       dbqtimers[dbqtimerix++] = vals[param];
+       }
+       return ret;
+}
+
+/**
  *      t4_fw_hello - establish communication with FW
  *      @adap: the adapter
  *      @mbox: mailbox to use for the FW command
index 361d503..002fc62 100644
@@ -91,6 +91,7 @@ enum {
        SGE_CTXT_SIZE = 24,       /* size of SGE context */
        SGE_NTIMERS = 6,          /* # of interrupt holdoff timer values */
        SGE_NCOUNTERS = 4,        /* # of interrupt packet counter values */
+       SGE_NDBQTIMERS = 8,       /* # of Doorbell Queue Timer values */
        SGE_MAX_IQ_SIZE = 65520,
 
        SGE_TIMER_RSTRT_CNTR = 6, /* restart RX packet threshold counter */
index f6558cb..eb1aa82 100644
 #define FETCHBURSTMIN_64B_X            2
 #define FETCHBURSTMIN_128B_X           3
 
+/* T6 and later use a single-bit encoding for FetchBurstMin */
+#define FETCHBURSTMIN_64B_T6_X         0
+#define FETCHBURSTMIN_128B_T6_X                1
+
 #define FETCHBURSTMAX_256B_X           2
 #define FETCHBURSTMAX_512B_X           3
 
+#define HOSTFCMODE_INGRESS_QUEUE_X     1
 #define HOSTFCMODE_STATUS_PAGE_X       2
 
 #define CIDXFLUSHTHRESH_32_X           5
+#define CIDXFLUSHTHRESH_128_X          7
 
 #define UPDATEDELIVERY_INTERRUPT_X     1
 
index 1d9b3e1..631f166 100644
@@ -1254,6 +1254,8 @@ enum fw_params_param_dev {
        FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
        FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR    = 0x24,
        FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27,
+       FW_PARAMS_PARAM_DEV_DBQ_TIMER   = 0x29,
+       FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A,
 };
 
 /*
@@ -1322,6 +1324,7 @@ enum fw_params_param_dmaq {
        FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11,
        FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12,
        FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13,
+       FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX = 0x15,
        FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20,
 };
 
@@ -1751,8 +1754,8 @@ struct fw_eq_eth_cmd {
        __be32 fetchszm_to_iqid;
        __be32 dcaen_to_eqsize;
        __be64 eqaddr;
-       __be32 viid_pkd;
-       __be32 r8_lo;
+       __be32 autoequiqe_to_viid;
+       __be32 timeren_timerix;
        __be64 r9;
 };
 
@@ -1847,6 +1850,10 @@ struct fw_eq_eth_cmd {
 #define FW_EQ_ETH_CMD_EQSIZE_S         0
 #define FW_EQ_ETH_CMD_EQSIZE_V(x)      ((x) << FW_EQ_ETH_CMD_EQSIZE_S)
 
+#define FW_EQ_ETH_CMD_AUTOEQUIQE_S     31
+#define FW_EQ_ETH_CMD_AUTOEQUIQE_V(x)  ((x) << FW_EQ_ETH_CMD_AUTOEQUIQE_S)
+#define FW_EQ_ETH_CMD_AUTOEQUIQE_F     FW_EQ_ETH_CMD_AUTOEQUIQE_V(1U)
+
 #define FW_EQ_ETH_CMD_AUTOEQUEQE_S     30
 #define FW_EQ_ETH_CMD_AUTOEQUEQE_V(x)  ((x) << FW_EQ_ETH_CMD_AUTOEQUEQE_S)
 #define FW_EQ_ETH_CMD_AUTOEQUEQE_F     FW_EQ_ETH_CMD_AUTOEQUEQE_V(1U)
@@ -1854,6 +1861,19 @@ struct fw_eq_eth_cmd {
 #define FW_EQ_ETH_CMD_VIID_S   16
 #define FW_EQ_ETH_CMD_VIID_V(x)        ((x) << FW_EQ_ETH_CMD_VIID_S)
 
+#define FW_EQ_ETH_CMD_TIMEREN_S                3
+#define FW_EQ_ETH_CMD_TIMEREN_M                0x1
+#define FW_EQ_ETH_CMD_TIMEREN_V(x)     ((x) << FW_EQ_ETH_CMD_TIMEREN_S)
+#define FW_EQ_ETH_CMD_TIMEREN_G(x)     \
+    (((x) >> FW_EQ_ETH_CMD_TIMEREN_S) & FW_EQ_ETH_CMD_TIMEREN_M)
+#define FW_EQ_ETH_CMD_TIMEREN_F        FW_EQ_ETH_CMD_TIMEREN_V(1U)
+
+#define FW_EQ_ETH_CMD_TIMERIX_S                0
+#define FW_EQ_ETH_CMD_TIMERIX_M                0x7
+#define FW_EQ_ETH_CMD_TIMERIX_V(x)     ((x) << FW_EQ_ETH_CMD_TIMERIX_S)
+#define FW_EQ_ETH_CMD_TIMERIX_G(x)     \
+    (((x) >> FW_EQ_ETH_CMD_TIMERIX_S) & FW_EQ_ETH_CMD_TIMERIX_M)
+
 struct fw_eq_ctrl_cmd {
        __be32 op_to_vfn;
        __be32 alloc_to_len16;
index 84be331..3300b69 100644
@@ -1408,7 +1408,7 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
        case FW_PORT_TYPE_CR4_QSFP:
                SET_LMM(FIBRE);
                FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
-               FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
                FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
                FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
@@ -1419,6 +1419,13 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
                break;
        }
 
+       if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) {
+               FW_CAPS_TO_LMM(FEC_RS, FEC_RS);
+               FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER);
+       } else {
+               SET_LMM(FEC_NONE);
+       }
+
        FW_CAPS_TO_LMM(ANEG, Autoneg);
        FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
        FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
index 1d534f0..11d2ba0 100644
@@ -2268,7 +2268,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
        cmd.iqaddr = cpu_to_be64(rspq->phys_addr);
 
        if (fl) {
-               enum chip_type chip =
+               unsigned int chip_ver =
                        CHELSIO_CHIP_VERSION(adapter->params.chip);
                /*
                 * Allocate the ring for the hardware free list (with space
@@ -2319,10 +2319,10 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
                 */
                cmd.fl0dcaen_to_fl0cidxfthresh =
                        cpu_to_be16(
-                               FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ?
-                                                    FETCHBURSTMIN_128B_X :
-                                                    FETCHBURSTMIN_64B_X) |
-                               FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ?
+                               FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5
+                                                    ? FETCHBURSTMIN_128B_X
+                                                    : FETCHBURSTMIN_64B_T6_X) |
+                               FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ?
                                                     FETCHBURSTMAX_512B_X :
                                                     FETCHBURSTMAX_256B_X));
                cmd.fl0size = cpu_to_be16(flsz);
@@ -2411,10 +2411,11 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq,
                           struct net_device *dev, struct netdev_queue *devq,
                           unsigned int iqid)
 {
+       unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
+       struct port_info *pi = netdev_priv(dev);
+       struct fw_eq_eth_cmd cmd, rpl;
        struct sge *s = &adapter->sge;
        int ret, nentries;
-       struct fw_eq_eth_cmd cmd, rpl;
-       struct port_info *pi = netdev_priv(dev);
 
        /*
         * Calculate the size of the hardware TX Queue (including the Status
@@ -2448,17 +2449,19 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq,
        cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_ALLOC_F |
                                         FW_EQ_ETH_CMD_EQSTART_F |
                                         FW_LEN16(cmd));
-       cmd.viid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
-                                  FW_EQ_ETH_CMD_VIID_V(pi->viid));
+       cmd.autoequiqe_to_viid = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
+                                            FW_EQ_ETH_CMD_VIID_V(pi->viid));
        cmd.fetchszm_to_iqid =
                cpu_to_be32(FW_EQ_ETH_CMD_HOSTFCMODE_V(SGE_HOSTFCMODE_STPG) |
                            FW_EQ_ETH_CMD_PCIECHN_V(pi->port_id) |
                            FW_EQ_ETH_CMD_IQID_V(iqid));
        cmd.dcaen_to_eqsize =
-               cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(SGE_FETCHBURSTMIN_64B) |
-                           FW_EQ_ETH_CMD_FBMAX_V(SGE_FETCHBURSTMAX_512B) |
+               cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+                                                 ? FETCHBURSTMIN_64B_X
+                                                 : FETCHBURSTMIN_64B_T6_X) |
+                           FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
                            FW_EQ_ETH_CMD_CIDXFTHRESH_V(
-                                               SGE_CIDXFLUSHTHRESH_32) |
+                                               CIDXFLUSHTHRESH_32_X) |
                            FW_EQ_ETH_CMD_EQSIZE_V(nentries));
        cmd.eqaddr = cpu_to_be64(txq->q.phys_addr);
 
index 60641e2..9a7f70d 100644
@@ -1434,7 +1434,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
                 * csum is correct or is zero.
                 */
                if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc &&
-                   tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) {
+                   tcp_udp_csum_ok && outer_csum_ok &&
+                   (ipv4_csum_ok || ipv6)) {
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                        skb->csum_level = encap;
                }
index 13430f7..f1a2da1 100644
@@ -585,7 +585,7 @@ static void de_tx (struct de_private *de)
                                netif_dbg(de, tx_done, de->dev,
                                          "tx done, slot %d\n", tx_tail);
                        }
-                       dev_kfree_skb_irq(skb);
+                       dev_consume_skb_irq(skb);
                }
 
 next:
index d8d423f..cfcdfee 100644
@@ -843,9 +843,9 @@ rio_free_tx (struct net_device *dev, int irq)
                                  desc_to_dma(&np->tx_ring[entry]),
                                  skb->len, PCI_DMA_TODEVICE);
                if (irq)
-                       dev_kfree_skb_irq (skb);
+                       dev_consume_skb_irq(skb);
                else
-                       dev_kfree_skb (skb);
+                       dev_kfree_skb(skb);
 
                np->tx_skbuff[entry] = NULL;
                entry = (entry + 1) % TX_RING_SIZE;
index 1a27176..4a37a69 100644
@@ -1193,7 +1193,6 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
        int handled = 0;
        int i;
 
-
        do {
                int intr_status = ioread16(ioaddr + IntrStatus);
                iowrite16(intr_status, ioaddr + IntrStatus);
@@ -1286,7 +1285,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
                                dma_unmap_single(&np->pci_dev->dev,
                                        le32_to_cpu(np->tx_ring[entry].frag[0].addr),
                                        skb->len, DMA_TO_DEVICE);
-                               dev_kfree_skb_irq (np->tx_skbuff[entry]);
+                               dev_consume_skb_irq(np->tx_skbuff[entry]);
                                np->tx_skbuff[entry] = NULL;
                                np->tx_ring[entry].frag[0].addr = 0;
                                np->tx_ring[entry].frag[0].length = 0;
@@ -1305,7 +1304,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
                                dma_unmap_single(&np->pci_dev->dev,
                                        le32_to_cpu(np->tx_ring[entry].frag[0].addr),
                                        skb->len, DMA_TO_DEVICE);
-                               dev_kfree_skb_irq (np->tx_skbuff[entry]);
+                               dev_consume_skb_irq(np->tx_skbuff[entry]);
                                np->tx_skbuff[entry] = NULL;
                                np->tx_ring[entry].frag[0].addr = 0;
                                np->tx_ring[entry].frag[0].length = 0;
index ae55da6..c24fd56 100644
@@ -1531,7 +1531,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
                        /* Free the original skb. */
                        pci_unmap_single(np->pci_dev, np->cur_tx->buffer,
                                np->cur_tx->skbuff->len, PCI_DMA_TODEVICE);
-                       dev_kfree_skb_irq(np->cur_tx->skbuff);
+                       dev_consume_skb_irq(np->cur_tx->skbuff);
                        np->cur_tx->skbuff = NULL;
                        --np->really_tx_count;
                        if (np->cur_tx->control & TXLD) {
index 6249711..bdee441 100644
@@ -501,7 +501,7 @@ static int dpaa_get_ts_info(struct net_device *net_dev,
        struct device_node *mac_node = dev->of_node;
        struct device_node *fman_node = NULL, *ptp_node = NULL;
        struct platform_device *ptp_dev = NULL;
-       struct qoriq_ptp *ptp = NULL;
+       struct ptp_qoriq *ptp = NULL;
 
        info->phc_index = -1;
 
index f9dd26f..8429f5c 100644
@@ -17,3 +17,15 @@ config FSL_ENETC_VF
          virtual function (VF) devices enabled by the ENETC PF driver.
 
          If compiled as module (M), the module name is fsl-enetc-vf.
+
+config FSL_ENETC_PTP_CLOCK
+       tristate "ENETC PTP clock driver"
+       depends on PTP_1588_CLOCK_QORIQ && (FSL_ENETC || FSL_ENETC_VF)
+       default y
+       help
+         This driver adds support for using the ENETC 1588 timer
+         as a PTP clock. This clock is only useful if your PTP
+         programs are getting hardware time stamps on the PTP Ethernet
+         packets using the SO_TIMESTAMPING API.
+
+         If compiled as module (M), the module name is fsl-enetc-ptp.
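
As the help text notes, the clock is consumed through the SO_TIMESTAMPING
socket API.  A minimal userspace sketch of requesting hardware timestamps
(error handling and PTP socket setup omitted; this illustrates the generic
API, not ENETC-specific code):

    #include <sys/socket.h>
    #include <linux/net_tstamp.h>

    static int enable_hw_timestamps(int sock)
    {
            int flags = SOF_TIMESTAMPING_TX_HARDWARE |
                        SOF_TIMESTAMPING_RX_HARDWARE |
                        SOF_TIMESTAMPING_RAW_HARDWARE;

            /* Timestamps are then delivered as SCM_TIMESTAMPING cmsgs. */
            return setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
                              &flags, sizeof(flags));
    }
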
index 9529b01..6976602 100644
@@ -13,3 +13,6 @@ fsl-enetc-vf-$(CONFIG_FSL_ENETC_VF) += enetc.o enetc_cbdr.o \
                                       enetc_ethtool.o
 fsl-enetc-vf-objs := enetc_vf.o $(fsl-enetc-vf-y)
 endif
+
+obj-$(CONFIG_FSL_ENETC_PTP_CLOCK) += fsl-enetc-ptp.o
+fsl-enetc-ptp-$(CONFIG_FSL_ENETC_PTP_CLOCK) += enetc_ptp.o
index efa0b1a..df8eb88 100644 (file)
@@ -4,8 +4,9 @@
 #include <linux/bitops.h>
 
 /* ENETC device IDs */
-#define ENETC_DEV_ID_PF        0xe100
-#define ENETC_DEV_ID_VF        0xef00
+#define ENETC_DEV_ID_PF                0xe100
+#define ENETC_DEV_ID_VF                0xef00
+#define ENETC_DEV_ID_PTP       0xee02
 
 /* ENETC register block BAR */
 #define ENETC_BAR_REGS 0
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
new file mode 100644 (file)
index 0000000..dc2f58a
--- /dev/null
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2019 NXP */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/fsl/ptp_qoriq.h>
+
+#include "enetc.h"
+
+static struct ptp_clock_info enetc_ptp_caps = {
+       .owner          = THIS_MODULE,
+       .name           = "ENETC PTP clock",
+       .max_adj        = 512000,
+       .n_alarm        = 0,
+       .n_ext_ts       = 2,
+       .n_per_out      = 0,
+       .n_pins         = 0,
+       .pps            = 1,
+       .adjfine        = ptp_qoriq_adjfine,
+       .adjtime        = ptp_qoriq_adjtime,
+       .gettime64      = ptp_qoriq_gettime,
+       .settime64      = ptp_qoriq_settime,
+       .enable         = ptp_qoriq_enable,
+};
+
+static int enetc_ptp_probe(struct pci_dev *pdev,
+                          const struct pci_device_id *ent)
+{
+       struct ptp_qoriq *ptp_qoriq;
+       void __iomem *base;
+       int err, len, n;
+
+       if (pdev->dev.of_node && !of_device_is_available(pdev->dev.of_node)) {
+               dev_info(&pdev->dev, "device is disabled, skipping\n");
+               return -ENODEV;
+       }
+
+       err = pci_enable_device_mem(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "device enable failed\n");
+               return err;
+       }
+
+       /* set up for high (64-bit) or low (32-bit) DMA */
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+       if (err) {
+               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+               if (err) {
+                       dev_err(&pdev->dev,
+                               "DMA configuration failed: 0x%x\n", err);
+                       goto err_dma;
+               }
+       }
+
+       err = pci_request_mem_regions(pdev, KBUILD_MODNAME);
+       if (err) {
+               dev_err(&pdev->dev, "pci_request_regions failed err=%d\n", err);
+               goto err_pci_mem_reg;
+       }
+
+       pci_set_master(pdev);
+
+       ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL);
+       if (!ptp_qoriq) {
+               err = -ENOMEM;
+               goto err_alloc_ptp;
+       }
+
+       len = pci_resource_len(pdev, ENETC_BAR_REGS);
+
+       base = ioremap(pci_resource_start(pdev, ENETC_BAR_REGS), len);
+       if (!base) {
+               err = -ENXIO;
+               dev_err(&pdev->dev, "ioremap() failed\n");
+               goto err_ioremap;
+       }
+
+       /* Allocate 1 interrupt */
+       n = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
+       if (n != 1) {
+               err = -EPERM;
+               goto err_irq;
+       }
+
+       ptp_qoriq->irq = pci_irq_vector(pdev, 0);
+
+       err = request_irq(ptp_qoriq->irq, ptp_qoriq_isr, 0, DRIVER, ptp_qoriq);
+       if (err) {
+               dev_err(&pdev->dev, "request_irq() failed!\n");
+               goto err_irq;
+       }
+
+       ptp_qoriq->dev = &pdev->dev;
+
+       err = ptp_qoriq_init(ptp_qoriq, base, enetc_ptp_caps);
+       if (err)
+               goto err_no_clock;
+
+       pci_set_drvdata(pdev, ptp_qoriq);
+
+       return 0;
+
+err_no_clock:
+       free_irq(ptp_qoriq->irq, ptp_qoriq);
+err_irq:
+       iounmap(base);
+err_ioremap:
+       kfree(ptp_qoriq);
+err_alloc_ptp:
+       pci_release_mem_regions(pdev);
+err_pci_mem_reg:
+err_dma:
+       pci_disable_device(pdev);
+
+       return err;
+}
+
+static void enetc_ptp_remove(struct pci_dev *pdev)
+{
+       struct ptp_qoriq *ptp_qoriq = pci_get_drvdata(pdev);
+
+       ptp_qoriq_free(ptp_qoriq);
+       kfree(ptp_qoriq);
+
+       pci_release_mem_regions(pdev);
+       pci_disable_device(pdev);
+}
+
+static const struct pci_device_id enetc_ptp_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PTP) },
+       { 0, } /* End of table. */
+};
+MODULE_DEVICE_TABLE(pci, enetc_ptp_id_table);
+
+static struct pci_driver enetc_ptp_driver = {
+       .name = KBUILD_MODNAME,
+       .id_table = enetc_ptp_id_table,
+       .probe = enetc_ptp_probe,
+       .remove = enetc_ptp_remove,
+};
+module_pci_driver(enetc_ptp_driver);
+
+MODULE_DESCRIPTION("ENETC PTP clock driver");
+MODULE_LICENSE("Dual BSD/GPL");
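
enetc_ptp_probe() above follows the usual kernel unwind idiom: resources are
acquired top to bottom, and each failure jumps to a label that releases, in
reverse order, everything acquired so far. The same shape reduced to a
standalone sketch (plain C, hypothetical resources):

    #include <stdio.h>

    static int acquire(const char *what, int fail)
    {
            if (fail) {
                    printf("failed to acquire %s\n", what);
                    return -1;
            }
            printf("acquired %s\n", what);
            return 0;
    }

    static int probe(void)
    {
            int err;

            err = acquire("regions", 0);
            if (err)
                    goto err_regions;
            err = acquire("irq", 1);        /* simulate a failure here */
            if (err)
                    goto err_irq;
            return 0;

    err_irq:
            printf("release regions\n");    /* undo in reverse order */
    err_regions:
            return err;
    }

    int main(void)
    {
            return probe() ? 1 : 0;
    }
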
index 2370dc2..697c242 100644 (file)
@@ -2098,6 +2098,7 @@ static int fec_enet_get_regs_len(struct net_device *ndev)
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
        defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \
        defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST)
+static __u32 fec_enet_register_version = 2;
 static u32 fec_enet_register_offset[] = {
        FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0,
        FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL,
@@ -2128,6 +2129,7 @@ static u32 fec_enet_register_offset[] = {
        IEEE_R_FDXFC, IEEE_R_OCTETS_OK
 };
 #else
+static __u32 fec_enet_register_version = 1;
 static u32 fec_enet_register_offset[] = {
        FEC_ECNTRL, FEC_IEVENT, FEC_IMASK, FEC_IVEC, FEC_R_DES_ACTIVE_0,
        FEC_R_DES_ACTIVE_1, FEC_R_DES_ACTIVE_2, FEC_X_DES_ACTIVE_0,
@@ -2149,6 +2151,8 @@ static void fec_enet_get_regs(struct net_device *ndev,
        u32 *buf = (u32 *)regbuf;
        u32 i, off;
 
+       regs->version = fec_enet_register_version;
+
        memset(buf, 0, regs->len);
 
        for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) {
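
Setting regs->version at dump time lets userspace tell the two register
layouts above apart: ethtool reads the version back through ETHTOOL_GREGS and
can pick a decoder accordingly. For context, the UAPI structure the driver is
filling in (abridged from <linux/ethtool.h>):

    #include <linux/types.h>

    /* abridged; the driver-chosen version tag describes data[]'s layout */
    struct ethtool_regs {
            __u32 cmd;      /* ETHTOOL_GREGS */
            __u32 version;  /* 2 for the ColdFire/ARM set, 1 otherwise (above) */
            __u32 len;      /* bytes of register data that follow */
            __u8  data[0];
    };
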
index b90bab7..c1968b3 100644 (file)
@@ -369,7 +369,7 @@ static irqreturn_t mpc52xx_fec_tx_interrupt(int irq, void *dev_id)
                dma_unmap_single(dev->dev.parent, bd->skb_pa, skb->len,
                                 DMA_TO_DEVICE);
 
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
        }
        spin_unlock(&priv->lock);
 
index 241325c..27ed995 100644 (file)
@@ -1492,7 +1492,7 @@ static int gfar_get_ts_info(struct net_device *dev,
        struct gfar_private *priv = netdev_priv(dev);
        struct platform_device *ptp_dev;
        struct device_node *ptp_node;
-       struct qoriq_ptp *ptp = NULL;
+       struct ptp_qoriq *ptp = NULL;
 
        info->phc_index = -1;
 
index c3d539e..eb3e65e 100644 (file)
@@ -1879,6 +1879,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth)
        u16 i, j;
        u8 __iomem *bd;
 
+       netdev_reset_queue(ugeth->ndev);
+
        ug_info = ugeth->ug_info;
        uf_info = &ug_info->uf_info;
 
index 3b9e74b..b8155f5 100644 (file)
@@ -3081,6 +3081,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset)
        dsaf_dev = dev_get_drvdata(&pdev->dev);
        if (!dsaf_dev) {
                dev_err(&pdev->dev, "dsaf_dev is NULL\n");
+               put_device(&pdev->dev);
                return -ENODEV;
        }
 
@@ -3088,6 +3089,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset)
        if (AE_IS_VER1(dsaf_dev->dsaf_ver)) {
                dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n",
                        dsaf_dev->ae_dev.name);
+               put_device(&pdev->dev);
                return -ENODEV;
        }
 
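
Both early returns above previously leaked a device reference: the pdev used
here comes from a lookup that takes a reference, so every exit path, including
the error ones, must drop it with put_device(). Illustrated under the
assumption of an of_find_device_by_node()-style lookup (sketch, not this
driver's exact code):

    struct platform_device *pdev = of_find_device_by_node(np);

    if (!pdev)
            return -ENODEV;
    /* ... use pdev ... */
    put_device(&pdev->dev); /* drop the lookup's reference on every path out */
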
index 2f7ae11..1274ad2 100644 (file)
@@ -1194,7 +1194,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id)
                                dma_unmap_single(dev->dev.parent,
                                                 tx_cmd->dma_addr,
                                                 skb->len, DMA_TO_DEVICE);
-                               dev_kfree_skb_irq(skb);
+                               dev_consume_skb_irq(skb);
 
                                tx_cmd->cmd.command = 0; /* Mark free */
                                break;
index 6fd15a7..5a04194 100644 (file)
@@ -1603,14 +1603,12 @@ static int fm10k_alloc_q_vector(struct fm10k_intfc *interface,
 {
        struct fm10k_q_vector *q_vector;
        struct fm10k_ring *ring;
-       int ring_count, size;
+       int ring_count;
 
        ring_count = txr_count + rxr_count;
-       size = sizeof(struct fm10k_q_vector) +
-              (sizeof(struct fm10k_ring) * ring_count);
 
        /* allocate q_vector and rings */
-       q_vector = kzalloc(size, GFP_KERNEL);
+       q_vector = kzalloc(struct_size(q_vector, ring, ring_count), GFP_KERNEL);
        if (!q_vector)
                return -ENOMEM;
 
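
struct_size(p, member, n) replaces the open-coded arithmetic removed above: it
computes the size of a structure followed by n flexible-array elements and,
unlike the hand-rolled version, saturates on overflow instead of silently
wrapping. A standalone approximation, minus the overflow saturation:

    #include <stddef.h>
    #include <stdio.h>

    struct ring { int id; };

    struct q_vector {
            int napi;
            struct ring ring[];     /* flexible array member */
    };

    int main(void)
    {
            size_t n = 4;
            /* roughly what struct_size(q, ring, n) evaluates to */
            size_t sz = offsetof(struct q_vector, ring) + n * sizeof(struct ring);

            printf("%zu bytes\n", sz);
            return 0;
    }
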
index 6d812e9..69b230c 100644 (file)
@@ -1189,15 +1189,15 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
 {
        struct igb_q_vector *q_vector;
        struct igb_ring *ring;
-       int ring_count, size;
+       int ring_count;
+       size_t size;
 
        /* igb only supports 1 Tx and/or 1 Rx queue per vector */
        if (txr_count > 1 || rxr_count > 1)
                return -ENOMEM;
 
        ring_count = txr_count + rxr_count;
-       size = sizeof(struct igb_q_vector) +
-              (sizeof(struct igb_ring) * ring_count);
+       size = struct_size(q_vector, ring, ring_count);
 
        /* allocate q_vector and rings */
        q_vector = adapter->q_vector[v_idx];
index 11c7543..87a1187 100644 (file)
@@ -2958,22 +2958,21 @@ static int igc_alloc_q_vector(struct igc_adapter *adapter,
 {
        struct igc_q_vector *q_vector;
        struct igc_ring *ring;
-       int ring_count, size;
+       int ring_count;
 
        /* igc only supports 1 Tx and/or 1 Rx queue per vector */
        if (txr_count > 1 || rxr_count > 1)
                return -ENOMEM;
 
        ring_count = txr_count + rxr_count;
-       size = sizeof(struct igc_q_vector) +
-               (sizeof(struct igc_ring) * ring_count);
 
        /* allocate q_vector and rings */
        q_vector = adapter->q_vector[v_idx];
        if (!q_vector)
-               q_vector = kzalloc(size, GFP_KERNEL);
+               q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
+                                  GFP_KERNEL);
        else
-               memset(q_vector, 0, size);
+               memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
        if (!q_vector)
                return -ENOMEM;
 
index 62e6499..cc3196a 100644 (file)
@@ -836,12 +836,10 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
        struct ixgbe_ring *ring;
        int node = NUMA_NO_NODE;
        int cpu = -1;
-       int ring_count, size;
+       int ring_count;
        u8 tcs = adapter->hw_tcs;
 
        ring_count = txr_count + rxr_count + xdp_count;
-       size = sizeof(struct ixgbe_q_vector) +
-              (sizeof(struct ixgbe_ring) * ring_count);
 
        /* customize cpu for Flow Director mapping */
        if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
@@ -855,9 +853,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
        }
 
        /* allocate q_vector and rings */
-       q_vector = kzalloc_node(size, GFP_KERNEL, node);
+       q_vector = kzalloc_node(struct_size(q_vector, ring, ring_count),
+                               GFP_KERNEL, node);
        if (!q_vector)
-               q_vector = kzalloc(size, GFP_KERNEL);
+               q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
+                                  GFP_KERNEL);
        if (!q_vector)
                return -ENOMEM;
 
index 32ac904..f9bb890 100644 (file)
@@ -112,10 +112,12 @@ struct ltq_etop_priv {
 static int
 ltq_etop_alloc_skb(struct ltq_etop_chan *ch)
 {
+       struct ltq_etop_priv *priv = netdev_priv(ch->netdev);
+
        ch->skb[ch->dma.desc] = netdev_alloc_skb(ch->netdev, MAX_DMA_DATA_LEN);
        if (!ch->skb[ch->dma.desc])
                return -ENOMEM;
-       ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(NULL,
+       ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(&priv->pdev->dev,
                ch->skb[ch->dma.desc]->data, MAX_DMA_DATA_LEN,
                DMA_FROM_DEVICE);
        ch->dma.desc_base[ch->dma.desc].addr =
@@ -487,7 +489,7 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev)
        netif_trans_update(dev);
 
        spin_lock_irqsave(&priv->lock, flags);
-       desc->addr = ((unsigned int) dma_map_single(NULL, skb->data, len,
+       desc->addr = ((unsigned int) dma_map_single(&priv->pdev->dev, skb->data, len,
                                                DMA_TO_DEVICE)) - byte_offset;
        wmb();
        desc->ctl = LTQ_DMA_OWN | LTQ_DMA_SOP | LTQ_DMA_EOP |
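
Passing NULL as the device to dma_map_single() is no longer tolerated: the
modern DMA API dereferences the device for its dma_ops and DMA masks, which is
why the driver now threads the platform device through to every map/unmap
call. The general shape, including the error check the call sites above could
still grow (kernel-style sketch):

    #include <linux/dma-mapping.h>

    /* sketch: always map against a real struct device and check the result */
    static dma_addr_t map_tx_buf(struct device *dev, void *buf, size_t len)
    {
            dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

            if (dma_mapping_error(dev, addr))
                    return DMA_MAPPING_ERROR;
            return addr;
    }
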
index 398328f..96e3f06 100644 (file)
 #define     MVPP2_GMAC_STATUS0_GMII_SPEED      BIT(1)
 #define     MVPP2_GMAC_STATUS0_MII_SPEED       BIT(2)
 #define     MVPP2_GMAC_STATUS0_FULL_DUPLEX     BIT(3)
-#define     MVPP2_GMAC_STATUS0_RX_PAUSE                BIT(6)
-#define     MVPP2_GMAC_STATUS0_TX_PAUSE                BIT(7)
+#define     MVPP2_GMAC_STATUS0_RX_PAUSE                BIT(4)
+#define     MVPP2_GMAC_STATUS0_TX_PAUSE                BIT(5)
 #define     MVPP2_GMAC_STATUS0_AN_COMPLETE     BIT(11)
 #define MVPP2_GMAC_PORT_FIFO_CFG_1_REG         0x1c
 #define     MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS     6
index 5c0c26f..191d9ce 100644 (file)
@@ -965,6 +965,11 @@ mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask)
 }
 
 /* Port configuration routines */
+static bool mvpp2_is_xlg(phy_interface_t interface)
+{
+       return interface == PHY_INTERFACE_MODE_10GKR ||
+              interface == PHY_INTERFACE_MODE_XAUI;
+}
 
 static void mvpp22_gop_init_rgmii(struct mvpp2_port *port)
 {
@@ -1101,7 +1106,7 @@ static void mvpp22_gop_unmask_irq(struct mvpp2_port *port)
        if (port->gop_id == 0) {
                /* Enable the XLG/GIG irqs for this port */
                val = readl(port->base + MVPP22_XLG_EXT_INT_MASK);
-               if (port->phy_interface == PHY_INTERFACE_MODE_10GKR)
+               if (mvpp2_is_xlg(port->phy_interface))
                        val |= MVPP22_XLG_EXT_INT_MASK_XLG;
                else
                        val |= MVPP22_XLG_EXT_INT_MASK_GIG;
@@ -1181,9 +1186,7 @@ static void mvpp2_port_enable(struct mvpp2_port *port)
        u32 val;
 
        /* Only GOP port 0 has an XLG MAC */
-       if (port->gop_id == 0 &&
-           (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-            port->phy_interface == PHY_INTERFACE_MODE_10GKR)) {
+       if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) {
                val = readl(port->base + MVPP22_XLG_CTRL0_REG);
                val |= MVPP22_XLG_CTRL0_PORT_EN |
                       MVPP22_XLG_CTRL0_MAC_RESET_DIS;
@@ -1202,9 +1205,7 @@ static void mvpp2_port_disable(struct mvpp2_port *port)
        u32 val;
 
        /* Only GOP port 0 has an XLG MAC */
-       if (port->gop_id == 0 &&
-           (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-            port->phy_interface == PHY_INTERFACE_MODE_10GKR)) {
+       if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) {
                val = readl(port->base + MVPP22_XLG_CTRL0_REG);
                val &= ~MVPP22_XLG_CTRL0_PORT_EN;
                writel(val, port->base + MVPP22_XLG_CTRL0_REG);
@@ -1377,13 +1378,9 @@ static void mvpp2_port_reset(struct mvpp2_port *port)
        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
                mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
 
-       val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
-                   ~MVPP2_GMAC_PORT_RESET_MASK;
+       val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) |
+             MVPP2_GMAC_PORT_RESET_MASK;
        writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
-
-       while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
-              MVPP2_GMAC_PORT_RESET_MASK)
-               continue;
 }
 
 /* Change maximum receive size of the port */
@@ -2459,8 +2456,7 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
 
        mvpp22_gop_mask_irq(port);
 
-       if (port->gop_id == 0 &&
-           port->phy_interface == PHY_INTERFACE_MODE_10GKR) {
+       if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) {
                val = readl(port->base + MVPP22_XLG_INT_STAT);
                if (val & MVPP22_XLG_INT_STAT_LINK) {
                        event = true;
@@ -3150,8 +3146,7 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port)
                ctrl3 = readl(port->base + MVPP22_XLG_CTRL3_REG);
                ctrl3 &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
 
-               if (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-                   port->phy_interface == PHY_INTERFACE_MODE_10GKR)
+               if (mvpp2_is_xlg(port->phy_interface))
                        ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_10G;
                else
                        ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC;
@@ -3159,9 +3154,7 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port)
                writel(ctrl3, port->base + MVPP22_XLG_CTRL3_REG);
        }
 
-       if (port->gop_id == 0 &&
-           (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
-            port->phy_interface == PHY_INTERFACE_MODE_10GKR))
+       if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface))
                mvpp2_xlg_max_rx_size_set(port);
        else
                mvpp2_gmac_max_rx_size_set(port);
@@ -4501,17 +4494,12 @@ static int mvpp2_phylink_mac_link_state(struct net_device *dev,
 static void mvpp2_mac_an_restart(struct net_device *dev)
 {
        struct mvpp2_port *port = netdev_priv(dev);
-       u32 val;
+       u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 
-       if (port->phy_interface != PHY_INTERFACE_MODE_SGMII)
-               return;
-
-       val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-       /* The RESTART_AN bit is cleared by the h/w after restarting the AN
-        * process.
-        */
-       val |= MVPP2_GMAC_IN_BAND_RESTART_AN | MVPP2_GMAC_IN_BAND_AUTONEG;
-       writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+       writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN,
+              port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+       writel(val & ~MVPP2_GMAC_IN_BAND_RESTART_AN,
+              port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 }
 
 static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode,
@@ -4524,8 +4512,13 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode,
 
        if (state->pause & MLO_PAUSE_TX)
                ctrl0 |= MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN;
+       else
+               ctrl0 &= ~MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN;
+
        if (state->pause & MLO_PAUSE_RX)
                ctrl0 |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
+       else
+               ctrl0 &= ~MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
 
        ctrl4 &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC;
        ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC |
@@ -4538,83 +4531,130 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode,
 static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
                              const struct phylink_link_state *state)
 {
-       u32 an, ctrl0, ctrl2, ctrl4;
-
-       an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-       ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
-       ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
-       ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
-
-       /* Force link down */
-       an &= ~MVPP2_GMAC_FORCE_LINK_PASS;
-       an |= MVPP2_GMAC_FORCE_LINK_DOWN;
-       writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+       u32 old_an, an;
+       u32 old_ctrl0, ctrl0;
+       u32 old_ctrl2, ctrl2;
+       u32 old_ctrl4, ctrl4;
 
-       /* Set the GMAC in a reset state */
-       ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK;
-       writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+       old_an = an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+       old_ctrl0 = ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
+       old_ctrl2 = ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
+       old_ctrl4 = ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
 
        an &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED |
                MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FC_ADV_EN |
                MVPP2_GMAC_FC_ADV_ASM_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG |
                MVPP2_GMAC_CONFIG_FULL_DUPLEX | MVPP2_GMAC_AN_DUPLEX_EN |
-               MVPP2_GMAC_FORCE_LINK_DOWN);
+               MVPP2_GMAC_IN_BAND_AUTONEG | MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS);
        ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK;
-       ctrl2 &= ~(MVPP2_GMAC_PORT_RESET_MASK | MVPP2_GMAC_PCS_ENABLE_MASK);
+       ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PORT_RESET_MASK |
+                  MVPP2_GMAC_PCS_ENABLE_MASK);
+       ctrl4 &= ~(MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN);
 
+       /* Configure port type */
        if (phy_interface_mode_is_8023z(state->interface)) {
-               /* 1000BaseX and 2500BaseX ports cannot negotiate speed nor can
-                * they negotiate duplex: they are always operating with a fixed
-                * speed of 1000/2500Mbps in full duplex, so force 1000/2500
-                * speed and full duplex here.
-                */
-               ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK;
-               an |= MVPP2_GMAC_CONFIG_GMII_SPEED |
-                     MVPP2_GMAC_CONFIG_FULL_DUPLEX;
-       } else if (!phy_interface_mode_is_rgmii(state->interface)) {
-               an |= MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG;
+               ctrl2 |= MVPP2_GMAC_PCS_ENABLE_MASK;
+               ctrl4 &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
+               ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS |
+                        MVPP22_CTRL4_DP_CLK_SEL |
+                        MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+       } else if (state->interface == PHY_INTERFACE_MODE_SGMII) {
+               ctrl2 |= MVPP2_GMAC_PCS_ENABLE_MASK | MVPP2_GMAC_INBAND_AN_MASK;
+               ctrl4 &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
+               ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS |
+                        MVPP22_CTRL4_DP_CLK_SEL |
+                        MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+       } else if (phy_interface_mode_is_rgmii(state->interface)) {
+               ctrl4 &= ~MVPP22_CTRL4_DP_CLK_SEL;
+               ctrl4 |= MVPP22_CTRL4_EXT_PIN_GMII_SEL |
+                        MVPP22_CTRL4_SYNC_BYPASS_DIS |
+                        MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
        }
 
-       if (state->duplex)
-               an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+       /* Configure advertisement bits */
        if (phylink_test(state->advertising, Pause))
                an |= MVPP2_GMAC_FC_ADV_EN;
        if (phylink_test(state->advertising, Asym_Pause))
                an |= MVPP2_GMAC_FC_ADV_ASM_EN;
 
-       if (phy_interface_mode_is_8023z(state->interface) ||
-           state->interface == PHY_INTERFACE_MODE_SGMII) {
-               an |= MVPP2_GMAC_IN_BAND_AUTONEG;
-               ctrl2 |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK;
+       /* Configure negotiation style */
+       if (!phylink_autoneg_inband(mode)) {
+               /* Phy or fixed speed - no in-band AN */
+               if (state->duplex)
+                       an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
 
-               ctrl4 &= ~(MVPP22_CTRL4_EXT_PIN_GMII_SEL |
-                          MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN);
-               ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS |
-                        MVPP22_CTRL4_DP_CLK_SEL |
-                        MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+               if (state->speed == SPEED_1000 || state->speed == SPEED_2500)
+                       an |= MVPP2_GMAC_CONFIG_GMII_SPEED;
+               else if (state->speed == SPEED_100)
+                       an |= MVPP2_GMAC_CONFIG_MII_SPEED;
 
                if (state->pause & MLO_PAUSE_TX)
                        ctrl4 |= MVPP22_CTRL4_TX_FC_EN;
                if (state->pause & MLO_PAUSE_RX)
                        ctrl4 |= MVPP22_CTRL4_RX_FC_EN;
-       } else if (phy_interface_mode_is_rgmii(state->interface)) {
-               an |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS;
+       } else if (state->interface == PHY_INTERFACE_MODE_SGMII) {
+               /* SGMII in-band mode receives the speed and duplex from
+                * the PHY. Flow control information is not received. */
+               an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | MVPP2_GMAC_FORCE_LINK_PASS);
+               an |= MVPP2_GMAC_IN_BAND_AUTONEG |
+                     MVPP2_GMAC_AN_SPEED_EN |
+                     MVPP2_GMAC_AN_DUPLEX_EN;
 
-               if (state->speed == SPEED_1000)
-                       an |= MVPP2_GMAC_CONFIG_GMII_SPEED;
-               else if (state->speed == SPEED_100)
-                       an |= MVPP2_GMAC_CONFIG_MII_SPEED;
+               if (state->pause & MLO_PAUSE_TX)
+                       ctrl4 |= MVPP22_CTRL4_TX_FC_EN;
+               if (state->pause & MLO_PAUSE_RX)
+                       ctrl4 |= MVPP22_CTRL4_RX_FC_EN;
+       } else if (phy_interface_mode_is_8023z(state->interface)) {
+               /* 1000BaseX and 2500BaseX ports cannot negotiate speed nor can
+                * they negotiate duplex: they are always operating with a fixed
+                * speed of 1000/2500Mbps in full duplex, so force 1000/2500
+                * speed and full duplex here.
+                */
+               ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK;
+               an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | MVPP2_GMAC_FORCE_LINK_PASS);
+               an |= MVPP2_GMAC_IN_BAND_AUTONEG |
+                     MVPP2_GMAC_CONFIG_GMII_SPEED |
+                     MVPP2_GMAC_CONFIG_FULL_DUPLEX;
 
-               ctrl4 &= ~MVPP22_CTRL4_DP_CLK_SEL;
-               ctrl4 |= MVPP22_CTRL4_EXT_PIN_GMII_SEL |
-                        MVPP22_CTRL4_SYNC_BYPASS_DIS |
-                        MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+               if (state->pause & MLO_PAUSE_AN && state->an_enabled) {
+                       an |= MVPP2_GMAC_FLOW_CTRL_AUTONEG;
+               } else {
+                       if (state->pause & MLO_PAUSE_TX)
+                               ctrl4 |= MVPP22_CTRL4_TX_FC_EN;
+                       if (state->pause & MLO_PAUSE_RX)
+                               ctrl4 |= MVPP22_CTRL4_RX_FC_EN;
+               }
        }
 
-       writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG);
-       writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
-       writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG);
-       writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+       if ((old_ctrl0 ^ ctrl0) & MVPP2_GMAC_PORT_TYPE_MASK ||
+           (old_ctrl2 ^ ctrl2) & MVPP2_GMAC_INBAND_AN_MASK ||
+           (old_an ^ an) & MVPP2_GMAC_IN_BAND_AUTONEG) {
+               /* Force link down */
+               old_an &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+               old_an |= MVPP2_GMAC_FORCE_LINK_DOWN;
+               writel(old_an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+               /* Set the GMAC in a reset state - do this in a way that
+                * ensures we clear it below.
+                */
+               old_ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK;
+               writel(old_ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+       }
+
+       if (old_ctrl0 != ctrl0)
+               writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG);
+       if (old_ctrl2 != ctrl2)
+               writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG);
+       if (old_ctrl4 != ctrl4)
+               writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG);
+       if (old_an != an)
+               writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+       if (old_ctrl2 & MVPP2_GMAC_PORT_RESET_MASK) {
+               while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
+                      MVPP2_GMAC_PORT_RESET_MASK)
+                       continue;
+       }
 }
 
 static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
@@ -4624,7 +4664,7 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
        bool change_interface = port->phy_interface != state->interface;
 
        /* Check for invalid configuration */
-       if (state->interface == PHY_INTERFACE_MODE_10GKR && port->gop_id != 0) {
+       if (mvpp2_is_xlg(state->interface) && port->gop_id != 0) {
                netdev_err(dev, "Invalid mode on %s\n", dev->name);
                return;
        }
@@ -4644,7 +4684,7 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
        }
 
        /* mac (re)configuration */
-       if (state->interface == PHY_INTERFACE_MODE_10GKR)
+       if (mvpp2_is_xlg(state->interface))
                mvpp2_xlg_config(port, mode, state);
        else if (phy_interface_mode_is_rgmii(state->interface) ||
                 phy_interface_mode_is_8023z(state->interface) ||
@@ -4666,12 +4706,10 @@ static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
        struct mvpp2_port *port = netdev_priv(dev);
        u32 val;
 
-       if (!phylink_autoneg_inband(mode) &&
-           interface != PHY_INTERFACE_MODE_10GKR) {
+       if (!phylink_autoneg_inband(mode) && !mvpp2_is_xlg(interface)) {
                val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
                val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
-               if (phy_interface_mode_is_rgmii(interface))
-                       val |= MVPP2_GMAC_FORCE_LINK_PASS;
+               val |= MVPP2_GMAC_FORCE_LINK_PASS;
                writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
        }
 
@@ -4688,8 +4726,7 @@ static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode,
        struct mvpp2_port *port = netdev_priv(dev);
        u32 val;
 
-       if (!phylink_autoneg_inband(mode) &&
-           interface != PHY_INTERFACE_MODE_10GKR) {
+       if (!phylink_autoneg_inband(mode) && !mvpp2_is_xlg(interface)) {
                val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
                val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
                val |= MVPP2_GMAC_FORCE_LINK_DOWN;
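
The rewritten mvpp2_gmac_config() keeps an old/new copy of each register and
only issues a writel() when a value actually changed, so reconfiguration no
longer disturbs an already-correct link; the full GMAC reset, and the poll for
its self-clearing bit, now happen only when the port type or in-band AN mode
really flipped. The core idiom in isolation (kernel-style sketch):

    #include <linux/io.h>

    /* sketch: read-modify-write that skips the write when nothing changed */
    static void rmw_if_changed(void __iomem *reg, u32 clear, u32 set)
    {
            u32 old = readl(reg);
            u32 new = (old & ~clear) | set;

            if (new != old)
                    writel(new, reg);
    }
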
index f8a6d6e..35f2142 100644 (file)
@@ -201,6 +201,7 @@ struct tx_desc {
 };
 
 struct pxa168_eth_private {
+       struct platform_device *pdev;
        int port_num;           /* User Ethernet port number    */
        int phy_addr;
        int phy_speed;
@@ -331,7 +332,7 @@ static void rxq_refill(struct net_device *dev)
                used_rx_desc = pep->rx_used_desc_q;
                p_used_rx_desc = &pep->p_rx_desc_area[used_rx_desc];
                size = skb_end_pointer(skb) - skb->data;
-               p_used_rx_desc->buf_ptr = dma_map_single(NULL,
+               p_used_rx_desc->buf_ptr = dma_map_single(&pep->pdev->dev,
                                                         skb->data,
                                                         size,
                                                         DMA_FROM_DEVICE);
@@ -743,7 +744,7 @@ static int txq_reclaim(struct net_device *dev, int force)
                                netdev_err(dev, "Error in TX\n");
                        dev->stats.tx_errors++;
                }
-               dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE);
+               dma_unmap_single(&pep->pdev->dev, addr, count, DMA_TO_DEVICE);
                if (skb)
                        dev_kfree_skb_irq(skb);
                released++;
@@ -805,7 +806,7 @@ static int rxq_process(struct net_device *dev, int budget)
                if (rx_next_curr_desc == rx_used_desc)
                        pep->rx_resource_err = 1;
                pep->rx_desc_count--;
-               dma_unmap_single(NULL, rx_desc->buf_ptr,
+               dma_unmap_single(&pep->pdev->dev, rx_desc->buf_ptr,
                                 rx_desc->buf_size,
                                 DMA_FROM_DEVICE);
                received_packets++;
@@ -1274,7 +1275,8 @@ pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev)
        length = skb->len;
        pep->tx_skb[tx_index] = skb;
        desc->byte_cnt = length;
-       desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE);
+       desc->buf_ptr = dma_map_single(&pep->pdev->dev, skb->data, length,
+                                       DMA_TO_DEVICE);
 
        skb_tx_timestamp(skb);
 
@@ -1528,6 +1530,7 @@ static int pxa168_eth_probe(struct platform_device *pdev)
        if (err)
                goto err_free_mdio;
 
+       pep->pdev = pdev;
        SET_NETDEV_DEV(dev, &pdev->dev);
        pxa168_init_hw(pep);
        err = register_netdev(dev);
index 04fd1f1..654ac53 100644 (file)
@@ -152,8 +152,10 @@ static void skge_get_regs(struct net_device *dev, struct ethtool_regs *regs,
        memset(p, 0, regs->len);
        memcpy_fromio(p, io, B3_RAM_ADDR);
 
-       memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1,
-                     regs->len - B3_RI_WTO_R1);
+       if (regs->len > B3_RI_WTO_R1) {
+               memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1,
+                             regs->len - B3_RI_WTO_R1);
+       }
 }
 
 /* Wake on Lan only supported on Yukon chips with rev 1 or above */
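
The new length check matters because regs->len is a __u32: for a dump shorter
than the B3_RI_WTO_R1 offset, the old regs->len - B3_RI_WTO_R1 would wrap to a
huge value and memcpy_fromio() would read far past the mapped region. The
arithmetic hazard in isolation (standalone C, hypothetical values):

    #include <stdio.h>

    int main(void)
    {
            unsigned int len = 16, off = 0x0190;    /* hypothetical values */

            /* unsigned subtraction wraps instead of going negative */
            printf("%u\n", len - off);              /* prints ~4.29 billion */
            return 0;
    }
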
index 9a0881c..6c01314 100644 (file)
@@ -617,6 +617,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
 }
 #endif
 
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
 /* We reach this function only after checking that any of
  * the (IPv4 | IPv6) bits are set in cqe->status.
  */
@@ -624,9 +626,20 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
                      netdev_features_t dev_features)
 {
        __wsum hw_checksum = 0;
+       void *hdr;
+
+       /* CQE csum doesn't cover padding octets in short Ethernet
+        * frames, and the pad field is appended prior to calculating
+        * and appending the FCS field.
+        *
+        * Detecting these padded frames requires verifying and parsing
+        * the IP headers, so we simply force all such small frames to
+        * skip checksum complete.
+        */
+       if (short_frame(skb->len))
+               return -EINVAL;
 
-       void *hdr = (u8 *)va + sizeof(struct ethhdr);
-
+       hdr = (u8 *)va + sizeof(struct ethhdr);
        hw_checksum = csum_unfold((__force __sum16)cqe->checksum);
 
        if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) &&
@@ -819,6 +832,11 @@ xdp_drop_no_cnt:
                skb_record_rx_queue(skb, cq_ring);
 
                if (likely(dev->features & NETIF_F_RXCSUM)) {
+                       /* TODO: for IP non-TCP/UDP packets, when csum complete
+                        * is not an option (not supported or any other reason),
+                        * we can check the cqe IPOK status bit and report
+                        * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE
+                        */
                        if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
                                                       MLX4_CQE_STATUS_UDP)) &&
                            (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
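
short_frame() flags anything at or below the 64-byte Ethernet minimum, i.e.
ETH_ZLEN (60 octets, the minimum frame without FCS) plus the 4-octet FCS,
since only such frames can carry pad octets that the CQE checksum covered but
the stack does not account for. Spelled out (standalone C, kernel constants
copied in):

    #include <stdio.h>

    #define ETH_ZLEN    60  /* minimum frame length, FCS excluded */
    #define ETH_FCS_LEN 4
    #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)

    int main(void)
    {
            printf("%d %d\n", short_frame(64), short_frame(65)); /* 1 0 */
            return 0;
    }
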
index bdb8dd1..1f6e16d 100644 (file)
@@ -3981,6 +3981,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        if (ret)
                goto err_params_unregister;
 
+       devlink_params_publish(devlink);
        pci_save_state(pdev);
        return 0;
 
index 6bb2a86..82d636b 100644 (file)
@@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 #
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
-               mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+               transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
                fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
                lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o
 
@@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o
 #
 # Core extra
 #
-mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o ecpf.o
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
index 3e0fa8a..be48c64 100644 (file)
@@ -316,6 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
        case MLX5_CMD_OP_DEALLOC_MEMIC:
        case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+       case MLX5_CMD_OP_QUERY_HOST_PARAMS:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -627,6 +628,7 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
        MLX5_COMMAND_STR_CASE(ALLOC_MEMIC);
        MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC);
+       MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS);
        default: return "unknown command opcode";
        }
 }
@@ -1583,6 +1585,24 @@ no_trig:
        spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
 }
 
+void mlx5_cmd_flush(struct mlx5_core_dev *dev)
+{
+       struct mlx5_cmd *cmd = &dev->cmd;
+       int i;
+
+       for (i = 0; i < cmd->max_reg_cmds; i++)
+               while (down_trylock(&cmd->sem))
+                       mlx5_cmd_trigger_completions(dev);
+
+       while (down_trylock(&cmd->pages_sem))
+               mlx5_cmd_trigger_completions(dev);
+
+       /* Unlock cmdif */
+       up(&cmd->pages_sem);
+       for (i = 0; i < cmd->max_reg_cmds; i++)
+               up(&cmd->sem);
+}
+
 static int status_to_err(u8 status)
 {
        return status ? -1 : 0; /* TBD more meaningful codes */
@@ -1711,12 +1731,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 }
 EXPORT_SYMBOL(mlx5_cmd_exec);
 
-int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
-                    void *out, int out_size, mlx5_cmd_cbk_t callback,
-                    void *context)
+void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
+                            struct mlx5_async_ctx *ctx)
+{
+       ctx->dev = dev;
+       /* Starts at 1 to avoid doing wake_up if we are not cleaning up */
+       atomic_set(&ctx->num_inflight, 1);
+       init_waitqueue_head(&ctx->wait);
+}
+EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
+
+/**
+ * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx
+ * @ctx: The ctx to clean
+ *
+ * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The
+ * caller must ensure that mlx5_cmd_exec_cb() is not called during or after
+ * the call to mlx5_cmd_cleanup_async_ctx().
+ */
+void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx)
+{
+       atomic_dec(&ctx->num_inflight);
+       wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0);
+}
+EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx);
+
+static void mlx5_cmd_exec_cb_handler(int status, void *_work)
+{
+       struct mlx5_async_work *work = _work;
+       struct mlx5_async_ctx *ctx = work->ctx;
+
+       work->user_callback(status, work);
+       if (atomic_dec_and_test(&ctx->num_inflight))
+               wake_up(&ctx->wait);
+}
+
+int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+                    void *out, int out_size, mlx5_async_cbk_t callback,
+                    struct mlx5_async_work *work)
 {
-       return cmd_exec(dev, in, in_size, out, out_size, callback, context,
-                       false);
+       int ret;
+
+       work->ctx = ctx;
+       work->user_callback = callback;
+       if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
+               return -EIO;
+       ret = cmd_exec(ctx->dev, in, in_size, out, out_size,
+                      mlx5_cmd_exec_cb_handler, work, false);
+       if (ret && atomic_dec_and_test(&ctx->num_inflight))
+               wake_up(&ctx->wait);
+
+       return ret;
 }
 EXPORT_SYMBOL(mlx5_cmd_exec_cb);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
new file mode 100644 (file)
index 0000000..4746f2d
--- /dev/null
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "ecpf.h"
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev)
+{
+       return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
+}
+
+static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev)
+{
+       u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {};
+
+       MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+       MLX5_SET(enable_hca_in, in, function_id, 0);
+       MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev)
+{
+       u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {};
+
+       MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+       MLX5_SET(disable_hca_in, in, function_id, 0);
+       MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_peer_pf_init(struct mlx5_core_dev *dev)
+{
+       int err;
+
+       err = mlx5_peer_pf_enable_hca(dev);
+       if (err)
+               mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n",
+                             err);
+
+       return err;
+}
+
+static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev)
+{
+       int err;
+
+       err = mlx5_peer_pf_disable_hca(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n",
+                             err);
+               return;
+       }
+
+       err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages);
+       if (err)
+               mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n",
+                              err);
+}
+
+int mlx5_ec_init(struct mlx5_core_dev *dev)
+{
+       int err = 0;
+
+       if (!mlx5_core_is_ecpf(dev))
+               return 0;
+
+       /* ECPF shall enable HCA for peer PF in the same way a PF
+        * does this for its VFs.
+        */
+       err = mlx5_peer_pf_init(dev);
+       if (err)
+               return err;
+
+       return 0;
+}
+
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
+{
+       if (!mlx5_core_is_ecpf(dev))
+               return;
+
+       mlx5_peer_pf_cleanup(dev);
+}
+
+static int mlx5_query_host_params_context(struct mlx5_core_dev *dev,
+                                         u32 *out, int outlen)
+{
+       u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {};
+
+       MLX5_SET(query_host_params_in, in, opcode,
+                MLX5_CMD_OP_QUERY_HOST_PARAMS);
+
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
+{
+       u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {};
+       int err;
+
+       err = mlx5_query_host_params_context(dev, out, sizeof(out));
+       if (err)
+               return err;
+
+       *num_vf = MLX5_GET(query_host_params_out, out,
+                          host_params_context.host_num_of_vfs);
+       mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf);
+
+       return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
new file mode 100644 (file)
index 0000000..346372d
--- /dev/null
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_ECPF_H__
+#define __MLX5_ECPF_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+enum {
+       MLX5_ECPU_BIT_NUM = 23,
+};
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
+int mlx5_ec_init(struct mlx5_core_dev *dev);
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
+int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf);
+
+#else  /* CONFIG_MLX5_ESWITCH */
+
+static inline bool
+mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; }
+static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {}
+static inline int
+mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
+{ return -EOPNOTSUPP; }
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_ECPF_H__ */
index 66e510b..e9acfa9 100644 (file)
@@ -655,6 +655,7 @@ struct mlx5e_channel_stats {
 enum {
        MLX5E_STATE_OPENED,
        MLX5E_STATE_DESTROYING,
+       MLX5E_STATE_XDP_TX_ENABLED,
 };
 
 struct mlx5e_rqt {
index 4a37713..122927f 100644 (file)
@@ -63,66 +63,168 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
        [MLX5E_50GBASE_KR2]       = 50000,
 };
 
-u32 mlx5e_port_ptys2speed(u32 eth_proto_oper)
+static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
+       [MLX5E_SGMII_100M]                      = 100,
+       [MLX5E_1000BASE_X_SGMII]                = 1000,
+       [MLX5E_5GBASE_R]                        = 5000,
+       [MLX5E_10GBASE_XFI_XAUI_1]              = 10000,
+       [MLX5E_40GBASE_XLAUI_4_XLPPI_4]         = 40000,
+       [MLX5E_25GAUI_1_25GBASE_CR_KR]          = 25000,
+       [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000,
+       [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR]   = 50000,
+       [MLX5E_CAUI_4_100GBASE_CR4_KR4]         = 100000,
+       [MLX5E_200GAUI_4_200GBASE_CR4_KR4]      = 200000,
+       [MLX5E_400GAUI_8]                       = 400000,
+};
+
+static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
+                                    const u32 **arr, u32 *size)
+{
+       bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+
+       *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
+                     ARRAY_SIZE(mlx5e_link_speed);
+       *arr  = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
+}
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+                             struct mlx5e_port_eth_proto *eproto)
+{
+       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+       int err;
+
+       if (!eproto)
+               return -EINVAL;
+
+       if (ext != MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet))
+               return -EOPNOTSUPP;
+
+       err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
+       if (err)
+               return err;
+
+       eproto->cap   = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+                                          eth_proto_capability);
+       eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
+       eproto->oper  = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
+       return 0;
+}
+
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+                                u8 *an_disable_cap, u8 *an_disable_admin)
+{
+       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+
+       *an_status = 0;
+       *an_disable_cap = 0;
+       *an_disable_admin = 0;
+
+       if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1))
+               return;
+
+       *an_status = MLX5_GET(ptys_reg, out, an_status);
+       *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
+       *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+}
+
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+                          u32 proto_admin, bool ext)
+{
+       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+       u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+       u8 an_disable_admin;
+       u8 an_disable_cap;
+       u8 an_status;
+
+       mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap,
+                                   &an_disable_admin);
+       if (!an_disable_cap && an_disable)
+               return -EPERM;
+
+       memset(in, 0, sizeof(in));
+
+       MLX5_SET(ptys_reg, in, local_port, 1);
+       MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
+       MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN);
+       if (ext)
+               MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin);
+       else
+               MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
+
+       return mlx5_core_access_reg(dev, in, sizeof(in), out,
+                           sizeof(out), MLX5_REG_PTYS, 0, 1);
+}
+
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper)
 {
        unsigned long temp = eth_proto_oper;
+       const u32 *table;
        u32 speed = 0;
+       u32 max_size;
        int i;
 
-       i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER);
-       if (i < MLX5E_LINK_MODES_NUMBER)
-               speed = mlx5e_link_speed[i];
-
+       mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+       i = find_first_bit(&temp, max_size);
+       if (i < max_size)
+               speed = table[i];
        return speed;
 }
 
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 {
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
-       u32 eth_proto_oper;
+       struct mlx5e_port_eth_proto eproto;
+       bool ext;
        int err;
 
-       err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+       ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+       err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
        if (err)
-               return err;
+               goto out;
 
-       eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-       *speed = mlx5e_port_ptys2speed(eth_proto_oper);
+       *speed = mlx5e_port_ptys2speed(mdev, eproto.oper);
        if (!(*speed))
                err = -EINVAL;
 
+out:
        return err;
 }
 
 int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 {
+       struct mlx5e_port_eth_proto eproto;
        u32 max_speed = 0;
-       u32 proto_cap;
+       const u32 *table;
+       u32 max_size;
+       bool ext;
        int err;
        int i;
 
-       err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
+       ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+       err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
        if (err)
                return err;
 
-       for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
-               if (proto_cap & MLX5E_PROT_MASK(i))
-                       max_speed = max(max_speed, mlx5e_link_speed[i]);
+       mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+       for (i = 0; i < max_size; ++i)
+               if (eproto.cap & MLX5E_PROT_MASK(i))
+                       max_speed = max(max_speed, table[i]);
 
        *speed = max_speed;
        return 0;
 }
 
-u32 mlx5e_port_speed2linkmodes(u32 speed)
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed)
 {
        u32 link_modes = 0;
+       const u32 *table;
+       u32 max_size;
        int i;
 
-       for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
-               if (mlx5e_link_speed[i] == speed)
+       mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+       for (i = 0; i < max_size; ++i) {
+               if (table[i] == speed)
                        link_modes |= MLX5E_PROT_MASK(i);
        }
-
        return link_modes;
 }
 
index cd2160b..70f536e 100644 (file)
 #include <linux/mlx5/driver.h>
 #include "en.h"
 
-u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
+struct mlx5e_port_eth_proto {
+       u32 cap;
+       u32 admin;
+       u32 oper;
+};
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+                             struct mlx5e_port_eth_proto *eproto);
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+                                u8 *an_disable_cap, u8 *an_disable_admin);
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+                          u32 proto_admin, bool ext);
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper);
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-u32 mlx5e_port_speed2linkmodes(u32 speed);
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed);
 
 int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
 int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
index 9e71f4d..bdcc5e7 100644 (file)
@@ -256,6 +256,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
        e->m_neigh.family = n->ops->family;
        memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
        e->out_dev = out_dev;
+       e->route_dev = route_dev;
 
        /* It's important to add the neigh to the hash table before checking
         * the neigh validity state. So if we'll get a notification, in case the
@@ -369,6 +370,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
        e->m_neigh.family = n->ops->family;
        memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
        e->out_dev = out_dev;
+       e->route_dev = route_dev;
 
        /* It's important to add the neigh to the hash table before checking
         * the neigh validity state. So if we'll get a notification, in case the
@@ -602,16 +604,18 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
                       struct mlx5_flow_spec *spec,
                       struct tc_cls_flower_offload *f,
                       void *headers_c,
-                      void *headers_v)
+                      void *headers_v, u8 *match_level)
 {
        int tunnel_type;
        int err = 0;
 
        tunnel_type = mlx5e_tc_tun_get_type(filter_dev);
        if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+               *match_level = MLX5_MATCH_L4;
                err = mlx5e_tc_tun_parse_vxlan(priv, spec, f,
                                               headers_c, headers_v);
        } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+               *match_level = MLX5_MATCH_L3;
                err = mlx5e_tc_tun_parse_gretap(priv, spec, f,
                                                headers_c, headers_v);
        } else {
index 706ce7b..b63f15d 100644 (file)
@@ -39,6 +39,6 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
                       struct mlx5_flow_spec *spec,
                       struct tc_cls_flower_offload *f,
                       void *headers_c,
-                      void *headers_v);
+                      void *headers_v, u8 *match_level);
 
 #endif //__MLX5_EN_TC_TUNNEL_H__
index 3740177..03b2a9f 100644 (file)
@@ -365,7 +365,8 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
        int sq_num;
        int i;
 
-       if (unlikely(!test_bit(MLX5E_STATE_OPENED, &priv->state)))
+       /* this flag is sufficient; no need to test the internal SQ state */
+       if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
                return -ENETDOWN;
 
        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
@@ -378,9 +379,6 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 
        sq = &priv->channels.c[sq_num]->xdpsq;
 
-       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
-               return -ENETDOWN;
-
        for (i = 0; i < n; i++) {
                struct xdp_frame *xdpf = frames[i];
                struct mlx5e_xdp_info xdpi;
index 3a67cb3..ee27a7c 100644 (file)
@@ -50,6 +50,23 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
 int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
                   u32 flags);
 
+static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
+{
+       set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
+{
+       clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+       /* let other devices' napi(s) see our new state */
+       synchronize_rcu();
+}
+
+static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
+{
+       return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
 static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
 {
        if (sq->doorbell_cseg) {
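
mlx5e_xdp_tx_disable() works because ndo_xdp_xmit callers run inside
NAPI/softirq, an implicit RCU read-side section: once the bit is cleared,
synchronize_rcu() guarantees that every poll which might still have observed
the old value has finished, so the SQs can be torn down without any per-packet
locking. The idiom in general form (kernel-style sketch):

    #include <linux/bitops.h>
    #include <linux/rcupdate.h>

    /* sketch: flag-plus-grace-period teardown */
    static void tx_disable(unsigned long *state)
    {
            clear_bit(0, state);    /* readers test_bit(0, ...) from softirq */
            synchronize_rcu();      /* every reader that saw it set has exited */
            /* now safe to free the queues those readers were using */
    }
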
index 3bbccea..3cd7325 100644 (file)
@@ -354,9 +354,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 
        new_channels.params = priv->channels.params;
        new_channels.params.num_channels = count;
-       if (!netif_is_rxfh_configured(priv->netdev))
-               mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
-                                             MLX5E_INDIR_RQT_SIZE, count);
 
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
                priv->channels.params = new_channels.params;
@@ -372,6 +369,10 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
        if (arfs_enabled)
                mlx5e_arfs_disable(priv);
 
+       if (!netif_is_rxfh_configured(priv->netdev))
+               mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
+                                             MLX5E_INDIR_RQT_SIZE, count);
+
        /* Switch to new channels, set new parameters and close old ones */
        mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 
@@ -695,13 +696,14 @@ static void get_speed_duplex(struct net_device *netdev,
                             u32 eth_proto_oper,
                             struct ethtool_link_ksettings *link_ksettings)
 {
+       struct mlx5e_priv *priv = netdev_priv(netdev);
        u32 speed = SPEED_UNKNOWN;
        u8 duplex = DUPLEX_UNKNOWN;
 
        if (!netif_carrier_ok(netdev))
                goto out;
 
-       speed = mlx5e_port_ptys2speed(eth_proto_oper);
+       speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper);
        if (!speed) {
                speed = SPEED_UNKNOWN;
                goto out;
@@ -885,7 +887,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
                                     const struct ethtool_link_ksettings *link_ksettings)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       u32 eth_proto_cap, eth_proto_admin;
+       struct mlx5e_port_eth_proto eproto;
        bool an_changes = false;
        u8 an_disable_admin;
        u8 an_disable_cap;
@@ -899,16 +901,16 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 
        link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
                mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) :
-               mlx5e_port_speed2linkmodes(speed);
+               mlx5e_port_speed2linkmodes(mdev, speed);
 
-       err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+       err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto);
        if (err) {
-               netdev_err(priv->netdev, "%s: query port eth proto cap failed: %d\n",
+               netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n",
                           __func__, err);
                goto out;
        }
 
-       link_modes = link_modes & eth_proto_cap;
+       link_modes = link_modes & eproto.cap;
        if (!link_modes) {
                netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
                           __func__);
@@ -916,24 +918,17 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
                goto out;
        }
 
-       err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN);
-       if (err) {
-               netdev_err(priv->netdev, "%s: query port eth proto admin failed: %d\n",
-                          __func__, err);
-               goto out;
-       }
-
-       mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status,
-                               &an_disable_cap, &an_disable_admin);
+       mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap,
+                                   &an_disable_admin);
 
        an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE;
        an_changes = ((!an_disable && an_disable_admin) ||
                      (an_disable && !an_disable_admin));
 
-       if (!an_changes && link_modes == eth_proto_admin)
+       if (!an_changes && link_modes == eproto.admin)
                goto out;
 
-       mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN);
+       mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, false);
        mlx5_toggle_port_link(mdev);
 
 out:
index d81ebba..83510ca 100644
@@ -2859,6 +2859,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
 
        mlx5e_build_tx2sq_maps(priv);
        mlx5e_activate_channels(&priv->channels);
+       mlx5e_xdp_tx_enable(priv);
        netif_tx_start_all_queues(priv->netdev);
 
        if (mlx5e_is_vport_rep(priv))
@@ -2880,6 +2881,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
         */
        netif_tx_stop_all_queues(priv->netdev);
        netif_tx_disable(priv->netdev);
+       mlx5e_xdp_tx_disable(priv);
        mlx5e_deactivate_channels(&priv->channels);
 }
 
index 0b1988b..287d48e 100644
@@ -153,7 +153,7 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *rep = rpriv->rep;
 
-       if (rep->vport == FDB_UPLINK_VPORT)
+       if (rep->vport == MLX5_VPORT_UPLINK)
                mlx5e_uplink_rep_update_hw_counters(priv);
        else
                mlx5e_vf_rep_update_hw_counters(priv);
@@ -579,6 +579,10 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
        if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
                ether_addr_copy(e->h_dest, ha);
                ether_addr_copy(eth->h_dest, ha);
+               /* Update the encap source mac, in case the flows were
+                * deleted because the encap source mac changed.
+                */
+               ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
 
                mlx5e_tc_encap_flows_add(priv, e);
        }
@@ -1079,7 +1083,8 @@ static int mlx5e_vf_rep_open(struct net_device *dev)
 
        if (!mlx5_modify_vport_admin_state(priv->mdev,
                                           MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-                                          rep->vport, MLX5_VPORT_ADMIN_STATE_UP))
+                                          rep->vport, 1,
+                                          MLX5_VPORT_ADMIN_STATE_UP))
                netif_carrier_on(dev);
 
 unlock:
@@ -1097,7 +1102,8 @@ static int mlx5e_vf_rep_close(struct net_device *dev)
        mutex_lock(&priv->state_lock);
        mlx5_modify_vport_admin_state(priv->mdev,
                                      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-                                     rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN);
+                                     rep->vport, 1,
+                                     MLX5_VPORT_ADMIN_STATE_DOWN);
        ret = mlx5e_close_locked(dev);
        mutex_unlock(&priv->state_lock);
        return ret;
@@ -1115,7 +1121,7 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
        if (ret)
                return ret;
 
-       if (rep->vport == FDB_UPLINK_VPORT)
+       if (rep->vport == MLX5_VPORT_UPLINK)
                ret = snprintf(buf, len, "p%d", pf_num);
        else
                ret = snprintf(buf, len, "pf%dvf%d", pf_num, rep->vport - 1);
@@ -1202,7 +1208,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
                return false;
 
        rep = rpriv->rep;
-       return (rep->vport == FDB_UPLINK_VPORT);
+       return (rep->vport == MLX5_VPORT_UPLINK);
 }
 
 static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
@@ -1340,7 +1346,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
        params->sw_mtu      = netdev->mtu;
 
        /* SQ */
-       if (rep->vport == FDB_UPLINK_VPORT)
+       if (rep->vport == MLX5_VPORT_UPLINK)
                params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
        else
                params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;
@@ -1367,7 +1373,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
        struct mlx5_eswitch_rep *rep = rpriv->rep;
        struct mlx5_core_dev *mdev = priv->mdev;
 
-       if (rep->vport == FDB_UPLINK_VPORT) {
+       if (rep->vport == MLX5_VPORT_UPLINK) {
                SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev);
                netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
                /* we want a persistent mac for the uplink rep */
@@ -1397,7 +1403,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
        netdev->hw_features    |= NETIF_F_TSO6;
        netdev->hw_features    |= NETIF_F_RXCSUM;
 
-       if (rep->vport != FDB_UPLINK_VPORT)
+       if (rep->vport != MLX5_VPORT_UPLINK)
                netdev->features |= NETIF_F_VLAN_CHALLENGED;
 
        netdev->features |= netdev->hw_features;
@@ -1550,7 +1556,7 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
                return err;
        }
 
-       if (rpriv->rep->vport == FDB_UPLINK_VPORT) {
+       if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
                uplink_priv = &rpriv->uplink_priv;
 
                /* init shared tc flow table */
@@ -1586,7 +1592,7 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
        for (tc = 0; tc < priv->profile->max_tc; tc++)
                mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
 
-       if (rpriv->rep->vport == FDB_UPLINK_VPORT) {
+       if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
                /* clean indirect TC block notifications */
                unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb);
                mlx5e_rep_indr_clean_block_privs(rpriv);
@@ -1705,7 +1711,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        rpriv->rep = rep;
 
        nch = mlx5e_get_max_num_channels(dev);
-       profile = (rep->vport == FDB_UPLINK_VPORT) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile;
+       profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile;
        netdev = mlx5e_create_netdev(dev, profile, nch, rpriv);
        if (!netdev) {
                pr_warn("Failed to create representor netdev for vport %d\n",
@@ -1718,7 +1724,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        rep->rep_if[REP_ETH].priv = rpriv;
        INIT_LIST_HEAD(&rpriv->vport_sqs_list);
 
-       if (rep->vport == FDB_UPLINK_VPORT) {
+       if (rep->vport == MLX5_VPORT_UPLINK) {
                err = mlx5e_create_mdev_resources(dev);
                if (err)
                        goto err_destroy_netdev;
@@ -1754,7 +1760,7 @@ err_detach_netdev:
        mlx5e_detach_netdev(netdev_priv(netdev));
 
 err_destroy_mdev_resources:
-       if (rep->vport == FDB_UPLINK_VPORT)
+       if (rep->vport == MLX5_VPORT_UPLINK)
                mlx5e_destroy_mdev_resources(dev);
 
 err_destroy_netdev:
@@ -1774,7 +1780,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
        unregister_netdev(netdev);
        mlx5e_rep_neigh_cleanup(rpriv);
        mlx5e_detach_netdev(priv);
-       if (rep->vport == FDB_UPLINK_VPORT)
+       if (rep->vport == MLX5_VPORT_UPLINK)
                mlx5e_destroy_mdev_resources(priv->mdev);
        mlx5e_destroy_netdev(priv);
        kfree(ppriv); /* mlx5e_rep_priv */
@@ -1792,25 +1798,18 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
 void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev)
 {
        struct mlx5_eswitch *esw = mdev->priv.eswitch;
-       int total_vfs = MLX5_TOTAL_VPORTS(mdev);
-       int vport;
+       struct mlx5_eswitch_rep_if rep_if = {};
 
-       for (vport = 0; vport < total_vfs; vport++) {
-               struct mlx5_eswitch_rep_if rep_if = {};
+       rep_if.load = mlx5e_vport_rep_load;
+       rep_if.unload = mlx5e_vport_rep_unload;
+       rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
 
-               rep_if.load = mlx5e_vport_rep_load;
-               rep_if.unload = mlx5e_vport_rep_unload;
-               rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
-               mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
-       }
+       mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH);
 }
 
 void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev)
 {
        struct mlx5_eswitch *esw = mdev->priv.eswitch;
-       int total_vfs = MLX5_TOTAL_VPORTS(mdev);
-       int vport;
 
-       for (vport = total_vfs - 1; vport >= 0; vport--)
-               mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
+       mlx5_eswitch_unregister_vport_reps(esw, REP_ETH);
 }
index edd7228..36eafc8 100644
@@ -148,6 +148,7 @@ struct mlx5e_encap_entry {
        unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
 
        struct net_device *out_dev;
+       struct net_device *route_dev;
        int tunnel_type;
        int tunnel_hlen;
        int reformat_type;
index 85c5dd7..05892fc 100644
@@ -127,6 +127,7 @@ struct mlx5e_tc_flow_parse_attr {
        struct net_device *filter_dev;
        struct mlx5_flow_spec spec;
        int num_mod_hdr_actions;
+       int max_mod_hdr_actions;
        void *mod_hdr_actions;
        int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
 };
@@ -1301,7 +1302,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 static int parse_tunnel_attr(struct mlx5e_priv *priv,
                             struct mlx5_flow_spec *spec,
                             struct tc_cls_flower_offload *f,
-                            struct net_device *filter_dev)
+                            struct net_device *filter_dev, u8 *match_level)
 {
        struct netlink_ext_ack *extack = f->common.extack;
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -1313,7 +1314,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
        int err;
 
        err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
-                                headers_c, headers_v);
+                                headers_c, headers_v, match_level);
        if (err) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "failed to parse tunnel attributes");
@@ -1413,7 +1414,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                              struct mlx5_flow_spec *spec,
                              struct tc_cls_flower_offload *f,
                              struct net_device *filter_dev,
-                             u8 *match_level)
+                             u8 *match_level, u8 *tunnel_match_level)
 {
        struct netlink_ext_ack *extack = f->common.extack;
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -1464,7 +1465,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                switch (match.key->addr_type) {
                case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
                case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
-                       if (parse_tunnel_attr(priv, spec, f, filter_dev))
+                       if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level))
                                return -EOPNOTSUPP;
                        break;
                default:
@@ -1767,15 +1768,15 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
+       u8 match_level, tunnel_match_level = MLX5_MATCH_NONE;
        struct mlx5_eswitch_rep *rep;
-       u8 match_level;
        int err;
 
-       err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level);
+       err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level);
 
        if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
                rep = rpriv->rep;
-               if (rep->vport != FDB_UPLINK_VPORT &&
+               if (rep->vport != MLX5_VPORT_UPLINK &&
                    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
                    esw->offloads.inline_mode < match_level)) {
                        NL_SET_ERR_MSG_MOD(extack,
@@ -1787,10 +1788,12 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
                }
        }
 
-       if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+       if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
                flow->esw_attr->match_level = match_level;
-       else
+               flow->esw_attr->tunnel_match_level = tunnel_match_level;
+       } else {
                flow->nic_attr->match_level = match_level;
+       }
 
        return err;
 }
@@ -1880,9 +1883,9 @@ static struct mlx5_fields fields[] = {
        OFFLOAD(UDP_DPORT, 2, udp.dest,   0),
 };
 
-/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at
- * max from the SW pedit action. On success, it says how many HW actions were
- * actually parsed.
+/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
+ * max from the SW pedit action. On success, attr->num_mod_hdr_actions
+ * says how many HW actions were actually parsed.
  */
 static int offload_pedit_fields(struct pedit_headers_action *hdrs,
                                struct mlx5e_tc_flow_parse_attr *parse_attr,
@@ -1905,9 +1908,11 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
        add_vals = &hdrs[1].vals;
 
        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
-       action = parse_attr->mod_hdr_actions;
-       max_actions = parse_attr->num_mod_hdr_actions;
-       nactions = 0;
+       action = parse_attr->mod_hdr_actions +
+                parse_attr->num_mod_hdr_actions * action_size;
+
+       max_actions = parse_attr->max_mod_hdr_actions;
+       nactions = parse_attr->num_mod_hdr_actions;
 
        for (i = 0; i < ARRAY_SIZE(fields); i++) {
                f = &fields[i];
@@ -2020,7 +2025,7 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
        if (!parse_attr->mod_hdr_actions)
                return -ENOMEM;
 
-       parse_attr->num_mod_hdr_actions = max_actions;
+       parse_attr->max_mod_hdr_actions = max_actions;
        return 0;
 }
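/* Editorial sketch, not part of the patch: with the num/max split above,
 * the mod-header buffer behaves as an append cursor, so several pedit
 * actions in one rule can share a single allocation. All demo-prefixed
 * names below are hypothetical.
 */
#include <linux/string.h>
#include <linux/types.h>

struct demo_parse_attr {
        void *mod_hdr_actions;          /* max_mod_hdr_actions * action_size bytes */
        int num_mod_hdr_actions;        /* actions emitted so far */
        int max_mod_hdr_actions;        /* capacity, fixed at alloc time */
};

/* append one action if there is room; returns 0 on success */
static int demo_emit(struct demo_parse_attr *attr, size_t action_size,
                     const void *act)
{
        char *dst;

        if (attr->num_mod_hdr_actions >= attr->max_mod_hdr_actions)
                return -1;      /* out of room */
        dst = (char *)attr->mod_hdr_actions +
              attr->num_mod_hdr_actions * action_size;
        memcpy(dst, act, action_size);
        attr->num_mod_hdr_actions++;
        return 0;
}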
 
@@ -2069,9 +2074,11 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
        int err;
        u8 cmd;
 
-       err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
-       if (err)
-               goto out_err;
+       if (!parse_attr->mod_hdr_actions) {
+               err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
+               if (err)
+                       goto out_err;
+       }
 
        err = offload_pedit_fields(hdrs, parse_attr, extack);
        if (err < 0)
@@ -2129,6 +2136,7 @@ static bool csum_offload_supported(struct mlx5e_priv *priv,
 
 static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
                                          struct flow_action *flow_action,
+                                         u32 actions,
                                          struct netlink_ext_ack *extack)
 {
        const struct flow_action_entry *act;
@@ -2138,7 +2146,11 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
        u16 ethertype;
        int i;
 
-       headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+       if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+               headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
+       else
+               headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+
        ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
 
        /* for non-IP we only re-write MACs, so we're okay */
@@ -2191,7 +2203,7 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
 
        if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                return modify_header_match_supported(&parse_attr->spec,
-                                                    flow_action,
+                                                    flow_action, actions,
                                                     extack);
 
        return true;
@@ -2681,7 +2693,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags)
 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
 {
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
-       bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT &&
+       bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
                              flow->flags & MLX5E_TC_FLOW_INGRESS;
        bool act_is_encap = !!(attr->action &
                               MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
@@ -2804,7 +2816,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
         * original flow and packets redirected from uplink use the
         * peer mdev.
         */
-       if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT)
+       if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
                in_mdev = peer_priv->mdev;
        else
                in_mdev = priv->mdev;
index 1892112..c1334a8 100644
@@ -387,8 +387,14 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
        contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
        if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+#ifdef CONFIG_MLX5_EN_IPSEC
+               struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
+#endif
                mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
                mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+#ifdef CONFIG_MLX5_EN_IPSEC
+               wqe->eth = cur_eth;
+#endif
        }
 
        /* fill wqe */
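/* Editorial note, not part of the patch: when the remaining contiguous
 * WQEBB room is too small, mlx5e_fill_sq_frag_edge() pads to the end of
 * the work queue and mlx5e_sq_fetch_wqe() returns a fresh WQE. For IPsec,
 * the eth segment was already populated before this point, so it is saved
 * and copied into the new WQE to avoid losing it.
 */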
index ee04aab..bb6e5b5 100644
@@ -34,6 +34,7 @@
 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
 #include <linux/mlx5/eq.h>
 #include <linux/mlx5/cmd.h>
 #ifdef CONFIG_RFS_ACCEL
@@ -114,11 +115,11 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
        struct mlx5_cq_table *table = &eq->cq_table;
        struct mlx5_core_cq *cq = NULL;
 
-       spin_lock(&table->lock);
+       rcu_read_lock();
        cq = radix_tree_lookup(&table->tree, cqn);
        if (likely(cq))
                mlx5_cq_hold(cq);
-       spin_unlock(&table->lock);
+       rcu_read_unlock();
 
        return cq;
 }
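/* Editorial sketch, not part of the patch: the lock split used above.
 * Lookups on the EQ hot path become lockless under RCU, while insert and
 * delete still serialize on the spinlock, which the later hunks can then
 * take without disabling IRQs. Names below are hypothetical; the object
 * must only be freed after an RCU grace period for the lockless lookup to
 * be safe.
 */
#include <linux/gfp.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/spinlock.h>

struct demo_obj {
        refcount_t ref;
};

static RADIX_TREE(demo_tree, GFP_ATOMIC);
static DEFINE_SPINLOCK(demo_lock);      /* serializes updaters only */

static struct demo_obj *demo_get(unsigned long key)
{
        struct demo_obj *obj;

        rcu_read_lock();
        obj = radix_tree_lookup(&demo_tree, key);
        if (obj)
                refcount_inc(&obj->ref);        /* pin before leaving RCU */
        rcu_read_unlock();
        return obj;
}

static int demo_add(unsigned long key, struct demo_obj *obj)
{
        int err;

        spin_lock(&demo_lock);
        err = radix_tree_insert(&demo_tree, key, obj);
        spin_unlock(&demo_lock);
        return err;
}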
@@ -371,9 +372,9 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
        struct mlx5_cq_table *table = &eq->cq_table;
        int err;
 
-       spin_lock_irq(&table->lock);
+       spin_lock(&table->lock);
        err = radix_tree_insert(&table->tree, cq->cqn, cq);
-       spin_unlock_irq(&table->lock);
+       spin_unlock(&table->lock);
 
        return err;
 }
@@ -383,9 +384,9 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
        struct mlx5_cq_table *table = &eq->cq_table;
        struct mlx5_core_cq *tmp;
 
-       spin_lock_irq(&table->lock);
+       spin_lock(&table->lock);
        tmp = radix_tree_delete(&table->tree, cq->cqn);
-       spin_unlock_irq(&table->lock);
+       spin_unlock(&table->lock);
 
        if (!tmp) {
                mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
@@ -530,6 +531,9 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
        if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
 
+       if (mlx5_core_is_ecpf_esw_manager(dev))
+               async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE);
+
        return async_event_mask;
 }
 
index 5b492b6..e18af31 100644
@@ -39,8 +39,7 @@
 #include "lib/eq.h"
 #include "eswitch.h"
 #include "fs_core.h"
-
-#define UPLINK_VPORT 0xFFFF
+#include "ecpf.h"
 
 enum {
        MLX5_ACTION_NONE = 0,
@@ -52,7 +51,7 @@ enum {
 struct vport_addr {
        struct l2addr_node     node;
        u8                     action;
-       u32                    vport;
+       u16                    vport;
        struct mlx5_flow_handle *flow_rule;
        bool mpfs; /* UC MAC was added to MPFs */
        /* A flag indicating that mac was added due to mc promiscuous vport */
@@ -70,6 +69,28 @@ enum {
                            MC_ADDR_CHANGE | \
                            PROMISC_CHANGE)
 
+/* The vport getter/iterator are only valid after esw->total_vports
+ * and vport->vport are initialized in mlx5_eswitch_init.
+ */
+#define mlx5_esw_for_all_vports(esw, i, vport)                 \
+       for ((i) = MLX5_VPORT_PF;                               \
+            (vport) = &(esw)->vports[i],                       \
+            (i) < (esw)->total_vports; (i)++)
+
+#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)        \
+       for ((i) = MLX5_VPORT_FIRST_VF;                         \
+            (vport) = &(esw)->vports[i],                       \
+            (i) <= (nvfs); (i)++)
+
+static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw,
+                                                u16 vport_num)
+{
+       u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+
+       WARN_ON(idx > esw->total_vports - 1);
+       return &esw->vports[idx];
+}
+
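/* Editorial sketch, not part of the patch: how the iterators above are
 * meant to be used. The demo-prefixed helper is hypothetical.
 */
static void demo_dump_vports(struct mlx5_eswitch *esw, int nvfs)
{
        struct mlx5_vport *vport;
        int i;

        /* dense walk over PF, VFs, ECPF and uplink */
        mlx5_esw_for_all_vports(esw, i, vport)
                esw_debug(esw->dev, "vport 0x%x enabled(%d)\n",
                          vport->vport, vport->enabled);

        /* VF-only walk, bounded by the number of enabled VFs */
        mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)
                esw_debug(esw->dev, "vf vport 0x%x\n", vport->vport);
}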
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
                                        u32 events_mask)
 {
@@ -115,7 +136,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
        return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
-static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
                                  u16 vlan, u8 qos, u8 set_flags)
 {
        u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
@@ -152,7 +173,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
 
 /* E-Switch FDB */
 static struct mlx5_flow_handle *
-__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
+__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule,
                         u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
 {
        int match_header = (is_zero_ether_addr(mac_c) ? 0 :
@@ -188,7 +209,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
                                        misc_parameters);
                mc_misc  = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                        misc_parameters);
-               MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT);
+               MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK);
                MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port);
        }
 
@@ -215,7 +236,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 }
 
 static struct mlx5_flow_handle *
-esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
+esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport)
 {
        u8 mac_c[ETH_ALEN];
 
@@ -224,7 +245,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
 }
 
 static struct mlx5_flow_handle *
-esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
+esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport)
 {
        u8 mac_c[ETH_ALEN];
        u8 mac_v[ETH_ALEN];
@@ -237,7 +258,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
 }
 
 static struct mlx5_flow_handle *
-esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
+esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport)
 {
        u8 mac_c[ETH_ALEN];
        u8 mac_v[ETH_ALEN];
@@ -377,19 +398,19 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
 static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 {
        u8 *mac = vaddr->node.addr;
-       u32 vport = vaddr->vport;
+       u16 vport = vaddr->vport;
        int err;
 
-       /* Skip mlx5_mpfs_add_mac for PFs,
-        * it is already done by the PF netdev in mlx5e_execute_l2_action
+       /* Skip mlx5_mpfs_add_mac for the eswitch manager;
+        * it is already done by its netdev in mlx5e_execute_l2_action
         */
-       if (!vport)
+       if (esw->manager_vport == vport)
                goto fdb_add;
 
        err = mlx5_mpfs_add_mac(esw->dev, mac);
        if (err) {
                esw_warn(esw->dev,
-                        "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n",
+                        "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n",
                         mac, vport, err);
                return err;
        }
@@ -409,13 +430,13 @@ fdb_add:
 static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 {
        u8 *mac = vaddr->node.addr;
-       u32 vport = vaddr->vport;
+       u16 vport = vaddr->vport;
        int err = 0;
 
-       /* Skip mlx5_mpfs_del_mac for PFs,
-        * it is already done by the PF netdev in mlx5e_execute_l2_action
+       /* Skip mlx5_mpfs_del_mac for the eswitch manager;
+        * it is already done by its netdev in mlx5e_execute_l2_action
         */
-       if (!vport || !vaddr->mpfs)
+       if (!vaddr->mpfs || esw->manager_vport == vport)
                goto fdb_del;
 
        err = mlx5_mpfs_del_mac(esw->dev, mac);
@@ -438,17 +459,18 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
                                   struct esw_mc_addr *esw_mc)
 {
        u8 *mac = vaddr->node.addr;
-       u32 vport_idx = 0;
+       struct mlx5_vport *vport;
+       u16 i, vport_num;
 
-       for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) {
-               struct mlx5_vport *vport = &esw->vports[vport_idx];
+       mlx5_esw_for_all_vports(esw, i, vport) {
                struct hlist_head *vport_hash = vport->mc_list;
                struct vport_addr *iter_vaddr =
                                        l2addr_hash_find(vport_hash,
                                                         mac,
                                                         struct vport_addr);
+               vport_num = vport->vport;
                if (IS_ERR_OR_NULL(vport->allmulti_rule) ||
-                   vaddr->vport == vport_idx)
+                   vaddr->vport == vport_num)
                        continue;
                switch (vaddr->action) {
                case MLX5_ACTION_ADD:
@@ -460,14 +482,14 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
                        if (!iter_vaddr) {
                                esw_warn(esw->dev,
                                         "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n",
-                                        mac, vport_idx);
+                                        mac, vport_num);
                                continue;
                        }
-                       iter_vaddr->vport = vport_idx;
+                       iter_vaddr->vport = vport_num;
                        iter_vaddr->flow_rule =
                                        esw_fdb_set_vport_rule(esw,
                                                               mac,
-                                                              vport_idx);
+                                                              vport_num);
                        iter_vaddr->mc_promisc = true;
                        break;
                case MLX5_ACTION_DEL:
@@ -485,7 +507,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
        struct hlist_head *hash = esw->mc_table;
        struct esw_mc_addr *esw_mc;
        u8 *mac = vaddr->node.addr;
-       u32 vport = vaddr->vport;
+       u16 vport = vaddr->vport;
 
        if (!esw->fdb_table.legacy.fdb)
                return 0;
@@ -499,7 +521,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
                return -ENOMEM;
 
        esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
-               esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT);
+               esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK);
 
        /* Add this multicast mac to all the mc promiscuous vports */
        update_allmulti_vports(esw, vaddr, esw_mc);
@@ -525,7 +547,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
        struct hlist_head *hash = esw->mc_table;
        struct esw_mc_addr *esw_mc;
        u8 *mac = vaddr->node.addr;
-       u32 vport = vaddr->vport;
+       u16 vport = vaddr->vport;
 
        if (!esw->fdb_table.legacy.fdb)
                return 0;
@@ -564,9 +586,9 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 
 /* Apply vport UC/MC list to HW l2 table and FDB table */
 static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
-                                     u32 vport_num, int list_type)
+                                     u16 vport_num, int list_type)
 {
-       struct mlx5_vport *vport = &esw->vports[vport_num];
+       struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
        bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
        vport_addr_action vport_addr_add;
        vport_addr_action vport_addr_del;
@@ -599,9 +621,9 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
 
 /* Sync vport UC/MC list from vport context */
 static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
-                                      u32 vport_num, int list_type)
+                                      u16 vport_num, int list_type)
 {
-       struct mlx5_vport *vport = &esw->vports[vport_num];
+       struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
        bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
        u8 (*mac_list)[ETH_ALEN];
        struct l2addr_node *node;
@@ -686,9 +708,9 @@ out:
 /* Sync vport UC/MC list from vport context
  * Must be called after esw_update_vport_addr_list
  */
-static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num)
+static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num)
 {
-       struct mlx5_vport *vport = &esw->vports[vport_num];
+       struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
        struct l2addr_node *node;
        struct vport_addr *addr;
        struct hlist_head *hash;
@@ -721,11 +743,11 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num)
 }
 
 /* Apply vport rx mode to HW FDB table */
-static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
+static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num,
                                    bool promisc, bool mc_promisc)
 {
+       struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
        struct esw_mc_addr *allmulti_addr = &esw->mc_promisc;
-       struct mlx5_vport *vport = &esw->vports[vport_num];
 
        if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc)
                goto promisc;
@@ -736,7 +758,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
                if (!allmulti_addr->uplink_rule)
                        allmulti_addr->uplink_rule =
                                esw_fdb_set_vport_allmulti_rule(esw,
-                                                               UPLINK_VPORT);
+                                                               MLX5_VPORT_UPLINK);
                allmulti_addr->refcnt++;
        } else if (vport->allmulti_rule) {
                mlx5_del_flow_rules(vport->allmulti_rule);
@@ -764,9 +786,9 @@ promisc:
 }
 
 /* Sync vport rx mode from vport context */
-static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num)
+static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num)
 {
-       struct mlx5_vport *vport = &esw->vports[vport_num];
+       struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
        int promisc_all = 0;
        int promisc_uc = 0;
        int promisc_mc = 0;
@@ -1343,8 +1365,8 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw)
 static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
                                u32 initial_max_rate, u32 initial_bw_share)
 {
+       struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
        u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
-       struct mlx5_vport *vport = &esw->vports[vport_num];
        struct mlx5_core_dev *dev = esw->dev;
        void *vport_elem;
        int err = 0;
@@ -1383,7 +1405,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
 
 static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num)
 {
-       struct mlx5_vport *vport = &esw->vports[vport_num];
+       struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
        int err = 0;
 
        if (!vport->qos.enabled)
@@ -1402,8 +1424,8 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num)
 static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num,
                                u32 max_rate, u32 bw_share)
 {
+       struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
        u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
-       struct mlx5_vport *vport = &esw->vports[vport_num];
        struct mlx5_core_dev *dev = esw->dev;
        void *vport_elem;
        u32 bitmask = 0;
@@ -1459,15 +1481,22 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
 {
        int vport_num = vport->vport;
 
-       if (!vport_num)
+       if (esw->manager_vport == vport_num)
                return;
 
        mlx5_modify_vport_admin_state(esw->dev,
                                      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-                                     vport_num,
+                                     vport_num, 1,
                                      vport->info.link_state);
-       mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
-       mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+
+       /* Host PF has its own mac/guid. */
+       if (vport_num) {
+               mlx5_modify_nic_vport_mac_address(esw->dev, vport_num,
+                                                 vport->info.mac);
+               mlx5_modify_nic_vport_node_guid(esw->dev, vport_num,
+                                               vport->info.node_guid);
+       }
+
        modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
                               (vport->info.vlan || vport->info.qos));
 
@@ -1513,10 +1542,10 @@ static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport)
                mlx5_fc_destroy(dev, vport->egress.drop_counter);
 }
 
-static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
+static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
                             int enable_events)
 {
-       struct mlx5_vport *vport = &esw->vports[vport_num];
+       u16 vport_num = vport->vport;
 
        mutex_lock(&esw->state_lock);
        WARN_ON(vport->enabled);
@@ -1539,8 +1568,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
        vport->enabled_events = enable_events;
        vport->enabled = true;
 
-       /* only PF is trusted by default */
-       if (!vport_num)
+       /* The esw manager is trusted by default. The host PF (vport 0) is
+        * trusted as well on a SmartNIC, as it is a vport group manager.
+        */
+       if (esw->manager_vport == vport_num ||
+           (!vport_num && mlx5_core_is_ecpf(esw->dev)))
                vport->info.trusted = true;
 
        esw_vport_change_handle_locked(vport);
@@ -1550,9 +1582,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
        mutex_unlock(&esw->state_lock);
 }
 
-static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
+static void esw_disable_vport(struct mlx5_eswitch *esw,
+                             struct mlx5_vport *vport)
 {
-       struct mlx5_vport *vport = &esw->vports[vport_num];
+       u16 vport_num = vport->vport;
 
        if (!vport->enabled)
                return;
@@ -1573,10 +1606,10 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
        esw_vport_change_handle_locked(vport);
        vport->enabled_events = 0;
        esw_vport_disable_qos(esw, vport_num);
-       if (vport_num && esw->mode == SRIOV_LEGACY) {
+       if (esw->mode == SRIOV_LEGACY) {
                mlx5_modify_vport_admin_state(esw->dev,
                                              MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-                                             vport_num,
+                                             vport_num, 1,
                                              MLX5_VPORT_ADMIN_STATE_DOWN);
                esw_vport_disable_egress_acl(esw, vport);
                esw_vport_disable_ingress_acl(esw, vport);
@@ -1595,7 +1628,7 @@ static int eswitch_vport_event(struct notifier_block *nb,
        u16 vport_num;
 
        vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
-       vport = &esw->vports[vport_num];
+       vport = mlx5_eswitch_get_vport(esw, vport_num);
        if (vport->enabled)
                queue_work(esw->work_queue, &vport->vport_change_handler);
 
@@ -1607,6 +1640,8 @@ static int eswitch_vport_event(struct notifier_block *nb,
 
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
+       int vf_nvports = 0, total_nvports = 0;
+       struct mlx5_vport *vport;
        int err;
        int i, enabled_events;
 
@@ -1624,6 +1659,18 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 
        esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
 
+       if (mode == SRIOV_OFFLOADS) {
+               if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+                       err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports);
+                       if (err)
+                               return err;
+                       total_nvports = esw->total_vports;
+               } else {
+                       vf_nvports = nvfs;
+                       total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev);
+               }
+       }
+
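/* Editorial note, not part of the patch: on an ECPF eswitch manager the VF
 * count is owned by the host PF, so it is queried from the host params and
 * every vport the eswitch knows about is enabled; otherwise the
 * caller-supplied nvfs is used and the special (non-VF) vports are added
 * on top.
 */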
        esw->mode = mode;
 
        mlx5_lag_update(esw->dev);
@@ -1633,7 +1680,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
        } else {
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-               err = esw_offloads_init(esw, nvfs + 1);
+               err = esw_offloads_init(esw, vf_nvports, total_nvports);
        }
 
        if (err)
@@ -1648,8 +1695,20 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
         * 2. FDB/Eswitch is programmed by user space tools
         */
        enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
-       for (i = 0; i <= nvfs; i++)
-               esw_enable_vport(esw, i, enabled_events);
+
+       /* Enable PF vport */
+       vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+       esw_enable_vport(esw, vport, enabled_events);
+
+       /* Enable ECPF vports */
+       if (mlx5_ecpf_vport_exists(esw->dev)) {
+               vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+               esw_enable_vport(esw, vport, enabled_events);
+       }
+
+       /* Enable VF vports */
+       mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)
+               esw_enable_vport(esw, vport, enabled_events);
 
        if (mode == SRIOV_LEGACY) {
                MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
@@ -1674,8 +1733,8 @@ abort:
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 {
        struct esw_mc_addr *mc_promisc;
+       struct mlx5_vport *vport;
        int old_mode;
-       int nvports;
        int i;
 
        if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
@@ -1685,13 +1744,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
                 esw->enabled_vports, esw->mode);
 
        mc_promisc = &esw->mc_promisc;
-       nvports = esw->enabled_vports;
 
        if (esw->mode == SRIOV_LEGACY)
                mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
 
-       for (i = 0; i < esw->total_vports; i++)
-               esw_disable_vport(esw, i);
+       mlx5_esw_for_all_vports(esw, i, vport)
+               esw_disable_vport(esw, vport);
 
        if (mc_promisc && mc_promisc->uplink_rule)
                mlx5_del_flow_rules(mc_promisc->uplink_rule);
@@ -1701,7 +1759,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
        if (esw->mode == SRIOV_LEGACY)
                esw_destroy_legacy_fdb_table(esw);
        else if (esw->mode == SRIOV_OFFLOADS)
-               esw_offloads_cleanup(esw, nvports);
+               esw_offloads_cleanup(esw);
 
        old_mode = esw->mode;
        esw->mode = SRIOV_NONE;
@@ -1718,8 +1776,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 {
        int total_vports = MLX5_TOTAL_VPORTS(dev);
        struct mlx5_eswitch *esw;
-       int vport_num;
-       int err;
+       struct mlx5_vport *vport;
+       int err, i;
 
        if (!MLX5_VPORT_MANAGER(dev))
                return 0;
@@ -1735,6 +1793,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
                return -ENOMEM;
 
        esw->dev = dev;
+       esw->manager_vport = mlx5_eswitch_manager_vport(dev);
 
        esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
        if (!esw->work_queue) {
@@ -1749,6 +1808,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
                goto abort;
        }
 
+       esw->total_vports = total_vports;
+
        err = esw_offloads_init_reps(esw);
        if (err)
                goto abort;
@@ -1757,17 +1818,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        hash_init(esw->offloads.mod_hdr_tbl);
        mutex_init(&esw->state_lock);
 
-       for (vport_num = 0; vport_num < total_vports; vport_num++) {
-               struct mlx5_vport *vport = &esw->vports[vport_num];
-
-               vport->vport = vport_num;
+       mlx5_esw_for_all_vports(esw, i, vport) {
+               vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
                vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
                vport->dev = dev;
                INIT_WORK(&vport->vport_change_handler,
                          esw_vport_change_handler);
        }
 
-       esw->total_vports = total_vports;
        esw->enabled_vports = 0;
        esw->mode = SRIOV_NONE;
        esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
@@ -1866,7 +1924,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
 
        err = mlx5_modify_vport_admin_state(esw->dev,
                                            MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-                                           vport, link_state);
+                                           vport, 1, link_state);
        if (err) {
                mlx5_core_warn(esw->dev,
                               "Failed to set vport %d link state, err = %d",
@@ -2009,8 +2067,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
        u32 max_guarantee = 0;
        int i;
 
-       for (i = 0; i < esw->total_vports; i++) {
-               evport = &esw->vports[i];
+       mlx5_esw_for_all_vports(esw, i, evport) {
                if (!evport->enabled || evport->info.min_rate < max_guarantee)
                        continue;
                max_guarantee = evport->info.min_rate;
@@ -2029,8 +2086,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
        int err;
        int i;
 
-       for (i = 0; i < esw->total_vports; i++) {
-               evport = &esw->vports[i];
+       mlx5_esw_for_all_vports(esw, i, evport) {
                if (!evport->enabled)
                        continue;
                vport_min_rate = evport->info.min_rate;
@@ -2045,7 +2101,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
                if (bw_share == evport->qos.bw_share)
                        continue;
 
-               err = esw_vport_qos_config(esw, i, vport_max_rate,
+               err = esw_vport_qos_config(esw, evport->vport, vport_max_rate,
                                           bw_share);
                if (!err)
                        evport->qos.bw_share = bw_share;
@@ -2128,7 +2184,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
            !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
                return 0;
 
-       err = mlx5_query_vport_down_stats(dev, vport_idx,
+       err = mlx5_query_vport_down_stats(dev, vport_idx, 1,
                                          &rx_discard_vport_down,
                                          &tx_discard_vport_down);
        if (err)
@@ -2165,8 +2221,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
                 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
        MLX5_SET(query_vport_counter_in, in, op_mod, 0);
        MLX5_SET(query_vport_counter_in, in, vport_number, vport);
-       if (vport)
-               MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+       MLX5_SET(query_vport_counter_in, in, other_vport, 1);
 
        memset(out, 0, outlen);
        err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
index 9c89eea..af5581a 100644
@@ -38,6 +38,7 @@
 #include <net/devlink.h>
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include "lib/mpfs.h"
 
@@ -49,8 +50,6 @@
 #define MLX5_MAX_MC_PER_VPORT(dev) \
        (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
 
-#define FDB_UPLINK_VPORT 0xffff
-
 #define MLX5_MIN_BW_SHARE 1
 
 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
@@ -183,6 +182,16 @@ struct esw_mc_addr { /* SRIOV only */
        u32                    refcnt;
 };
 
+struct mlx5_host_work {
+       struct work_struct      work;
+       struct mlx5_eswitch     *esw;
+};
+
+struct mlx5_host_info {
+       struct mlx5_nb          nb;
+       u16                     num_vfs;
+};
+
 struct mlx5_eswitch {
        struct mlx5_core_dev    *dev;
        struct mlx5_nb          nb;
@@ -206,10 +215,13 @@ struct mlx5_eswitch {
        struct mlx5_esw_offload offloads;
        int                     mode;
        int                     nvports;
+       u16                     manager_vport;
+       struct mlx5_host_info   host_info;
 };
 
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+void esw_offloads_cleanup(struct mlx5_eswitch *esw);
+int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
+                     int total_nvports);
 void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
 int esw_offloads_init_reps(struct mlx5_eswitch *esw);
 
@@ -312,6 +324,7 @@ struct mlx5_esw_flow_attr {
        } dests[MLX5_MAX_FLOW_FWD_VPORTS];
        u32     mod_hdr_id;
        u8      match_level;
+       u8      tunnel_match_level;
        struct mlx5_fc *counter;
        u32     chain;
        u16     prio;
@@ -364,6 +377,53 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
 
 #define esw_debug(dev, format, ...)                            \
        mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+
+/* The returned number is valid only when the dev is an eswitch manager. */
+static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
+{
+       return mlx5_core_is_ecpf_esw_manager(dev) ?
+               MLX5_VPORT_ECPF : MLX5_VPORT_PF;
+}
+
+static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
+{
+       /* The uplink always resides at the last element of the array. */
+       return esw->total_vports - 1;
+}
+
+static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw)
+{
+       return esw->total_vports - 2;
+}
+
+static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
+                                                 u16 vport_num)
+{
+       if (vport_num == MLX5_VPORT_ECPF) {
+               if (!mlx5_ecpf_vport_exists(esw->dev))
+                       esw_warn(esw->dev, "ECPF vport doesn't exist!\n");
+               return mlx5_eswitch_ecpf_idx(esw);
+       }
+
+       if (vport_num == MLX5_VPORT_UPLINK)
+               return mlx5_eswitch_uplink_idx(esw);
+
+       return vport_num;
+}
+
+static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
+                                                 int index)
+{
+       if (index == mlx5_eswitch_ecpf_idx(esw) &&
+           mlx5_ecpf_vport_exists(esw->dev))
+               return MLX5_VPORT_ECPF;
+
+       if (index == mlx5_eswitch_uplink_idx(esw))
+               return MLX5_VPORT_UPLINK;
+
+       return index;
+}
+
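/* Editorial worked example, not part of the patch: assuming a device with
 * total_vports = 5 (PF + 2 VFs + ECPF + uplink), the mapping implemented
 * by the two helpers above is:
 *
 *   vport_num             index
 *   MLX5_VPORT_PF     ->  0
 *   VF 1, VF 2        ->  1, 2
 *   MLX5_VPORT_ECPF   ->  3  (total_vports - 2)
 *   MLX5_VPORT_UPLINK ->  4  (total_vports - 1)
 *
 * The helpers are inverses of each other over this range, which is what
 * lets the iterators walk a dense array while the vport numbers themselves
 * stay sparse.
 */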
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
index 53065b6..f226039 100644
 #include "en.h"
 #include "fs_core.h"
 #include "lib/devcom.h"
+#include "ecpf.h"
+#include "lib/eq.h"
 
 enum {
        FDB_FAST_PATH = 0,
        FDB_SLOW_PATH
 };
 
+/* There are two match-all miss flows, one for unicast dst mac and
+ * one for multicast.
+ */
+#define MLX5_ESW_MISS_FLOWS (2)
+
 #define fdb_prio_table(esw, chain, prio, level) \
        (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
 
+#define UPLINK_REP_INDEX 0
+
+/* The rep getter/iterator are only valid after esw->total_vports
+ * and vport->vport are initialized in mlx5_eswitch_init.
+ */
+#define mlx5_esw_for_all_reps(esw, i, rep)                     \
+       for ((i) = MLX5_VPORT_PF;                               \
+            (rep) = &(esw)->offloads.vport_reps[i],            \
+            (i) < (esw)->total_vports; (i)++)
+
+#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs)            \
+       for ((i) = MLX5_VPORT_FIRST_VF;                         \
+            (rep) = &(esw)->offloads.vport_reps[i],            \
+            (i) <= (nvfs); (i)++)
+
+#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs)    \
+       for ((i) = (nvfs);                                      \
+            (rep) = &(esw)->offloads.vport_reps[i],            \
+            (i) >= MLX5_VPORT_FIRST_VF; (i)--)
+
+#define mlx5_esw_for_each_vf_vport(esw, vport, nvfs)           \
+       for ((vport) = MLX5_VPORT_FIRST_VF;                     \
+            (vport) <= (nvfs); (vport)++)
+
+#define mlx5_esw_for_each_vf_vport_reverse(esw, vport, nvfs)   \
+       for ((vport) = (nvfs);                                  \
+            (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
+
+static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
+                                                    u16 vport_num)
+{
+       u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+
+       WARN_ON(idx > esw->total_vports - 1);
+       return &esw->offloads.vport_reps[idx];
+}
+
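/* Editorial note, not part of the patch: these rep iterators mirror the
 * vport iterators in eswitch.c and rely on the same vport-number-to-index
 * mapping, so the reps for PF, VFs, ECPF and uplink live in one dense
 * vport_reps array.
 */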
 static struct mlx5_flow_table *
 esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
 static void
@@ -160,14 +204,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                MLX5_SET_TO_ONES(fte_match_set_misc, misc,
                                 source_eswitch_owner_vhca_id);
 
-       if (attr->match_level == MLX5_MATCH_NONE)
-               spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
-       else
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
-                                             MLX5_MATCH_MISC_PARAMETERS;
-
-       if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
-               spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
+       spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+       if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+               if (attr->tunnel_match_level != MLX5_MATCH_NONE)
+                       spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+               if (attr->match_level != MLX5_MATCH_NONE)
+                       spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
+       } else if (attr->match_level != MLX5_MATCH_NONE) {
+               spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+       }
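/* Editorial summary of the block above, not part of the patch:
 *   decap + tunnel criteria   -> match OUTER headers (the tunnel's own)
 *   decap + flow criteria     -> match INNER headers (the decapped packet)
 *   no decap + flow criteria  -> match OUTER headers
 * Misc parameters (source port / owner vhca id) stay enabled in every case.
 */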
 
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                flow_act.modify_id = attr->mod_hdr_id;
@@ -318,7 +363,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
        esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
        for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
                rep = &esw->offloads.vport_reps[vf_vport];
-               if (!rep->rep_if[REP_ETH].valid)
+               if (rep->rep_if[REP_ETH].state != REP_LOADED)
                        continue;
 
                err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
@@ -359,15 +404,15 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
        in_rep  = attr->in_rep;
        out_rep = attr->dests[0].rep;
 
-       if (push && in_rep->vport == FDB_UPLINK_VPORT)
+       if (push && in_rep->vport == MLX5_VPORT_UPLINK)
                goto out_notsupp;
 
-       if (pop && out_rep->vport == FDB_UPLINK_VPORT)
+       if (pop && out_rep->vport == MLX5_VPORT_UPLINK)
                goto out_notsupp;
 
        /* vport has vlan push configured, can't offload VF --> wire rules without it */
        if (!push && !pop && fwd)
-               if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT)
+               if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK)
                        goto out_notsupp;
 
        /* protects against (1) setting rules with different vlans to push and
@@ -409,7 +454,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 
        if (!push && !pop && fwd) {
                /* tracks VF --> wire rules without vlan push action */
-               if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) {
+               if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
                        vport->vlan_refcount++;
                        attr->vlan_handled = true;
                }
@@ -469,7 +514,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 
        if (!push && !pop && fwd) {
                /* tracks VF --> wire rules without vlan push action */
-               if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT)
+               if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
                        vport->vlan_refcount--;
 
                return 0;
@@ -516,7 +561,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
        MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
-       MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */
+       /* source vport is the esw manager */
+       MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
@@ -561,7 +607,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
                         source_eswitch_owner_vhca_id);
 
        dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-       dest->vport.num = 0;
+       dest->vport.num = peer_dev->priv.eswitch->manager_vport;
        dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
        dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 }
@@ -595,14 +641,35 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                            misc_parameters);
 
-       for (i = 1; i < nvports; i++) {
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+               MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF);
+               flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+                                          spec, &flow_act, &dest, 1);
+               if (IS_ERR(flow)) {
+                       err = PTR_ERR(flow);
+                       goto add_pf_flow_err;
+               }
+               flows[MLX5_VPORT_PF] = flow;
+       }
+
+       if (mlx5_ecpf_vport_exists(esw->dev)) {
+               MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
+               flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+                                          spec, &flow_act, &dest, 1);
+               if (IS_ERR(flow)) {
+                       err = PTR_ERR(flow);
+                       goto add_ecpf_flow_err;
+               }
+               flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
+       }
+
+       mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) {
                MLX5_SET(fte_match_set_misc, misc, source_port, i);
                flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
                                           spec, &flow_act, &dest, 1);
                if (IS_ERR(flow)) {
                        err = PTR_ERR(flow);
-                       esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
-                       goto add_flow_err;
+                       goto add_vf_flow_err;
                }
                flows[i] = flow;
        }
@@ -612,9 +679,18 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
        kvfree(spec);
        return 0;
 
-add_flow_err:
-       for (i--; i > 0; i--)
+add_vf_flow_err:
+       nvports = --i;
+       mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports)
                mlx5_del_flow_rules(flows[i]);
+
+       if (mlx5_ecpf_vport_exists(esw->dev))
+               mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+add_ecpf_flow_err:
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+               mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+add_pf_flow_err:
+       esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
        kvfree(flows);
 alloc_flows_err:
        kvfree(spec);
@@ -628,9 +704,15 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
 
        flows = esw->fdb_table.offloads.peer_miss_rules;
 
-       for (i = 1; i < esw->total_vports; i++)
+       mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev))
                mlx5_del_flow_rules(flows[i]);
 
+       if (mlx5_ecpf_vport_exists(esw->dev))
+               mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+               mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+
        kvfree(flows);
 }
 
@@ -660,7 +742,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
        dmac_c[0] = 0x01;
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-       dest.vport.num = 0;
+       dest.vport.num = esw->manager_vport;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
        flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
@@ -904,8 +986,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
                esw->fdb_table.offloads.fdb_left[i] =
                        ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
 
-       table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 +
-               esw->total_vports;
+       table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+               MLX5_ESW_MISS_FLOWS + esw->total_vports;
 
        /* create the slow path fdb with encap set, so further table instances
         * can be created at run time while VFs are probed if the FW allows that.
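
A worked sizing example, assuming the values these macros have earlier in
this file at the time of the series (MAX_SQ_NVPORTS = 32, MAX_PF_SQ = 256):
with nvports = 4 VFs and total_vports = 6 (PF + 4 VFs + uplink), the slow
path FDB is sized as

        table_size = 4 * 32 + 256 + MLX5_ESW_MISS_FLOWS + 6
                   = 128 + 256 + 2 + 6 = 392

i.e. room for the per-vport send-to-vport rules, the PF SQ rules, the two
match-all miss flows, and one peer miss rule per vport.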
@@ -999,7 +1081,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        dmac[0] = 0x01;
 
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
-       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
+       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+                ix + MLX5_ESW_MISS_FLOWS);
 
        g = mlx5_create_flow_group(fdb, flow_group_in);
        if (IS_ERR(g)) {
@@ -1048,7 +1131,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
        esw_destroy_offloads_fast_fdb_tables(esw);
 }
 
-static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
 {
        struct mlx5_flow_table_attr ft_attr = {};
        struct mlx5_core_dev *dev = esw->dev;
@@ -1062,7 +1145,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
                return -EOPNOTSUPP;
        }
 
-       ft_attr.max_fte = dev->priv.sriov.num_vfs + 2;
+       ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS;
 
        ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
        if (IS_ERR(ft_offloads)) {
@@ -1082,16 +1165,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
        mlx5_destroy_flow_table(offloads->ft_offloads);
 }
 
-static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        struct mlx5_flow_group *g;
-       struct mlx5_priv *priv = &esw->dev->priv;
        u32 *flow_group_in;
        void *match_criteria, *misc;
        int err = 0;
-       int nvports = priv->sriov.num_vfs + 2;
 
+       nvports = nvports + MLX5_ESW_MISS_FLOWS;
        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
        if (!flow_group_in)
                return -ENOMEM;
@@ -1168,7 +1250,8 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
 {
        int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
 
-       if (esw->mode != SRIOV_LEGACY) {
+       if (esw->mode != SRIOV_LEGACY &&
+           !mlx5_core_is_ecpf_esw_manager(esw->dev)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Can't set offloads mode, SRIOV legacy not enabled");
                return -EINVAL;
@@ -1206,9 +1289,8 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 {
        int total_vfs = MLX5_TOTAL_VPORTS(esw->dev);
        struct mlx5_core_dev *dev = esw->dev;
-       struct mlx5_esw_offload *offloads;
        struct mlx5_eswitch_rep *rep;
-       u8 hw_id[ETH_ALEN];
+       u8 hw_id[ETH_ALEN], rep_type;
        int vport;
 
        esw->offloads.vport_reps = kcalloc(total_vfs,
@@ -1217,75 +1299,203 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
        if (!esw->offloads.vport_reps)
                return -ENOMEM;
 
-       offloads = &esw->offloads;
        mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
 
-       for (vport = 0; vport < total_vfs; vport++) {
-               rep = &offloads->vport_reps[vport];
-
-               rep->vport = vport;
+       mlx5_esw_for_all_reps(esw, vport, rep) {
+               rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport);
                ether_addr_copy(rep->hw_id, hw_id);
-       }
 
-       offloads->vport_reps[0].vport = FDB_UPLINK_VPORT;
+               for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+                       rep->rep_if[rep_type].state = REP_UNREGISTERED;
+       }
 
        return 0;
 }
 
-static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports,
-                                         u8 rep_type)
+static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
+                                     struct mlx5_eswitch_rep *rep, u8 rep_type)
+{
+       if (rep->rep_if[rep_type].state != REP_LOADED)
+               return;
+
+       rep->rep_if[rep_type].unload(rep);
+       rep->rep_if[rep_type].state = REP_REGISTERED;
+}
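
The old rep_if valid flag becomes a three-state lifecycle; a sketch of the
states this code assumes (the enum itself is added to
include/linux/mlx5/eswitch.h by this series):

        enum {
                REP_UNREGISTERED,  /* no callbacks installed yet */
                REP_REGISTERED,    /* callbacks installed, rep not loaded */
                REP_LOADED,        /* load() succeeded, rep is live */
        };

Unload is thus idempotent: anything not REP_LOADED is skipped, and a
successful unload drops the rep back to REP_REGISTERED rather than all the
way to REP_UNREGISTERED.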
+
+static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
 {
        struct mlx5_eswitch_rep *rep;
-       int vport;
 
-       for (vport = nvports - 1; vport >= 0; vport--) {
-               rep = &esw->offloads.vport_reps[vport];
-               if (!rep->rep_if[rep_type].valid)
-                       continue;
+       if (mlx5_ecpf_vport_exists(esw->dev)) {
+               rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
+               __esw_offloads_unload_rep(esw, rep, rep_type);
+       }
 
-               rep->rep_if[rep_type].unload(rep);
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+               rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+               __esw_offloads_unload_rep(esw, rep, rep_type);
        }
+
+       rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+       __esw_offloads_unload_rep(esw, rep, rep_type);
 }
 
-static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
+static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
+                                  u8 rep_type)
+{
+       struct mlx5_eswitch_rep *rep;
+       int i;
+
+       mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports)
+               __esw_offloads_unload_rep(esw, rep, rep_type);
+}
+
+static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports)
+{
+       u8 rep_type = NUM_REP_TYPES;
+
+       while (rep_type-- > 0)
+               __unload_reps_vf_vport(esw, nvports, rep_type);
+}
+
+static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
+                                   u8 rep_type)
+{
+       __unload_reps_vf_vport(esw, nvports, rep_type);
+
+       /* Special vports must be the last to unload. */
+       __unload_reps_special_vport(esw, rep_type);
+}
+
+static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports)
 {
        u8 rep_type = NUM_REP_TYPES;
 
        while (rep_type-- > 0)
-               esw_offloads_unload_reps_type(esw, nvports, rep_type);
+               __unload_reps_all_vport(esw, nvports, rep_type);
+}
+
+static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
+                                  struct mlx5_eswitch_rep *rep, u8 rep_type)
+{
+       int err = 0;
+
+       if (rep->rep_if[rep_type].state != REP_REGISTERED)
+               return 0;
+
+       err = rep->rep_if[rep_type].load(esw->dev, rep);
+       if (err)
+               return err;
+
+       rep->rep_if[rep_type].state = REP_LOADED;
+
+       return 0;
 }
 
-static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
-                                      u8 rep_type)
+static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
 {
        struct mlx5_eswitch_rep *rep;
-       int vport;
        int err;
 
-       for (vport = 0; vport < nvports; vport++) {
-               rep = &esw->offloads.vport_reps[vport];
-               if (!rep->rep_if[rep_type].valid)
-                       continue;
+       rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+       err = __esw_offloads_load_rep(esw, rep, rep_type);
+       if (err)
+               return err;
 
-               err = rep->rep_if[rep_type].load(esw->dev, rep);
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+               rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+               err = __esw_offloads_load_rep(esw, rep, rep_type);
                if (err)
-                       goto err_reps;
+                       goto err_pf;
+       }
+
+       if (mlx5_ecpf_vport_exists(esw->dev)) {
+               rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
+               err = __esw_offloads_load_rep(esw, rep, rep_type);
+               if (err)
+                       goto err_ecpf;
+       }
+
+       return 0;
+
+err_ecpf:
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+               rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+               __esw_offloads_unload_rep(esw, rep, rep_type);
+       }
+
+err_pf:
+       rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+       __esw_offloads_unload_rep(esw, rep, rep_type);
+       return err;
+}
+
+static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
+                               u8 rep_type)
+{
+       struct mlx5_eswitch_rep *rep;
+       int err, i;
+
+       mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) {
+               err = __esw_offloads_load_rep(esw, rep, rep_type);
+               if (err)
+                       goto err_vf;
        }
 
        return 0;
 
+err_vf:
+       __unload_reps_vf_vport(esw, --i, rep_type);
+       return err;
+}
+
+static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
+{
+       u8 rep_type = 0;
+       int err;
+
+       for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+               err = __load_reps_vf_vport(esw, nvports, rep_type);
+               if (err)
+                       goto err_reps;
+       }
+
+       return err;
+
 err_reps:
-       esw_offloads_unload_reps_type(esw, vport, rep_type);
+       while (rep_type-- > 0)
+               __unload_reps_vf_vport(esw, nvports, rep_type);
+       return err;
+}
+
+static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
+                                u8 rep_type)
+{
+       int err;
+
+       /* Special vports must be loaded first. */
+       err = __load_reps_special_vport(esw, rep_type);
+       if (err)
+               return err;
+
+       err = __load_reps_vf_vport(esw, nvports, rep_type);
+       if (err)
+               goto err_vfs;
+
+       return 0;
+
+err_vfs:
+       __unload_reps_special_vport(esw, rep_type);
        return err;
 }
 
-static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
+static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
 {
        u8 rep_type = 0;
        int err;
 
        for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
-               err = esw_offloads_load_reps_type(esw, nvports, rep_type);
+               err = __load_reps_all_vport(esw, nvports, rep_type);
                if (err)
                        goto err_reps;
        }
@@ -1294,7 +1504,7 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
 
 err_reps:
        while (rep_type-- > 0)
-               esw_offloads_unload_reps_type(esw, nvports, rep_type);
+               __unload_reps_all_vport(esw, nvports, rep_type);
        return err;
 }
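
Load and unload are deliberate mirror images, so the special vports bracket
the VF reps' lifetime:

        load:   uplink -> PF -> ECPF -> VF1 .. VFn
        unload: VFn .. VF1 -> ECPF -> PF -> uplink

This keeps the uplink rep, which the other reps may depend on, alive for the
whole window in which any VF rep is loaded.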
 
@@ -1397,7 +1607,7 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
        mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 }
 
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
+static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
 {
        int err;
 
@@ -1407,24 +1617,16 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
        if (err)
                return err;
 
-       err = esw_create_offloads_table(esw);
+       err = esw_create_offloads_table(esw, nvports);
        if (err)
                goto create_ft_err;
 
-       err = esw_create_vport_rx_group(esw);
+       err = esw_create_vport_rx_group(esw, nvports);
        if (err)
                goto create_fg_err;
 
-       err = esw_offloads_load_reps(esw, nvports);
-       if (err)
-               goto err_reps;
-
-       esw_offloads_devcom_init(esw);
        return 0;
 
-err_reps:
-       esw_destroy_vport_rx_group(esw);
-
 create_fg_err:
        esw_destroy_offloads_table(esw);
 
@@ -1434,6 +1636,95 @@ create_ft_err:
        return err;
 }
 
+static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
+{
+       esw_destroy_vport_rx_group(esw);
+       esw_destroy_offloads_table(esw);
+       esw_destroy_offloads_fdb_tables(esw);
+}
+
+static void esw_host_params_event_handler(struct work_struct *work)
+{
+       struct mlx5_host_work *host_work;
+       struct mlx5_eswitch *esw;
+       int err, num_vf = 0;
+
+       host_work = container_of(work, struct mlx5_host_work, work);
+       esw = host_work->esw;
+
+       err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf);
+       if (err || num_vf == esw->host_info.num_vfs)
+               goto out;
+
+       /* Number of VFs can only change from "0 to x" or "x to 0". */
+       if (esw->host_info.num_vfs > 0) {
+               esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs);
+       } else {
+               err = esw_offloads_load_vf_reps(esw, num_vf);
+
+               if (err)
+                       goto out;
+       }
+
+       esw->host_info.num_vfs = num_vf;
+
+out:
+       kfree(host_work);
+}
+
+static int esw_host_params_event(struct notifier_block *nb,
+                                unsigned long type, void *data)
+{
+       struct mlx5_host_work *host_work;
+       struct mlx5_host_info *host_info;
+       struct mlx5_eswitch *esw;
+
+       host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC);
+       if (!host_work)
+               return NOTIFY_DONE;
+
+       host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb);
+       esw = container_of(host_info, struct mlx5_eswitch, host_info);
+
+       host_work->esw = esw;
+
+       INIT_WORK(&host_work->work, esw_host_params_event_handler);
+       queue_work(esw->work_queue, &host_work->work);
+
+       return NOTIFY_OK;
+}
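
The notifier itself runs from EQ (atomic) context, which is why the work
item is allocated with GFP_ATOMIC and only queued here; the actual rep
load/unload, which sleeps, is deferred to esw->work_queue and performed in
process context by esw_host_params_event_handler() above.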
+
+int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
+                     int total_nvports)
+{
+       int err;
+
+       mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
+
+       err = esw_offloads_steering_init(esw, total_nvports);
+       if (err)
+               return err;
+
+       err = esw_offloads_load_all_reps(esw, vf_nvports);
+       if (err)
+               goto err_reps;
+
+       esw_offloads_devcom_init(esw);
+
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+               MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event,
+                            HOST_PARAMS_CHANGE);
+               mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb);
+               esw->host_info.num_vfs = vf_nvports;
+       }
+
+       return 0;
+
+err_reps:
+       esw_offloads_steering_cleanup(esw);
+       return err;
+}
+
 static int esw_offloads_stop(struct mlx5_eswitch *esw,
                             struct netlink_ext_ack *extack)
 {
@@ -1453,13 +1744,21 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
        return err;
 }
 
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
+void esw_offloads_cleanup(struct mlx5_eswitch *esw)
 {
+       u16 num_vfs;
+
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+               mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb);
+               flush_workqueue(esw->work_queue);
+               num_vfs = esw->host_info.num_vfs;
+       } else {
+               num_vfs = esw->dev->priv.sriov.num_vfs;
+       }
+
        esw_offloads_devcom_cleanup(esw);
-       esw_offloads_unload_reps(esw, nvports);
-       esw_destroy_vport_rx_group(esw);
-       esw_destroy_offloads_table(esw);
-       esw_destroy_offloads_fdb_tables(esw);
+       esw_offloads_unload_all_reps(esw, num_vfs);
+       esw_offloads_steering_cleanup(esw);
 }
 
 static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
@@ -1548,7 +1847,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink)
        if (!MLX5_ESWITCH_MANAGER(dev))
                return -EPERM;
 
-       if (dev->priv.eswitch->mode == SRIOV_NONE)
+       if (dev->priv.eswitch->mode == SRIOV_NONE &&
+           !mlx5_core_is_ecpf_esw_manager(dev))
                return -EOPNOTSUPP;
 
        return 0;
@@ -1760,47 +2060,45 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
        return 0;
 }
 
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-                                    int vport_index,
-                                    struct mlx5_eswitch_rep_if *__rep_if,
-                                    u8 rep_type)
+void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
+                                     struct mlx5_eswitch_rep_if *__rep_if,
+                                     u8 rep_type)
 {
-       struct mlx5_esw_offload *offloads = &esw->offloads;
        struct mlx5_eswitch_rep_if *rep_if;
+       struct mlx5_eswitch_rep *rep;
+       int i;
 
-       rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type];
-
-       rep_if->load   = __rep_if->load;
-       rep_if->unload = __rep_if->unload;
-       rep_if->get_proto_dev = __rep_if->get_proto_dev;
-       rep_if->priv = __rep_if->priv;
+       mlx5_esw_for_all_reps(esw, i, rep) {
+               rep_if = &rep->rep_if[rep_type];
+               rep_if->load   = __rep_if->load;
+               rep_if->unload = __rep_if->unload;
+               rep_if->get_proto_dev = __rep_if->get_proto_dev;
+               rep_if->priv = __rep_if->priv;
 
-       rep_if->valid = true;
+               rep_if->state = REP_REGISTERED;
+       }
 }
-EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
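
With the per-vport index argument gone, a rep consumer registers one
callback set that is fanned out to every rep slot. A minimal sketch of a
caller, modeled on the mlx5e rep driver (the callback names are
illustrative):

        static void mlx5e_rep_register_vport_reps(struct mlx5_eswitch *esw)
        {
                struct mlx5_eswitch_rep_if rep_if = {};

                rep_if.load = mlx5e_vport_rep_load;             /* illustrative */
                rep_if.unload = mlx5e_vport_rep_unload;
                rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
                mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH);
        }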
 
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-                                      int vport_index, u8 rep_type)
+void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
 {
-       struct mlx5_esw_offload *offloads = &esw->offloads;
+       u16 max_vf = mlx5_core_max_vfs(esw->dev);
        struct mlx5_eswitch_rep *rep;
+       int i;
 
-       rep = &offloads->vport_reps[vport_index];
-
-       if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled)
-               rep->rep_if[rep_type].unload(rep);
+       if (esw->mode == SRIOV_OFFLOADS)
+               __unload_reps_all_vport(esw, max_vf, rep_type);
 
-       rep->rep_if[rep_type].valid = false;
+       mlx5_esw_for_all_reps(esw, i, rep)
+               rep->rep_if[rep_type].state = REP_UNREGISTERED;
 }
-EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
 
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 {
-#define UPLINK_REP_INDEX 0
-       struct mlx5_esw_offload *offloads = &esw->offloads;
        struct mlx5_eswitch_rep *rep;
 
-       rep = &offloads->vport_reps[UPLINK_REP_INDEX];
+       rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
        return rep->rep_if[rep_type].priv;
 }
 
@@ -1808,15 +2106,11 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
                                 int vport,
                                 u8 rep_type)
 {
-       struct mlx5_esw_offload *offloads = &esw->offloads;
        struct mlx5_eswitch_rep *rep;
 
-       if (vport == FDB_UPLINK_VPORT)
-               vport = UPLINK_REP_INDEX;
-
-       rep = &offloads->vport_reps[vport];
+       rep = mlx5_eswitch_get_rep(esw, vport);
 
-       if (rep->rep_if[rep_type].valid &&
+       if (rep->rep_if[rep_type].state == REP_LOADED &&
            rep->rep_if[rep_type].get_proto_dev)
                return rep->rep_if[rep_type].get_proto_dev(rep);
        return NULL;
@@ -1825,13 +2119,13 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
 
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
 {
-       return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+       return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type);
 }
 EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
 
 struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
                                                int vport)
 {
-       return &esw->offloads.vport_reps[vport];
+       return mlx5_eswitch_get_rep(esw, vport);
 }
 EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
index fbc42b7..5d5864e 100644 (file)
@@ -103,6 +103,8 @@ static const char *eqe_type_str(u8 type)
                return "MLX5_EVENT_TYPE_STALL_EVENT";
        case MLX5_EVENT_TYPE_CMD:
                return "MLX5_EVENT_TYPE_CMD";
+       case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE:
+               return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE";
        case MLX5_EVENT_TYPE_PAGE_REQUEST:
                return "MLX5_EVENT_TYPE_PAGE_REQUEST";
        case MLX5_EVENT_TYPE_PAGE_FAULT:
@@ -211,11 +213,10 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data
        enum port_module_event_status_type module_status;
        enum port_module_event_error_type error_type;
        struct mlx5_eqe_port_module *module_event_eqe;
-       const char *status_str, *error_str;
+       const char *status_str;
        u8 module_num;
 
        module_event_eqe = &eqe->data.port_module;
-       module_num = module_event_eqe->module;
        module_status = module_event_eqe->module_status &
                        PORT_MODULE_EVENT_MODULE_STATUS_MASK;
        error_type = module_event_eqe->error_type &
@@ -223,25 +224,27 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data
 
        if (module_status < MLX5_MODULE_STATUS_NUM)
                events->pme_stats.status_counters[module_status]++;
-       status_str = mlx5_pme_status_to_string(module_status);
 
-       if (module_status == MLX5_MODULE_STATUS_ERROR) {
+       if (module_status == MLX5_MODULE_STATUS_ERROR)
                if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
                        events->pme_stats.error_counters[error_type]++;
-               error_str = mlx5_pme_error_to_string(error_type);
-       }
 
        if (!printk_ratelimit())
                return NOTIFY_OK;
 
-       if (module_status == MLX5_MODULE_STATUS_ERROR)
+       module_num = module_event_eqe->module;
+       status_str = mlx5_pme_status_to_string(module_status);
+       if (module_status == MLX5_MODULE_STATUS_ERROR) {
+               const char *error_str = mlx5_pme_error_to_string(error_type);
+
                mlx5_core_err(events->dev,
                              "Port module event[error]: module %u, %s, %s\n",
                              module_num, status_str, error_str);
-       else
+       } else {
                mlx5_core_info(events->dev,
                               "Port module event: module %u, %s\n",
                               module_num, status_str);
+       }
 
        return NOTIFY_OK;
 }
index b354f3e..f2cfa01 100644 (file)
@@ -32,6 +32,7 @@
 
 #include <linux/mutex.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
 #include <linux/mlx5/eswitch.h>
 
 #include "mlx5_core.h"
index 196c073..cb9fa34 100644 (file)
@@ -103,7 +103,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
        mlx5_core_err(dev, "start\n");
        if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
                dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
-               mlx5_cmd_trigger_completions(dev);
+               mlx5_cmd_flush(dev);
        }
 
        mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
index 2d22338..04c5aca 100644 (file)
@@ -343,6 +343,11 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
                roce_lag = !mlx5_sriov_is_enabled(dev0) &&
                           !mlx5_sriov_is_enabled(dev1);
 
+#ifdef CONFIG_MLX5_ESWITCH
+               roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE &&
+                           dev1->priv.eswitch->mode == SRIOV_NONE;
+#endif
+
                if (roce_lag)
                        mlx5_lag_remove_ib_devices(ldev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
deleted file mode 100644 (file)
index 3a3b000..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
-#include "mlx5_core.h"
-
-int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
-                     u16 opmod, u8 port)
-{
-       int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
-       int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
-       int err = -ENOMEM;
-       void *data;
-       void *resp;
-       u32 *out;
-       u32 *in;
-
-       in = kzalloc(inlen, GFP_KERNEL);
-       out = kzalloc(outlen, GFP_KERNEL);
-       if (!in || !out)
-               goto out;
-
-       MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
-       MLX5_SET(mad_ifc_in, in, op_mod, opmod);
-       MLX5_SET(mad_ifc_in, in, port, port);
-
-       data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
-       memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
-
-       err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
-       if (err)
-               goto out;
-
-       resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
-       memcpy(outb, resp,
-              MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
-
-out:
-       kfree(out);
-       kfree(in);
-       return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc);
index be81b31..40d591c 100644 (file)
@@ -65,6 +65,7 @@
 #include "lib/vxlan.h"
 #include "lib/devcom.h"
 #include "diag/fw_tracer.h"
+#include "ecpf.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -459,6 +460,50 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
        return err;
 }
 
+static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
+{
+       void *set_hca_cap;
+       void *set_ctx;
+       int set_sz;
+       int err;
+
+       if (!MLX5_CAP_GEN(dev, pg))
+               return 0;
+
+       err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
+       if (err)
+               return err;
+
+       if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) ||
+             MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) ||
+             MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive)))
+               return 0;
+
+       set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+       set_ctx = kzalloc(set_sz, GFP_KERNEL);
+       if (!set_ctx)
+               return -ENOMEM;
+
+       set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+       memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
+              MLX5_ST_SZ_BYTES(odp_cap));
+
+       /* set ODP SRQ support for RC/UD and XRC transports */
+       MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive,
+                MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive));
+
+       MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive,
+                MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive));
+
+       MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive,
+                MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive));
+
+       err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP);
+
+       kfree(set_ctx);
+       return err;
+}
+
 static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
        void *set_ctx = NULL;
@@ -567,6 +612,8 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
 
        MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
        MLX5_SET(enable_hca_in, in, function_id, func_id);
+       MLX5_SET(enable_hca_in, in, embedded_cpu_function,
+                dev->caps.embedded_cpu);
        return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 }
 
@@ -577,6 +624,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
 
        MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
        MLX5_SET(disable_hca_in, in, function_id, func_id);
+       MLX5_SET(disable_hca_in, in, embedded_cpu_function,
+                dev->caps.embedded_cpu);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
@@ -693,6 +742,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto err_clr_master;
        }
 
+       if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+           pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+           pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+               mlx5_core_dbg(dev, "Enabling pci atomics failed\n");
+
        dev->iseg_base = pci_resource_start(dev->pdev, 0);
        dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
        if (!dev->iseg) {
@@ -849,6 +903,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        struct pci_dev *pdev = dev->pdev;
        int err;
 
+       dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
        mutex_lock(&dev->intf_state_mutex);
        if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
                dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
@@ -926,6 +981,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto reclaim_boot_pages;
        }
 
+       err = handle_hca_cap_odp(dev);
+       if (err) {
+               dev_err(&pdev->dev, "handle_hca_cap_odp failed\n");
+               goto reclaim_boot_pages;
+       }
+
        err = mlx5_satisfy_startup_pages(dev, 0);
        if (err) {
                dev_err(&pdev->dev, "failed to allocate init pages\n");
@@ -1014,6 +1075,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_sriov;
        }
 
+       err = mlx5_ec_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to init embedded CPU\n");
+               goto err_ec;
+       }
+
        if (mlx5_device_registered(dev)) {
                mlx5_attach_device(dev);
        } else {
@@ -1031,6 +1098,9 @@ out:
        return 0;
 
 err_reg_dev:
+       mlx5_ec_cleanup(dev);
+
+err_ec:
        mlx5_sriov_detach(dev);
 
 err_sriov:
@@ -1105,6 +1175,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        if (mlx5_device_registered(dev))
                mlx5_detach_device(dev);
 
+       mlx5_ec_cleanup(dev);
        mlx5_sriov_detach(dev);
        mlx5_cleanup_fs(dev);
        mlx5_accel_ipsec_cleanup(dev);
index 5300b0b..9529cf9 100644 (file)
@@ -121,11 +121,12 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
                                       u32 modify_bitmask);
 int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
                                        u32 element_id);
-int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages);
 u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
                             struct ptp_system_timestamp *sts);
 
 void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
+void mlx5_cmd_flush(struct mlx5_core_dev *dev);
 int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 
index 0670165..ea744d8 100644 (file)
@@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
 
 int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
                             struct mlx5_core_mkey *mkey,
-                            u32 *in, int inlen,
-                            u32 *out, int outlen,
-                            mlx5_cmd_cbk_t callback, void *context)
+                            struct mlx5_async_ctx *async_ctx, u32 *in,
+                            int inlen, u32 *out, int outlen,
+                            mlx5_async_cbk_t callback,
+                            struct mlx5_async_work *context)
 {
        struct mlx5_mkey_table *table = &dev->priv.mkey_table;
        u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
@@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
        MLX5_SET(mkc, mkc, mkey_7_0, key);
 
        if (callback)
-               return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen,
+               return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
                                        callback, context);
 
        err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
@@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
                          struct mlx5_core_mkey *mkey,
                          u32 *in, int inlen)
 {
-       return mlx5_core_create_mkey_cb(dev, mkey, in, inlen,
+       return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
                                        NULL, 0, NULL, NULL);
 }
 EXPORT_SYMBOL(mlx5_core_create_mkey);
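
The added mlx5_async_ctx/mlx5_async_work parameters come from the reworked
asynchronous command interface merged in this window, which ties every
in-flight callback to a context that can be drained on teardown. A hedged
sketch of the expected calling pattern inside a consumer (assuming the new
mlx5_cmd_exec_cb() API; error handling elided):

        struct mlx5_async_ctx async_ctx;
        struct mlx5_async_work work;   /* typically embedded in the request */

        mlx5_cmd_init_async_ctx(dev, &async_ctx);
        err = mlx5_core_create_mkey_cb(dev, &mkey, &async_ctx, in, inlen,
                                       out, outlen, my_callback, &work);
        /* ... */
        mlx5_cmd_cleanup_async_ctx(&async_ctx); /* waits for all callbacks */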
index a83b517..4102538 100644 (file)
@@ -48,6 +48,7 @@ enum {
 struct mlx5_pages_req {
        struct mlx5_core_dev *dev;
        u16     func_id;
+       u8      ec_function;
        s32     npages;
        struct work_struct work;
 };
@@ -143,6 +144,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
        MLX5_SET(query_pages_in, in, op_mod, boot ?
                 MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
                 MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
+       MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev));
 
        err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (err)
@@ -253,7 +255,8 @@ err_mapping:
        return err;
 }
 
-static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
+static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
+                            bool ec_function)
 {
        u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(manage_pages_in)]   = {0};
@@ -262,6 +265,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
        MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
        MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
        MLX5_SET(manage_pages_in, in, function_id, func_id);
+       MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
        err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (err)
@@ -270,7 +274,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
 }
 
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
-                     int notify_fail)
+                     int notify_fail, bool ec_function)
 {
        u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
        int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
@@ -305,6 +309,7 @@ retry:
        MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
        MLX5_SET(manage_pages_in, in, function_id, func_id);
        MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+       MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
        err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (err) {
@@ -316,8 +321,11 @@ retry:
        dev->priv.fw_pages += npages;
        if (func_id)
                dev->priv.vfs_pages += npages;
+       else if (mlx5_core_is_ecpf(dev) && !ec_function)
+               dev->priv.peer_pf_pages += npages;
 
-       mlx5_core_dbg(dev, "err %d\n", err);
+       mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n",
+                     npages, ec_function, func_id, err);
 
        kvfree(in);
        return 0;
@@ -328,7 +336,7 @@ out_4k:
 out_free:
        kvfree(in);
        if (notify_fail)
-               page_notify_fail(dev, func_id);
+               page_notify_fail(dev, func_id, ec_function);
        return err;
 }
 
@@ -364,7 +372,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 }
 
 static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
-                        int *nclaimed)
+                        int *nclaimed, bool ec_function)
 {
        int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
        u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
@@ -385,6 +393,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
        MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
        MLX5_SET(manage_pages_in, in, function_id, func_id);
        MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+       MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
        mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
        err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
@@ -410,6 +419,8 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
        dev->priv.fw_pages -= num_claimed;
        if (func_id)
                dev->priv.vfs_pages -= num_claimed;
+       else if (mlx5_core_is_ecpf(dev) && !ec_function)
+               dev->priv.peer_pf_pages -= num_claimed;
 
 out_free:
        kvfree(out);
@@ -423,9 +434,10 @@ static void pages_work_handler(struct work_struct *work)
        int err = 0;
 
        if (req->npages < 0)
-               err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
+               err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL,
+                                   req->ec_function);
        else if (req->npages > 0)
-               err = give_pages(dev, req->func_id, req->npages, 1);
+               err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function);
 
        if (err)
                mlx5_core_warn(dev, "%s fail %d\n",
@@ -434,6 +446,10 @@ static void pages_work_handler(struct work_struct *work)
        kfree(req);
 }
 
+enum {
+       EC_FUNCTION_MASK = 0x8000,
+};
+
 static int req_pages_handler(struct notifier_block *nb,
                             unsigned long type, void *data)
 {
@@ -441,6 +457,7 @@ static int req_pages_handler(struct notifier_block *nb,
        struct mlx5_core_dev *dev;
        struct mlx5_priv *priv;
        struct mlx5_eqe *eqe;
+       bool ec_function;
        u16 func_id;
        s32 npages;
 
@@ -450,6 +467,7 @@ static int req_pages_handler(struct notifier_block *nb,
 
        func_id = be16_to_cpu(eqe->data.req_pages.func_id);
        npages  = be32_to_cpu(eqe->data.req_pages.num_pages);
+       ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK;
        mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
                      func_id, npages);
        req = kzalloc(sizeof(*req), GFP_ATOMIC);
@@ -461,6 +479,7 @@ static int req_pages_handler(struct notifier_block *nb,
        req->dev = dev;
        req->func_id = func_id;
        req->npages = npages;
+       req->ec_function = ec_function;
        INIT_WORK(&req->work, pages_work_handler);
        queue_work(dev->priv.pg_wq, &req->work);
        return NOTIFY_OK;
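
The ec_function flag shares a big-endian 16-bit EQE field with other bits,
hence the masking above: after be16_to_cpu(), a value of 0x8000 means the
page request originates from the embedded CPU function itself, while 0x0000
with func_id 0 refers to the external host PF; this is exactly the
distinction the peer_pf_pages accounting in give_pages()/reclaim_pages()
above relies on.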
@@ -479,7 +498,7 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
        mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
                      npages, boot ? "boot" : "init", func_id);
 
-       return give_pages(dev, func_id, npages, 0);
+       return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev));
 }
 
 enum {
@@ -513,7 +532,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
                        fwp = rb_entry(p, struct fw_page, rb_node);
                        err = reclaim_pages(dev, fwp->func_id,
                                            optimal_reclaimed_pages(),
-                                           &nclaimed);
+                                           &nclaimed, mlx5_core_is_ecpf(dev));
 
                        if (err) {
                                mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
@@ -535,6 +554,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
        WARN(dev->priv.vfs_pages,
             "VFs FW pages counter is %d after reclaiming all pages\n",
             dev->priv.vfs_pages);
+       WARN(dev->priv.peer_pf_pages,
+            "Peer PF FW pages counter is %d after reclaiming all pages\n",
+            dev->priv.peer_pf_pages);
 
        return 0;
 }
@@ -567,10 +589,10 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
        flush_workqueue(dev->priv.pg_wq);
 }
 
-int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
 {
        unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
-       int prev_vfs_pages = dev->priv.vfs_pages;
+       int prev_pages = *pages;
 
        /* In case of internal error we will free the pages manually later */
        if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -578,16 +600,16 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
                return 0;
        }
 
-       mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
+       mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages,
                      dev->priv.name);
-       while (dev->priv.vfs_pages) {
+       while (*pages) {
                if (time_after(jiffies, end)) {
-                       mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages);
+                       mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages);
                        return -ETIMEDOUT;
                }
-               if (dev->priv.vfs_pages < prev_vfs_pages) {
+               if (*pages < prev_pages) {
                        end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
-                       prev_vfs_pages = dev->priv.vfs_pages;
+                       prev_pages = *pages;
                }
                msleep(50);
        }
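
Passing the counter by pointer lets one wait loop serve any page pool; the
two call sites this series relies on (the second lives in the new ecpf.c,
which is not part of this excerpt, so it is shown here as an assumption):

        /* SR-IOV teardown: wait for pages given to VFs */
        mlx5_wait_for_pages(dev, &dev->priv.vfs_pages);

        /* ECPF teardown: wait for pages given to the peer (host) PF */
        mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages);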
index 2b82f35..b815428 100644 (file)
  * SOFTWARE.
  */
 
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
 #include <linux/mlx5/port.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
@@ -157,44 +154,6 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
                                    sizeof(out), MLX5_REG_MLCR, 0, 1);
 }
 
-int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
-                             u32 *proto_cap, int proto_mask)
-{
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-       int err;
-
-       err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
-       if (err)
-               return err;
-
-       if (proto_mask == MLX5_PTYS_EN)
-               *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
-       else
-               *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap);
-
-int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
-                               u32 *proto_admin, int proto_mask)
-{
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-       int err;
-
-       err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
-       if (err)
-               return err;
-
-       if (proto_mask == MLX5_PTYS_EN)
-               *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
-       else
-               *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin);
-
 int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
                                    u8 *link_width_oper, u8 local_port)
 {
@@ -211,23 +170,6 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
 
-int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
-                                  u32 *proto_oper, u8 local_port)
-{
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-       int err;
-
-       err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN,
-                                  local_port);
-       if (err)
-               return err;
-
-       *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-
-       return 0;
-}
-EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper);
-
 int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
                                  u8 *proto_oper, u8 local_port)
 {
@@ -245,35 +187,6 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
 
-int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
-                      u32 proto_admin, int proto_mask)
-{
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-       u32 in[MLX5_ST_SZ_DW(ptys_reg)];
-       u8 an_disable_admin;
-       u8 an_disable_cap;
-       u8 an_status;
-
-       mlx5_query_port_autoneg(dev, proto_mask, &an_status,
-                               &an_disable_cap, &an_disable_admin);
-       if (!an_disable_cap && an_disable)
-               return -EPERM;
-
-       memset(in, 0, sizeof(in));
-
-       MLX5_SET(ptys_reg, in, local_port, 1);
-       MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
-       MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
-       if (proto_mask == MLX5_PTYS_EN)
-               MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
-       else
-               MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin);
-
-       return mlx5_core_access_reg(dev, in, sizeof(in), out,
-                                   sizeof(out), MLX5_REG_PTYS, 0, 1);
-}
-EXPORT_SYMBOL_GPL(mlx5_set_port_ptys);
-
 /* This function should be used only after setting a port register */
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
 {
@@ -606,25 +519,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_pfc);
 
-void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask,
-                            u8 *an_status,
-                            u8 *an_disable_cap, u8 *an_disable_admin)
-{
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-
-       *an_status = 0;
-       *an_disable_cap = 0;
-       *an_disable_admin = 0;
-
-       if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1))
-               return;
-
-       *an_status = MLX5_GET(ptys_reg, out, an_status);
-       *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
-       *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg);
-
 int mlx5_max_tc(struct mlx5_core_dev *mdev)
 {
        u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8;
index 6e17803..7b23fa8 100644 (file)
@@ -147,7 +147,7 @@ out:
        if (MLX5_ESWITCH_MANAGER(dev))
                mlx5_eswitch_disable_sriov(dev->priv.eswitch);
 
-       if (mlx5_wait_for_vf_pages(dev))
+       if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
                mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
 }
 
index 9b150ce..ef95fec 100644 (file)
@@ -64,7 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
 }
 
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
-                                 u16 vport, u8 state)
+                                 u16 vport, u8 other_vport, u8 state)
 {
        u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]   = {0};
        u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
@@ -73,8 +73,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
                 MLX5_CMD_OP_MODIFY_VPORT_STATE);
        MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
        MLX5_SET(modify_vport_state_in, in, vport_number, vport);
-       if (vport)
-               MLX5_SET(modify_vport_state_in, in, other_vport, 1);
+       MLX5_SET(modify_vport_state_in, in, other_vport, other_vport);
        MLX5_SET(modify_vport_state_in, in, admin_state, state);
 
        return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
@@ -255,7 +254,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu)
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu);
 
 int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
-                                 u32 vport,
+                                 u16 vport,
                                  enum mlx5_list_type list_type,
                                  u8 addr_list[][ETH_ALEN],
                                  int *list_size)
@@ -373,7 +372,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
 
 int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
-                              u32 vport,
+                              u16 vport,
                               u16 vlans[],
                               int *size)
 {
@@ -526,7 +525,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
 
 int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
-                                   u32 vport, u64 node_guid)
+                                   u16 vport, u64 node_guid)
 {
        int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
        void *nic_vport_context;
@@ -827,7 +826,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
 EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
 
 int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
-                                u32 vport,
+                                u16 vport,
                                 int *promisc_uc,
                                 int *promisc_mc,
                                 int *promisc_all)
@@ -1057,7 +1056,7 @@ free:
 EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
 
 int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
-                               u64 *rx_discard_vport_down,
+                               u8 other_vport, u64 *rx_discard_vport_down,
                                u64 *tx_discard_vport_down)
 {
        u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
@@ -1068,8 +1067,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
                 MLX5_CMD_OP_QUERY_VNIC_ENV);
        MLX5_SET(query_vnic_env_in, in, op_mod, 0);
        MLX5_SET(query_vnic_env_in, in, vport_number, vport);
-       if (vport)
-               MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+       MLX5_SET(query_vnic_env_in, in, other_vport, other_vport);
 
        err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
        if (err)
index bbf45f1..a01d155 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_MLXSW_CORE)       += mlxsw_core.o
 mlxsw_core-objs                        := core.o core_acl_flex_keys.o \
-                                  core_acl_flex_actions.o
+                                  core_acl_flex_actions.o core_env.o
 mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o
 mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o
 obj-$(CONFIG_MLXSW_PCI)                += mlxsw_pci.o
index 4f6fa51..b505d38 100644 (file)
@@ -1062,6 +1062,9 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                        goto err_driver_init;
        }
 
+       if (mlxsw_driver->params_register && !reload)
+               devlink_params_publish(devlink);
+
        return 0;
 
 err_driver_init:
@@ -1131,6 +1134,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
                        return;
        }
 
+       if (mlxsw_core->driver->params_unregister && !reload)
+               devlink_params_unpublish(devlink);
        if (mlxsw_core->driver->fini)
                mlxsw_core->driver->fini(mlxsw_core);
        mlxsw_thermal_fini(mlxsw_core->thermal);
index 4e114f3..cd0c6aa 100644 (file)
@@ -344,6 +344,7 @@ struct mlxsw_bus_info {
        struct mlxsw_fw_rev fw_rev;
        u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN];
        u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN];
+       u8 low_frequency;
 };
 
 struct mlxsw_hwmon;
@@ -394,4 +395,9 @@ static inline void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
 
 #endif
 
+enum mlxsw_devlink_param_id {
+       MLXSW_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+       MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL,
+};
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
new file mode 100644 (file)
index 0000000..160d6cd
--- /dev/null
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+
+#include "core.h"
+#include "core_env.h"
+#include "item.h"
+#include "reg.h"
+
+static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
+                                         bool *qsfp)
+{
+       char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
+       char mcia_pl[MLXSW_REG_MCIA_LEN];
+       u8 ident;
+       int err;
+
+       mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1,
+                           MLXSW_REG_MCIA_I2C_ADDR_LOW);
+       err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
+       if (err)
+               return err;
+       mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+       ident = eeprom_tmp[0];
+       switch (ident) {
+       case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP:
+               *qsfp = false;
+               break;
+       case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: /* fall-through */
+       case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */
+       case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: /* fall-through */
+       case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD:
+               *qsfp = true;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
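
For reference, a minimal standalone sketch of the same identifier classification (hypothetical userspace program; the hex values mirror the MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_* constants added later in this patch, which follow SFF-8024 identifier assignments):

#include <stdbool.h>
#include <stdio.h>

/* Returns 0 and sets *qsfp on a recognized identifier, -1 otherwise. */
static int classify_ident(unsigned char ident, bool *qsfp)
{
	switch (ident) {
	case 0x03:		/* SFP/SFP+ */
		*qsfp = false;
		return 0;
	case 0x0C:		/* QSFP */
	case 0x0D:		/* QSFP+ */
	case 0x11:		/* QSFP28 */
	case 0x18:		/* QSFP-DD */
		*qsfp = true;
		return 0;
	default:
		return -1;
	}
}

int main(void)
{
	bool qsfp;

	if (!classify_ident(0x11, &qsfp))
		printf("QSFP family: %s\n", qsfp ? "yes" : "no");
	return 0;
}
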
+
+int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
+                                        int off, int *temp)
+{
+       char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
+       union {
+               u8 buf[MLXSW_REG_MCIA_TH_ITEM_SIZE];
+               u16 temp;
+       } temp_thresh;
+       char mcia_pl[MLXSW_REG_MCIA_LEN] = {0};
+       char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
+       u16 module_temp;
+       bool qsfp;
+       int err;
+
+       mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
+                           1);
+       err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl);
+       if (err)
+               return err;
+
+       /* Don't read temperature thresholds for a module with no valid info. */
+       mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL);
+       switch (module_temp) {
+       case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */
+       case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
+       case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
+       case MLXSW_REG_MTBR_INDEX_NA:
+               *temp = 0;
+               return 0;
+       default:
+               /* Do not consider thresholds for zero temperature. */
+               if (!MLXSW_REG_MTMP_TEMP_TO_MC(module_temp)) {
+                       *temp = 0;
+                       return 0;
+               }
+               break;
+       }
+
+       /* Read Free Side Device Temperature Thresholds from page 03h
+        * (MSB at lower byte address).
+        * Bytes:
+        * 128-129 - Temp High Alarm (SFP_TEMP_HIGH_ALARM);
+        * 130-131 - Temp Low Alarm (SFP_TEMP_LOW_ALARM);
+        * 132-133 - Temp High Warning (SFP_TEMP_HIGH_WARN);
+        * 134-135 - Temp Low Warning (SFP_TEMP_LOW_WARN);
+        */
+
+       /* Validate module identifier value. */
+       err = mlxsw_env_validate_cable_ident(core, module, &qsfp);
+       if (err)
+               return err;
+
+       if (qsfp)
+               mlxsw_reg_mcia_pack(mcia_pl, module, 0,
+                                   MLXSW_REG_MCIA_TH_PAGE_NUM,
+                                   MLXSW_REG_MCIA_TH_PAGE_OFF + off,
+                                   MLXSW_REG_MCIA_TH_ITEM_SIZE,
+                                   MLXSW_REG_MCIA_I2C_ADDR_LOW);
+       else
+               mlxsw_reg_mcia_pack(mcia_pl, module, 0,
+                                   MLXSW_REG_MCIA_PAGE0_LO,
+                                   off, MLXSW_REG_MCIA_TH_ITEM_SIZE,
+                                   MLXSW_REG_MCIA_I2C_ADDR_HIGH);
+
+       err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
+       if (err)
+               return err;
+
+       mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+       memcpy(temp_thresh.buf, eeprom_tmp, MLXSW_REG_MCIA_TH_ITEM_SIZE);
+       *temp = temp_thresh.temp * 1000;
+
+       return 0;
+}
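
A minimal sketch of decoding one two-byte threshold field, assuming the MSB-at-lower-address layout noted in the comment above and SFF-8636's signed 1/256 degC encoding (standalone illustration, not driver code):

#include <stdint.h>
#include <stdio.h>

/* buf points at one two-byte threshold field, MSB first. */
static int threshold_to_mdegc(const uint8_t *buf)
{
	int16_t raw = (int16_t)((buf[0] << 8) | buf[1]);

	return (raw * 1000) / 256;	/* 1/256 degC -> millidegrees */
}

int main(void)
{
	uint8_t high_alarm[2] = { 0x4B, 0x00 };	/* 75 degC */

	printf("%d\n", threshold_to_mdegc(high_alarm));	/* 75000 */
	return 0;
}
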
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
new file mode 100644 (file)
index 0000000..6dbdf63
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_CORE_ENV_H
+#define _MLXSW_CORE_ENV_H
+
+int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
+                                        int off, int *temp);
+
+#endif
index e04e816..6956bbe 100644 (file)
@@ -7,8 +7,10 @@
 #include <linux/sysfs.h>
 #include <linux/hwmon.h>
 #include <linux/err.h>
+#include <linux/sfp.h>
 
 #include "core.h"
+#include "core_env.h"
 
 #define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127
 #define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \
@@ -30,6 +32,7 @@ struct mlxsw_hwmon {
        struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1];
        struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT];
        unsigned int attrs_count;
+       u8 sensor_count;
 };
 
 static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
@@ -121,6 +124,27 @@ static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev,
        return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl));
 }
 
+static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev,
+                                         struct device_attribute *attr,
+                                         char *buf)
+{
+       struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+                       container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+       struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+       char fore_pl[MLXSW_REG_FORE_LEN];
+       bool fault;
+       int err;
+
+       err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(fore), fore_pl);
+       if (err) {
+               dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n");
+               return err;
+       }
+       mlxsw_reg_fore_unpack(fore_pl, mlwsw_hwmon_attr->type_index, &fault);
+
+       return sprintf(buf, "%u\n", fault);
+}
+
 static ssize_t mlxsw_hwmon_pwm_show(struct device *dev,
                                    struct device_attribute *attr,
                                    char *buf)
@@ -167,12 +191,160 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev,
        return len;
 }
 
+static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev,
+                                           struct device_attribute *attr,
+                                           char *buf)
+{
+       struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+                       container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+       struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+       char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
+       u16 temp;
+       u8 module;
+       int err;
+
+       module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
+       mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
+                           1);
+       err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl);
+       if (err) {
+               dev_err(dev, "Failed to query module temperature sensor\n");
+               return err;
+       }
+
+       mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
+       /* Update status and temperature cache. */
+       switch (temp) {
+       case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
+       case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
+       case MLXSW_REG_MTBR_INDEX_NA:
+               temp = 0;
+               break;
+       case MLXSW_REG_MTBR_BAD_SENS_INFO:
+               /* An untrusted cable is connected; reading temperature
+                * from its sensor is unreliable.
+                */
+               temp = 0;
+               break;
+       default:
+               temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
+               break;
+       }
+
+       return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev,
+                                                 struct device_attribute *attr,
+                                                 char *buf)
+{
+       struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+                       container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+       struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+       char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
+       u8 module, fault;
+       u16 temp;
+       int err;
+
+       module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
+       mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
+                           1);
+       err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl);
+       if (err) {
+               dev_err(dev, "Failed to query module temperature sensor\n");
+               return err;
+       }
+
+       mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
+
+       /* Update status and temperature cache. */
+       switch (temp) {
+       case MLXSW_REG_MTBR_BAD_SENS_INFO:
+               /* An untrusted cable is connected; reading temperature
+                * from its sensor is unreliable.
+                */
+               fault = 1;
+               break;
+       case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
+       case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
+       case MLXSW_REG_MTBR_INDEX_NA:
+       default:
+               fault = 0;
+               break;
+       }
+
+       return sprintf(buf, "%u\n", fault);
+}
+
+static ssize_t
+mlxsw_hwmon_module_temp_critical_show(struct device *dev,
+                                     struct device_attribute *attr, char *buf)
+{
+       struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+                       container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+       struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+       int temp;
+       u8 module;
+       int err;
+
+       module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
+       err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module,
+                                                  SFP_TEMP_HIGH_WARN, &temp);
+       if (err) {
+               dev_err(dev, "Failed to query module temperature thresholds\n");
+               return err;
+       }
+
+       return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t
+mlxsw_hwmon_module_temp_emergency_show(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char *buf)
+{
+       struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+                       container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+       struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+       u8 module;
+       int temp;
+       int err;
+
+       module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
+       err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module,
+                                                  SFP_TEMP_HIGH_ALARM, &temp);
+       if (err) {
+               dev_err(dev, "Failed to query module temperature thresholds\n");
+               return err;
+       }
+
+       return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t
+mlxsw_hwmon_module_temp_label_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buf)
+{
+       struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+                       container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+
+       return sprintf(buf, "front panel %03u\n",
+                      mlwsw_hwmon_attr->type_index);
+}
+
 enum mlxsw_hwmon_attr_type {
        MLXSW_HWMON_ATTR_TYPE_TEMP,
        MLXSW_HWMON_ATTR_TYPE_TEMP_MAX,
        MLXSW_HWMON_ATTR_TYPE_TEMP_RST,
        MLXSW_HWMON_ATTR_TYPE_FAN_RPM,
+       MLXSW_HWMON_ATTR_TYPE_FAN_FAULT,
        MLXSW_HWMON_ATTR_TYPE_PWM,
+       MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE,
+       MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT,
+       MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT,
+       MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
+       MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
 };
 
 static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
@@ -209,6 +381,12 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
                snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
                         "fan%u_input", num + 1);
                break;
+       case MLXSW_HWMON_ATTR_TYPE_FAN_FAULT:
+               mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_fault_show;
+               mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+               snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+                        "fan%u_fault", num + 1);
+               break;
        case MLXSW_HWMON_ATTR_TYPE_PWM:
                mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show;
                mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store;
@@ -216,6 +394,40 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
                snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
                         "pwm%u", num + 1);
                break;
+       case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE:
+               mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_module_temp_show;
+               mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+               snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+                        "temp%u_input", num + 1);
+               break;
+       case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT:
+               mlxsw_hwmon_attr->dev_attr.show =
+                                       mlxsw_hwmon_module_temp_fault_show;
+               mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+               snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+                        "temp%u_fault", num + 1);
+               break;
+       case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT:
+               mlxsw_hwmon_attr->dev_attr.show =
+                       mlxsw_hwmon_module_temp_critical_show;
+               mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+               snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+                        "temp%u_crit", num + 1);
+               break;
+       case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG:
+               mlxsw_hwmon_attr->dev_attr.show =
+                       mlxsw_hwmon_module_temp_emergency_show;
+               mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+               snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+                        "temp%u_emergency", num + 1);
+               break;
+       case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL:
+               mlxsw_hwmon_attr->dev_attr.show =
+                       mlxsw_hwmon_module_temp_label_show;
+               mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+               snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+                        "temp%u_label", num + 1);
+               break;
        default:
                WARN_ON(1);
        }
@@ -233,7 +445,6 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
 {
        char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0};
        char mtmp_pl[MLXSW_REG_MTMP_LEN];
-       u8 sensor_count;
        int i;
        int err;
 
@@ -242,8 +453,8 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
                dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n");
                return err;
        }
-       sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl);
-       for (i = 0; i < sensor_count; i++) {
+       mlxsw_hwmon->sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl);
+       for (i = 0; i < mlxsw_hwmon->sensor_count; i++) {
                mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true);
                err = mlxsw_reg_write(mlxsw_hwmon->core,
                                      MLXSW_REG(mtmp), mtmp_pl);
@@ -280,10 +491,14 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon)
        mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
        num = 0;
        for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) {
-               if (tacho_active & BIT(type_index))
+               if (tacho_active & BIT(type_index)) {
                        mlxsw_hwmon_attr_add(mlxsw_hwmon,
                                             MLXSW_HWMON_ATTR_TYPE_FAN_RPM,
+                                            type_index, num);
+                       mlxsw_hwmon_attr_add(mlxsw_hwmon,
+                                            MLXSW_HWMON_ATTR_TYPE_FAN_FAULT,
                                             type_index, num++);
+               }
        }
        num = 0;
        for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) {
@@ -295,6 +510,53 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon)
        return 0;
 }
 
+static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+       unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core);
+       char pmlp_pl[MLXSW_REG_PMLP_LEN] = {0};
+       int i, index;
+       u8 width;
+       int err;
+
+       /* Add extra attributes for module temperature. Module sensor
+        * indices start at sensor_count, since all indices below
+        * sensor_count are already used by the sensors read through the
+        * mtmp register in mlxsw_hwmon_temp_init().
+        */
+       index = mlxsw_hwmon->sensor_count;
+       for (i = 1; i < module_count; i++) {
+               mlxsw_reg_pmlp_pack(pmlp_pl, i);
+               err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp),
+                                     pmlp_pl);
+               if (err) {
+                       dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n",
+                               i);
+                       return err;
+               }
+               width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+               if (!width)
+                       continue;
+               mlxsw_hwmon_attr_add(mlxsw_hwmon,
+                                    MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index,
+                                    index);
+               mlxsw_hwmon_attr_add(mlxsw_hwmon,
+                                    MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT,
+                                    index, index);
+               mlxsw_hwmon_attr_add(mlxsw_hwmon,
+                                    MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT,
+                                    index, index);
+               mlxsw_hwmon_attr_add(mlxsw_hwmon,
+                                    MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
+                                    index, index);
+               mlxsw_hwmon_attr_add(mlxsw_hwmon,
+                                    MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
+                                    index, index);
+               index++;
+       }
+
+       return 0;
+}
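
As a quick illustration of the index math in mlxsw_hwmon_module_init() above, assuming (hypothetically) 2 mtmp sensors and 3 cabled modules, the resulting hwmon attribute names come out as follows (num == index, and the name uses num + 1):

#include <stdio.h>

int main(void)
{
	unsigned int sensor_count = 2;	/* assumed mtmp sensors */
	unsigned int modules = 3;	/* assumed cabled modules */
	unsigned int index;

	for (index = sensor_count; index < sensor_count + modules; index++)
		printf("module %u -> temp%u_input\n",
		       index - sensor_count, index + 1);
	return 0;
}
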
+
 int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
                     const struct mlxsw_bus_info *mlxsw_bus_info,
                     struct mlxsw_hwmon **p_hwmon)
@@ -317,6 +579,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
        if (err)
                goto err_fans_init;
 
+       err = mlxsw_hwmon_module_init(mlxsw_hwmon);
+       if (err)
+               goto err_temp_module_init;
+
        mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group;
        mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs;
 
@@ -333,6 +599,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
        return 0;
 
 err_hwmon_register:
+err_temp_module_init:
 err_fans_init:
 err_temp_init:
        kfree(mlxsw_hwmon);
index 61f897b..821fef2 100644 (file)
 #include "core.h"
 
 #define MLXSW_THERMAL_POLL_INT 1000    /* ms */
-#define MLXSW_THERMAL_MAX_TEMP 110000  /* 110C */
+#define MLXSW_THERMAL_SLOW_POLL_INT    20000   /* ms */
+#define MLXSW_THERMAL_ASIC_TEMP_NORM   75000   /* 75C */
+#define MLXSW_THERMAL_ASIC_TEMP_HIGH   85000   /* 85C */
+#define MLXSW_THERMAL_ASIC_TEMP_HOT    105000  /* 105C */
+#define MLXSW_THERMAL_ASIC_TEMP_CRIT   110000  /* 110C */
+#define MLXSW_THERMAL_HYSTERESIS_TEMP  5000    /* 5C */
 #define MLXSW_THERMAL_MAX_STATE        10
 #define MLXSW_THERMAL_MAX_DUTY 255
 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values
 #define MLXSW_THERMAL_SPEED_MAX                (MLXSW_THERMAL_MAX_STATE * 2)
 #define MLXSW_THERMAL_SPEED_MIN_LEVEL  2               /* 20% */
 
+/* External cooling devices allowed to bind to mlxsw thermal zones. */
+static char * const mlxsw_thermal_external_allowed_cdev[] = {
+       "mlxreg_fan",
+};
+
 struct mlxsw_thermal_trip {
        int     type;
        int     temp;
+       int     hyst;
        int     min_state;
        int     max_state;
 };
@@ -36,32 +47,29 @@ struct mlxsw_thermal_trip {
 static const struct mlxsw_thermal_trip default_thermal_trips[] = {
        {       /* In range - 0-40% PWM */
                .type           = THERMAL_TRIP_ACTIVE,
-               .temp           = 75000,
+               .temp           = MLXSW_THERMAL_ASIC_TEMP_NORM,
+               .hyst           = MLXSW_THERMAL_HYSTERESIS_TEMP,
                .min_state      = 0,
                .max_state      = (4 * MLXSW_THERMAL_MAX_STATE) / 10,
        },
-       {       /* High - 40-100% PWM */
-               .type           = THERMAL_TRIP_ACTIVE,
-               .temp           = 80000,
-               .min_state      = (4 * MLXSW_THERMAL_MAX_STATE) / 10,
-               .max_state      = MLXSW_THERMAL_MAX_STATE,
-       },
        {
-               /* Very high - 100% PWM */
+               /* In range - 40-100% PWM */
                .type           = THERMAL_TRIP_ACTIVE,
-               .temp           = 85000,
-               .min_state      = MLXSW_THERMAL_MAX_STATE,
+               .temp           = MLXSW_THERMAL_ASIC_TEMP_HIGH,
+               .hyst           = MLXSW_THERMAL_HYSTERESIS_TEMP,
+               .min_state      = (4 * MLXSW_THERMAL_MAX_STATE) / 10,
                .max_state      = MLXSW_THERMAL_MAX_STATE,
        },
        {       /* Warning */
                .type           = THERMAL_TRIP_HOT,
-               .temp           = 105000,
+               .temp           = MLXSW_THERMAL_ASIC_TEMP_HOT,
+               .hyst           = MLXSW_THERMAL_HYSTERESIS_TEMP,
                .min_state      = MLXSW_THERMAL_MAX_STATE,
                .max_state      = MLXSW_THERMAL_MAX_STATE,
        },
        {       /* Critical - soft poweroff */
                .type           = THERMAL_TRIP_CRITICAL,
-               .temp           = MLXSW_THERMAL_MAX_TEMP,
+               .temp           = MLXSW_THERMAL_ASIC_TEMP_CRIT,
                .min_state      = MLXSW_THERMAL_MAX_STATE,
                .max_state      = MLXSW_THERMAL_MAX_STATE,
        }
@@ -76,6 +84,7 @@ struct mlxsw_thermal {
        struct mlxsw_core *core;
        const struct mlxsw_bus_info *bus_info;
        struct thermal_zone_device *tzdev;
+       int polling_delay;
        struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
        u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
        struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
@@ -103,6 +112,13 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
                if (thermal->cdevs[i] == cdev)
                        return i;
 
+       /* Allow mlxsw thermal zone binding to an external cooling device */
+       for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
+               if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
+                           sizeof(cdev->type)))
+                       return 0;
+       }
+
        return -ENODEV;
 }
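
The allow-list match above uses the kernel's strnstr() bounded by sizeof(cdev->type); a rough standalone equivalent using plain strstr() (assumption: the type string is NUL-terminated here) would be:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

static const char * const allowed[] = { "mlxreg_fan" };

static int is_allowed_cdev(const char *type)
{
	size_t i;

	for (i = 0; i < sizeof(allowed) / sizeof(allowed[0]); i++)
		if (strstr(type, allowed[i]))
			return 1;
	return 0;
}

int main(void)
{
	printf("%d\n", is_allowed_cdev("mlxreg_fan"));	/* 1 */
	return 0;
}
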
 
@@ -172,7 +188,7 @@ static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev,
        mutex_lock(&tzdev->lock);
 
        if (mode == THERMAL_DEVICE_ENABLED)
-               tzdev->polling_delay = MLXSW_THERMAL_POLL_INT;
+               tzdev->polling_delay = thermal->polling_delay;
        else
                tzdev->polling_delay = 0;
 
@@ -237,13 +253,31 @@ static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
        struct mlxsw_thermal *thermal = tzdev->devdata;
 
        if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
-           temp > MLXSW_THERMAL_MAX_TEMP)
+           temp > MLXSW_THERMAL_ASIC_TEMP_CRIT)
                return -EINVAL;
 
        thermal->trips[trip].temp = temp;
        return 0;
 }
 
+static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
+                                      int trip, int *p_hyst)
+{
+       struct mlxsw_thermal *thermal = tzdev->devdata;
+
+       *p_hyst = thermal->trips[trip].hyst;
+       return 0;
+}
+
+static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
+                                      int trip, int hyst)
+{
+       struct mlxsw_thermal *thermal = tzdev->devdata;
+
+       thermal->trips[trip].hyst = hyst;
+       return 0;
+}
+
 static struct thermal_zone_device_ops mlxsw_thermal_ops = {
        .bind = mlxsw_thermal_bind,
        .unbind = mlxsw_thermal_unbind,
@@ -253,6 +287,8 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = {
        .get_trip_type  = mlxsw_thermal_get_trip_type,
        .get_trip_temp  = mlxsw_thermal_get_trip_temp,
        .set_trip_temp  = mlxsw_thermal_set_trip_temp,
+       .get_trip_hyst  = mlxsw_thermal_get_trip_hyst,
+       .set_trip_hyst  = mlxsw_thermal_set_trip_hyst,
 };
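
With get_trip_hyst/set_trip_hyst wired up, a trip that engages at .temp is typically released only once the temperature drops below .temp - .hyst. A simplified standalone model of that behavior (an assumption about how the thermal core consumes these values, not code taken from it):

#include <stdbool.h>
#include <stdio.h>

/* Engage at temp, release only below temp - hyst; millidegrees. */
static bool trip_update(bool active, int cur, int temp, int hyst)
{
	if (!active)
		return cur >= temp;
	return cur > temp - hyst;
}

int main(void)
{
	bool active = false;

	active = trip_update(active, 76000, 75000, 5000);	/* engages */
	printf("%d\n", active);					/* 1 */
	active = trip_update(active, 71000, 75000, 5000);	/* stays on */
	printf("%d\n", active);					/* 1 */
	active = trip_update(active, 69000, 75000, 5000);	/* releases */
	printf("%d\n", active);					/* 0 */
	return 0;
}
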
 
 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
@@ -407,8 +443,9 @@ int mlxsw_thermal_init(struct mlxsw_core *core,
                if (pwm_active & BIT(i)) {
                        struct thermal_cooling_device *cdev;
 
-                       cdev = thermal_cooling_device_register("Fan", thermal,
-                                                       &mlxsw_cooling_ops);
+                       cdev = thermal_cooling_device_register("mlxsw_fan",
+                                                              thermal,
+                                                              &mlxsw_cooling_ops);
                        if (IS_ERR(cdev)) {
                                err = PTR_ERR(cdev);
                                dev_err(dev, "Failed to register cooling device\n");
@@ -423,13 +460,17 @@ int mlxsw_thermal_init(struct mlxsw_core *core,
                thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL,
                                                 i);
 
+       thermal->polling_delay = bus_info->low_frequency ?
+                                MLXSW_THERMAL_SLOW_POLL_INT :
+                                MLXSW_THERMAL_POLL_INT;
+
        thermal->tzdev = thermal_zone_device_register("mlxsw",
                                                      MLXSW_THERMAL_NUM_TRIPS,
                                                      MLXSW_THERMAL_TRIP_MASK,
                                                      thermal,
                                                      &mlxsw_thermal_ops,
                                                      NULL, 0,
-                                                     MLXSW_THERMAL_POLL_INT);
+                                                     thermal->polling_delay);
        if (IS_ERR(thermal->tzdev)) {
                err = PTR_ERR(thermal->tzdev);
                dev_err(dev, "Failed to register thermal zone\n");
index 798bd5a..a87ca6b 100644 (file)
@@ -503,6 +503,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
        mlxsw_i2c->bus_info.device_kind = id->name;
        mlxsw_i2c->bus_info.device_name = client->name;
        mlxsw_i2c->bus_info.dev = &client->dev;
+       mlxsw_i2c->bus_info.low_frequency = true;
        mlxsw_i2c->dev = &client->dev;
 
        err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
index 5f8066a..cbd0193 100644 (file)
@@ -2199,6 +2199,14 @@ MLXSW_ITEM32(reg, pagt, size, 0x00, 0, 8);
  */
 MLXSW_ITEM32(reg, pagt, acl_group_id, 0x08, 0, 16);
 
+/* reg_pagt_multi
+ * Multi-ACL
+ * 0 - This ACL is the last ACL in the multi-ACL
+ * 1 - This ACL is part of a multi-ACL
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pagt, multi, 0x30, 31, 1, 0x04, 0x00, false);
+
 /* reg_pagt_acl_id
  * ACL identifier
  * Access: RW
@@ -2212,12 +2220,13 @@ static inline void mlxsw_reg_pagt_pack(char *payload, u16 acl_group_id)
 }
 
 static inline void mlxsw_reg_pagt_acl_id_pack(char *payload, int index,
-                                             u16 acl_id)
+                                             u16 acl_id, bool multi)
 {
        u8 size = mlxsw_reg_pagt_size_get(payload);
 
        if (index >= size)
                mlxsw_reg_pagt_size_set(payload, index + 1);
+       mlxsw_reg_pagt_multi_set(payload, index, multi);
        mlxsw_reg_pagt_acl_id_set(payload, index, acl_id);
 }
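
An illustrative fragment (not compilable on its own; acl_group_id, first_acl_id and last_acl_id are hypothetical) showing how a caller would chain two ACLs with the new multi flag — every entry except the last passes true:

	char pagt_pl[MLXSW_REG_PAGT_LEN];
	int err;

	mlxsw_reg_pagt_pack(pagt_pl, acl_group_id);
	mlxsw_reg_pagt_acl_id_pack(pagt_pl, 0, first_acl_id, true);	/* more follow */
	mlxsw_reg_pagt_acl_id_pack(pagt_pl, 1, last_acl_id, false);	/* last in chain */
	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(pagt), pagt_pl);
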
 
@@ -7866,6 +7875,35 @@ static inline void mlxsw_reg_mfsl_unpack(char *payload, u8 tacho,
                *p_tach_max = mlxsw_reg_mfsl_tach_max_get(payload);
 }
 
+/* FORE - Fan Out of Range Event Register
+ * --------------------------------------
+ * This register reports the status of the controlled fans compared to the
+ * range defined by the MFSL register.
+ */
+#define MLXSW_REG_FORE_ID 0x9007
+#define MLXSW_REG_FORE_LEN 0x0C
+
+MLXSW_REG_DEFINE(fore, MLXSW_REG_FORE_ID, MLXSW_REG_FORE_LEN);
+
+/* reg_fore_fan_under_limit
+ * Fan speed is below the low limit defined in the MFSL register. Each bit
+ * relates to a single tachometer and indicates that the specific tachometer
+ * reading is below the threshold.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, fore, fan_under_limit, 0x00, 16, 10);
+
+static inline void mlxsw_reg_fore_unpack(char *payload, u8 tacho,
+                                        bool *fault)
+{
+       u16 limit;
+
+       if (fault) {
+               limit = mlxsw_reg_fore_fan_under_limit_get(payload);
+               *fault = limit & BIT(tacho);
+       }
+}
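
A standalone sketch of the same bit extraction (assumed register value; BIT() expanded by hand):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t fan_under_limit = 0x0005;	/* assumed: tachos 0 and 2 slow */
	uint8_t tacho;

	for (tacho = 0; tacho < 10; tacho++) {
		bool fault = fan_under_limit & (1u << tacho);

		if (fault)
			printf("fan%u below MFSL low limit\n", tacho + 1);
	}
	return 0;
}
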
+
 /* MTCAP - Management Temperature Capabilities
  * -------------------------------------------
  * This register exposes the capabilities of the device and
@@ -7992,6 +8030,80 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp,
                mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name);
 }
 
+/* MTBR - Management Temperature Bulk Register
+ * -------------------------------------------
+ * This register is used for bulk temperature reading.
+ */
+#define MLXSW_REG_MTBR_ID 0x900F
+#define MLXSW_REG_MTBR_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_MTBR_REC_LEN 0x04 /* record length */
+#define MLXSW_REG_MTBR_REC_MAX_COUNT 47 /* firmware limitation */
+#define MLXSW_REG_MTBR_LEN (MLXSW_REG_MTBR_BASE_LEN +  \
+                           MLXSW_REG_MTBR_REC_LEN *    \
+                           MLXSW_REG_MTBR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN);
+
+/* reg_mtbr_base_sensor_index
+ * Base sensor index to access (0 - ASIC sensor, 1-63 - ambient sensors,
+ * 64-127 are mapped to the SFP+/QSFP modules sequentially).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7);
+
+/* reg_mtbr_num_rec
+ * Request: Number of records to read
+ * Response: Number of records read
+ * See above description for more details.
+ * Range 1..255
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtbr, num_rec, 0x04, 0, 8);
+
+/* reg_mtbr_rec_max_temp
+ * The highest measured temperature from the sensor.
+ * When the bit mte is cleared, the field max_temperature is reserved.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16,
+                    16, MLXSW_REG_MTBR_REC_LEN, 0x00, false);
+
+/* reg_mtbr_rec_temp
+ * Temperature reading from the sensor. Reading is in 0.125 Celsius
+ * degrees units.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16,
+                    MLXSW_REG_MTBR_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index,
+                                      u8 num_rec)
+{
+       MLXSW_REG_ZERO(mtbr, payload);
+       mlxsw_reg_mtbr_base_sensor_index_set(payload, base_sensor_index);
+       mlxsw_reg_mtbr_num_rec_set(payload, num_rec);
+}
+
+/* Error codes from temperature reading */
+enum mlxsw_reg_mtbr_temp_status {
+       MLXSW_REG_MTBR_NO_CONN          = 0x8000,
+       MLXSW_REG_MTBR_NO_TEMP_SENS     = 0x8001,
+       MLXSW_REG_MTBR_INDEX_NA         = 0x8002,
+       MLXSW_REG_MTBR_BAD_SENS_INFO    = 0x8003,
+};
+
+/* Base index for reading modules temperature */
+#define MLXSW_REG_MTBR_BASE_MODULE_INDEX 64
+
+static inline void mlxsw_reg_mtbr_temp_unpack(char *payload, int rec_ind,
+                                             u16 *p_temp, u16 *p_max_temp)
+{
+       if (p_temp)
+               *p_temp = mlxsw_reg_mtbr_rec_temp_get(payload, rec_ind);
+       if (p_max_temp)
+               *p_max_temp = mlxsw_reg_mtbr_rec_max_temp_get(payload, rec_ind);
+}
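
A standalone sketch of interpreting one MTBR record, assuming the status codes above and the 0.125 degC units (the driver scales to millidegrees via MLXSW_REG_MTMP_TEMP_TO_MC()):

#include <stdint.h>
#include <stdio.h>

enum {					/* mirrors mlxsw_reg_mtbr_temp_status */
	NO_CONN		= 0x8000,
	NO_TEMP_SENS	= 0x8001,
	INDEX_NA	= 0x8002,
	BAD_SENS_INFO	= 0x8003,
};

static void decode_rec_temp(uint16_t rec_temp)
{
	switch (rec_temp) {
	case NO_CONN:
	case NO_TEMP_SENS:
	case INDEX_NA:
	case BAD_SENS_INFO:
		printf("no valid reading (status 0x%04x)\n", rec_temp);
		return;
	default:
		/* 0.125 degC units -> millidegrees */
		printf("%u mdegC\n", rec_temp * 125u);
	}
}

int main(void)
{
	decode_rec_temp(0x8001);	/* no sensor */
	decode_rec_temp(600);		/* 75000 mdegC */
	return 0;
}
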
+
 /* MCIA - Management Cable Info Access
  * -----------------------------------
 * MCIA register is used to access the SFP+ and QSFP connector's EEPROM.
@@ -8046,13 +8158,41 @@ MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16);
  */
 MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16);
 
-#define MLXSW_SP_REG_MCIA_EEPROM_SIZE 48
+#define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH      256
+#define MLXSW_REG_MCIA_EEPROM_SIZE             48
+#define MLXSW_REG_MCIA_I2C_ADDR_LOW            0x50
+#define MLXSW_REG_MCIA_I2C_ADDR_HIGH           0x51
+#define MLXSW_REG_MCIA_PAGE0_LO_OFF            0xa0
+#define MLXSW_REG_MCIA_TH_ITEM_SIZE            2
+#define MLXSW_REG_MCIA_TH_PAGE_NUM             3
+#define MLXSW_REG_MCIA_PAGE0_LO                        0
+#define MLXSW_REG_MCIA_TH_PAGE_OFF             0x80
+
+enum mlxsw_reg_mcia_eeprom_module_info_rev_id {
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_UNSPC  = 0x00,
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8436   = 0x01,
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636   = 0x03,
+};
+
+enum mlxsw_reg_mcia_eeprom_module_info_id {
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP        = 0x03,
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP       = 0x0C,
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS  = 0x0D,
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28     = 0x11,
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD    = 0x18,
+};
+
+enum mlxsw_reg_mcia_eeprom_module_info {
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID,
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID,
+       MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE,
+};
 
 /* reg_mcia_eeprom
  * Bytes to read/write.
  * Access: RW
  */
-MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_SP_REG_MCIA_EEPROM_SIZE);
+MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
 
 static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock,
                                       u8 page_number, u16 device_addr,
@@ -9740,8 +9880,10 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(mfsc),
        MLXSW_REG(mfsm),
        MLXSW_REG(mfsl),
+       MLXSW_REG(fore),
        MLXSW_REG(mtcap),
        MLXSW_REG(mtmp),
+       MLXSW_REG(mtbr),
        MLXSW_REG(mcia),
        MLXSW_REG(mpat),
        MLXSW_REG(mpar),
index 8dd808b..abac923 100644 (file)
@@ -2723,23 +2723,23 @@ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port,
                                        unsigned int *p_read_size)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE];
+       char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
        char mcia_pl[MLXSW_REG_MCIA_LEN];
        u16 i2c_addr;
        int status;
        int err;
 
-       size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE);
+       size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE);
 
-       if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH &&
-           offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH)
+       if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH &&
+           offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH)
                /* Cross-page read: read until offset 256 in the low page */
-               size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset;
+               size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset;
 
-       i2c_addr = MLXSW_SP_I2C_ADDR_LOW;
-       if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) {
-               i2c_addr = MLXSW_SP_I2C_ADDR_HIGH;
-               offset -= MLXSW_SP_EEPROM_PAGE_LENGTH;
+       i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW;
+       if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) {
+               i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH;
+               offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH;
        }
 
        mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module,
@@ -2760,55 +2760,37 @@ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port,
        return 0;
 }
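
The cross-page logic above condenses into a standalone sketch (constants inlined from the MLXSW_REG_MCIA_* defines):

#include <stdint.h>
#include <stdio.h>

#define PAGE_LEN	256	/* MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH */
#define CHUNK_MAX	48	/* MLXSW_REG_MCIA_EEPROM_SIZE */

/* Compute the i2c address, in-page offset and clamped size for one read. */
static void plan_read(uint16_t offset, uint16_t size)
{
	uint16_t i2c_addr = 0x50;		/* MLXSW_REG_MCIA_I2C_ADDR_LOW */

	if (size > CHUNK_MAX)
		size = CHUNK_MAX;
	if (offset < PAGE_LEN && offset + size > PAGE_LEN)
		size = PAGE_LEN - offset;	/* stop at the page boundary */
	if (offset >= PAGE_LEN) {
		i2c_addr = 0x51;		/* MLXSW_REG_MCIA_I2C_ADDR_HIGH */
		offset -= PAGE_LEN;
	}
	printf("addr 0x%02x offset %u size %u\n", i2c_addr, offset, size);
}

int main(void)
{
	plan_read(240, 48);	/* crosses the boundary: size clamped to 16 */
	plan_read(300, 32);	/* high page: addr 0x51, offset 44 */
	return 0;
}
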
 
-enum mlxsw_sp_eeprom_module_info_rev_id {
-       MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_UNSPC      = 0x00,
-       MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8436       = 0x01,
-       MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636       = 0x03,
-};
-
-enum mlxsw_sp_eeprom_module_info_id {
-       MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP              = 0x03,
-       MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP             = 0x0C,
-       MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS        = 0x0D,
-       MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28           = 0x11,
-};
-
-enum mlxsw_sp_eeprom_module_info {
-       MLXSW_SP_EEPROM_MODULE_INFO_ID,
-       MLXSW_SP_EEPROM_MODULE_INFO_REV_ID,
-       MLXSW_SP_EEPROM_MODULE_INFO_SIZE,
-};
-
 static int mlxsw_sp_get_module_info(struct net_device *netdev,
                                    struct ethtool_modinfo *modinfo)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
-       u8 module_info[MLXSW_SP_EEPROM_MODULE_INFO_SIZE];
+       u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE];
+       u16 offset = MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE;
        u8 module_rev_id, module_id;
        unsigned int read_size;
        int err;
 
-       err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0,
-                                          MLXSW_SP_EEPROM_MODULE_INFO_SIZE,
+       err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0, offset,
                                           module_info, &read_size);
        if (err)
                return err;
 
-       if (read_size < MLXSW_SP_EEPROM_MODULE_INFO_SIZE)
+       if (read_size < offset)
                return -EIO;
 
-       module_rev_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_REV_ID];
-       module_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_ID];
+       module_rev_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID];
+       module_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID];
 
        switch (module_id) {
-       case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP:
+       case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP:
                modinfo->type       = ETH_MODULE_SFF_8436;
                modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
                break;
-       case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS:
-       case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28:
-               if (module_id  == MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 ||
-                   module_rev_id >= MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636) {
+       case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */
+       case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28:
+               if (module_id == MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 ||
+                   module_rev_id >=
+                   MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) {
                        modinfo->type       = ETH_MODULE_SFF_8636;
                        modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
                } else {
@@ -2816,7 +2798,7 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev,
                        modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
                }
                break;
-       case MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP:
+       case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP:
                modinfo->type       = ETH_MODULE_SFF_8472;
                modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
                break;
@@ -4413,6 +4395,71 @@ static void mlxsw_sp_params_unregister(struct mlxsw_core *mlxsw_core)
                                  ARRAY_SIZE(mlxsw_sp_devlink_params));
 }
 
+static int
+mlxsw_sp_params_acl_region_rehash_intrvl_get(struct devlink *devlink, u32 id,
+                                            struct devlink_param_gset_ctx *ctx)
+{
+       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+       ctx->val.vu32 = mlxsw_sp_acl_region_rehash_intrvl_get(mlxsw_sp);
+       return 0;
+}
+
+static int
+mlxsw_sp_params_acl_region_rehash_intrvl_set(struct devlink *devlink, u32 id,
+                                            struct devlink_param_gset_ctx *ctx)
+{
+       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+       return mlxsw_sp_acl_region_rehash_intrvl_set(mlxsw_sp, ctx->val.vu32);
+}
+
+static const struct devlink_param mlxsw_sp2_devlink_params[] = {
+       DEVLINK_PARAM_DRIVER(MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL,
+                            "acl_region_rehash_interval",
+                            DEVLINK_PARAM_TYPE_U32,
+                            BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+                            mlxsw_sp_params_acl_region_rehash_intrvl_get,
+                            mlxsw_sp_params_acl_region_rehash_intrvl_set,
+                            NULL),
+};
+
+static int mlxsw_sp2_params_register(struct mlxsw_core *mlxsw_core)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_core);
+       union devlink_param_value value;
+       int err;
+
+       err = mlxsw_sp_params_register(mlxsw_core);
+       if (err)
+               return err;
+
+       err = devlink_params_register(devlink, mlxsw_sp2_devlink_params,
+                                     ARRAY_SIZE(mlxsw_sp2_devlink_params));
+       if (err)
+               goto err_devlink_params_register;
+
+       value.vu32 = 0;
+       devlink_param_driverinit_value_set(devlink,
+                                          MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL,
+                                          value);
+       return 0;
+
+err_devlink_params_register:
+       mlxsw_sp_params_unregister(mlxsw_core);
+       return err;
+}
+
+static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core)
+{
+       devlink_params_unregister(priv_to_devlink(mlxsw_core),
+                                 mlxsw_sp2_devlink_params,
+                                 ARRAY_SIZE(mlxsw_sp2_devlink_params));
+       mlxsw_sp_params_unregister(mlxsw_core);
+}
+
 static struct mlxsw_driver mlxsw_sp1_driver = {
        .kind                           = mlxsw_sp1_driver_name,
        .priv_size                      = sizeof(struct mlxsw_sp),
@@ -4461,8 +4508,8 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
        .sb_occ_tc_port_bind_get        = mlxsw_sp_sb_occ_tc_port_bind_get,
        .txhdr_construct                = mlxsw_sp_txhdr_construct,
        .resources_register             = mlxsw_sp2_resources_register,
-       .params_register                = mlxsw_sp_params_register,
-       .params_unregister              = mlxsw_sp_params_unregister,
+       .params_register                = mlxsw_sp2_params_register,
+       .params_unregister              = mlxsw_sp2_params_unregister,
        .txhdr_len                      = MLXSW_TXHDR_LEN,
        .profile                        = &mlxsw_sp2_config_profile,
        .res_query_enabled              = true,
@@ -4706,9 +4753,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
        err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index);
        if (err)
                goto err_col_port_add;
-       err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id);
-       if (err)
-               goto err_col_port_enable;
 
        mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index,
                                   mlxsw_sp_port->local_port);
@@ -4722,8 +4766,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
 
        return 0;
 
-err_col_port_enable:
-       mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
 err_col_port_add:
        if (!lag->ref_count)
                mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
@@ -4742,7 +4784,6 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
        lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
        WARN_ON(lag->ref_count == 0);
 
-       mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id);
        mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
 
        /* Any VLANs configured on the port are no longer valid */
@@ -4787,21 +4828,56 @@ static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port,
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
 }
 
-static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port,
-                                      bool lag_tx_enabled)
+static int
+mlxsw_sp_port_lag_col_dist_enable(struct mlxsw_sp_port *mlxsw_sp_port)
 {
-       if (lag_tx_enabled)
-               return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port,
-                                                 mlxsw_sp_port->lag_id);
-       else
-               return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port,
-                                                    mlxsw_sp_port->lag_id);
+       int err;
+
+       err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port,
+                                          mlxsw_sp_port->lag_id);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id);
+       if (err)
+               goto err_dist_port_add;
+
+       return 0;
+
+err_dist_port_add:
+       mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, mlxsw_sp_port->lag_id);
+       return err;
+}
+
+static int
+mlxsw_sp_port_lag_col_dist_disable(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       int err;
+
+       err = mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port,
+                                           mlxsw_sp_port->lag_id);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port,
+                                           mlxsw_sp_port->lag_id);
+       if (err)
+               goto err_col_port_disable;
+
+       return 0;
+
+err_col_port_disable:
+       mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id);
+       return err;
 }
 
 static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port,
                                     struct netdev_lag_lower_state_info *info)
 {
-       return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled);
+       if (info->tx_enabled)
+               return mlxsw_sp_port_lag_col_dist_enable(mlxsw_sp_port);
+       else
+               return mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port);
 }
 
 static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -5024,8 +5100,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                                err = mlxsw_sp_port_lag_join(mlxsw_sp_port,
                                                             upper_dev);
                        } else {
-                               mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port,
-                                                           false);
+                               mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port);
                                mlxsw_sp_port_lag_leave(mlxsw_sp_port,
                                                        upper_dev);
                        }
index 4fe0996..ceebc91 100644 (file)
@@ -690,6 +690,8 @@ struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp);
 
 int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp);
+u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val);
 
 /* spectrum_acl_tcam.c */
 struct mlxsw_sp_acl_tcam;
@@ -704,10 +706,13 @@ struct mlxsw_sp_acl_tcam_ops {
        size_t region_priv_size;
        int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv,
                           void *tcam_priv,
-                          struct mlxsw_sp_acl_tcam_region *region);
+                          struct mlxsw_sp_acl_tcam_region *region,
+                          void *hints_priv);
        void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv);
        int (*region_associate)(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_acl_tcam_region *region);
+       void * (*region_rehash_hints_get)(void *region_priv);
+       void (*region_rehash_hints_put)(void *hints_priv);
        size_t chunk_priv_size;
        void (*chunk_init)(void *region_priv, void *chunk_priv,
                           unsigned int priority);
index 6e44452..3a636f7 100644 (file)
@@ -112,7 +112,8 @@ mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp,
 static int
 mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
                               void *tcam_priv,
-                              struct mlxsw_sp_acl_tcam_region *_region)
+                              struct mlxsw_sp_acl_tcam_region *_region,
+                              void *hints_priv)
 {
        struct mlxsw_sp1_acl_tcam_region *region = region_priv;
        int err;
index d380b34..6c66a0f 100644 (file)
@@ -139,7 +139,8 @@ static void mlxsw_sp2_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
 static int
 mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
                               void *tcam_priv,
-                              struct mlxsw_sp_acl_tcam_region *_region)
+                              struct mlxsw_sp_acl_tcam_region *_region,
+                              void *hints_priv)
 {
        struct mlxsw_sp2_acl_tcam_region *region = region_priv;
        struct mlxsw_sp2_acl_tcam *tcam = tcam_priv;
@@ -147,7 +148,8 @@ mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
        region->region = _region;
 
        return mlxsw_sp_acl_atcam_region_init(mlxsw_sp, &tcam->atcam,
-                                             &region->aregion, _region,
+                                             &region->aregion,
+                                             _region, hints_priv,
                                              &mlxsw_sp2_acl_ctcam_region_ops);
 }
 
@@ -166,6 +168,18 @@ mlxsw_sp2_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp,
        return mlxsw_sp_acl_atcam_region_associate(mlxsw_sp, region->id);
 }
 
+static void *mlxsw_sp2_acl_tcam_region_rehash_hints_get(void *region_priv)
+{
+       struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+
+       return mlxsw_sp_acl_atcam_rehash_hints_get(&region->aregion);
+}
+
+static void mlxsw_sp2_acl_tcam_region_rehash_hints_put(void *hints_priv)
+{
+       mlxsw_sp_acl_atcam_rehash_hints_put(hints_priv);
+}
+
 static void mlxsw_sp2_acl_tcam_chunk_init(void *region_priv, void *chunk_priv,
                                          unsigned int priority)
 {
@@ -243,6 +257,8 @@ const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops = {
        .region_init            = mlxsw_sp2_acl_tcam_region_init,
        .region_fini            = mlxsw_sp2_acl_tcam_region_fini,
        .region_associate       = mlxsw_sp2_acl_tcam_region_associate,
+       .region_rehash_hints_get = mlxsw_sp2_acl_tcam_region_rehash_hints_get,
+       .region_rehash_hints_put = mlxsw_sp2_acl_tcam_region_rehash_hints_put,
        .chunk_priv_size        = sizeof(struct mlxsw_sp2_acl_tcam_chunk),
        .chunk_init             = mlxsw_sp2_acl_tcam_chunk_init,
        .chunk_fini             = mlxsw_sp2_acl_tcam_chunk_fini,
index 2649d12..a146a44 100644
@@ -640,7 +640,7 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
        int err;
 
        mlxsw_sp_acl_ruleset_ref_inc(ruleset);
-       rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp),
+       rule = kzalloc(sizeof(*rule) + ops->rule_priv_size,
                       GFP_KERNEL);
        if (!rule) {
                err = -ENOMEM;
@@ -912,3 +912,19 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp)
        mlxsw_afk_destroy(acl->afk);
        kfree(acl);
 }
+
+u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp)
+{
+       struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+
+       return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(mlxsw_sp,
+                                                          &acl->tcam);
+}
+
+int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val)
+{
+       struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+
+       return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(mlxsw_sp,
+                                                          &acl->tcam, val);
+}
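
The two wrappers above are thin exported entry points for reading and updating the rehash interval. A hypothetical caller, for instance a devlink runtime-parameter handler elsewhere in the series (the caller itself is an assumption; only the two functions used below come from this patch), would look like:

    u32 cur = mlxsw_sp_acl_region_rehash_intrvl_get(mlxsw_sp);
    int err = mlxsw_sp_acl_region_rehash_intrvl_set(mlxsw_sp, cur * 2);
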
index a74a390..ded4cf6 100644
@@ -318,6 +318,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_acl_atcam *atcam,
                               struct mlxsw_sp_acl_atcam_region *aregion,
                               struct mlxsw_sp_acl_tcam_region *region,
+                              void *hints_priv,
                               const struct mlxsw_sp_acl_ctcam_region_ops *ops)
 {
        int err;
@@ -334,7 +335,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp,
        err = aregion->ops->init(aregion);
        if (err)
                goto err_ops_init;
-       err = mlxsw_sp_acl_erp_region_init(aregion);
+       err = mlxsw_sp_acl_erp_region_init(aregion, hints_priv);
        if (err)
                goto err_erp_region_init;
        err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &aregion->cregion,
@@ -634,3 +635,14 @@ void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp,
 {
        mlxsw_sp_acl_erps_fini(mlxsw_sp, atcam);
 }
+
+void *
+mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       return mlxsw_sp_acl_erp_rehash_hints_get(aregion);
+}
+
+void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv)
+{
+       mlxsw_sp_acl_erp_rehash_hints_put(hints_priv);
+}
index 2941967..e935c36 100644
@@ -1200,6 +1200,32 @@ mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key *parent_key,
        return 0;
 }
 
+static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj,
+                                        const void *obj)
+{
+       const struct mlxsw_sp_acl_erp_key *parent_key = parent_obj;
+       const struct mlxsw_sp_acl_erp_key *key = obj;
+       u16 delta_start;
+       u8 delta_mask;
+       int err;
+
+       err = mlxsw_sp_acl_erp_delta_fill(parent_key, key,
+                                         &delta_start, &delta_mask);
+       return err ? false : true;
+}
+
+static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2)
+{
+       const struct mlxsw_sp_acl_erp_key *key1 = obj1;
+       const struct mlxsw_sp_acl_erp_key *key2 = obj2;
+
+       /* For the purposes of hints, two objects are considered equal
+        * if their masks are the same; the "ctcam" value does not
+        * matter.
+        */
+       return memcmp(key1->mask, key2->mask, sizeof(key1->mask));
+}
+
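
Since the comparison uses only the mask bytes, two keys that differ solely in the ctcam flag are equal for hint lookup. A toy illustration (values invented; the fields are the ones referenced above):

    struct mlxsw_sp_acl_erp_key a = {}, b = {};

    memset(a.mask, 0xff, sizeof(a.mask));   /* identical mask bytes */
    memset(b.mask, 0xff, sizeof(b.mask));
    a.ctcam = false;
    b.ctcam = true;
    WARN_ON(mlxsw_sp_acl_erp_hints_obj_cmp(&a, &b) != 0); /* equal */
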
 static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj,
                                           void *obj)
 {
@@ -1254,12 +1280,17 @@ static void mlxsw_sp_acl_erp_delta_destroy(void *priv, void *delta_priv)
        kfree(delta);
 }
 
-static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj)
+static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj,
+                                         unsigned int root_id)
 {
        struct mlxsw_sp_acl_atcam_region *aregion = priv;
        struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
        struct mlxsw_sp_acl_erp_key *key = obj;
 
+       if (!key->ctcam &&
+           root_id != OBJAGG_OBJ_ROOT_ID_INVALID &&
+           root_id >= MLXSW_SP_ACL_ERP_MAX_PER_REGION)
+               return ERR_PTR(-ENOBUFS);
        return erp_table->ops->erp_create(erp_table, key);
 }
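
A gloss on the new root_id guard, as read from the condition above (a paraphrase, not author text):

    /* - root_id == OBJAGG_OBJ_ROOT_ID_INVALID: the root is not being
     *   replayed from hints; accept it and let erp_create() do its own
     *   accounting.
     * - ctcam keys are exempt from the per-region ERP budget.
     * - Otherwise a hinted root_id must stay below
     *   MLXSW_SP_ACL_ERP_MAX_PER_REGION, or -ENOBUFS refuses it.
     */
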
 
@@ -1273,6 +1304,8 @@ static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv)
 
 static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = {
        .obj_size = sizeof(struct mlxsw_sp_acl_erp_key),
+       .delta_check = mlxsw_sp_acl_erp_delta_check,
+       .hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp,
        .delta_create = mlxsw_sp_acl_erp_delta_create,
        .delta_destroy = mlxsw_sp_acl_erp_delta_destroy,
        .root_create = mlxsw_sp_acl_erp_root_create,
@@ -1280,7 +1313,8 @@ static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = {
 };
 
 static struct mlxsw_sp_acl_erp_table *
-mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion)
+mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion,
+                             struct objagg_hints *hints)
 {
        struct mlxsw_sp_acl_erp_table *erp_table;
        int err;
@@ -1290,7 +1324,7 @@ mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion)
                return ERR_PTR(-ENOMEM);
 
        erp_table->objagg = objagg_create(&mlxsw_sp_acl_erp_objagg_ops,
-                                         aregion);
+                                         hints, aregion);
        if (IS_ERR(erp_table->objagg)) {
                err = PTR_ERR(erp_table->objagg);
                goto err_objagg_create;
@@ -1337,12 +1371,88 @@ mlxsw_sp_acl_erp_region_param_init(struct mlxsw_sp_acl_atcam_region *aregion)
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
 }
 
-int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion)
+static int
+mlxsw_sp_acl_erp_hints_check(struct mlxsw_sp *mlxsw_sp,
+                            struct mlxsw_sp_acl_atcam_region *aregion,
+                            struct objagg_hints *hints, bool *p_rehash_needed)
+{
+       struct objagg *objagg = aregion->erp_table->objagg;
+       const struct objagg_stats *ostats;
+       const struct objagg_stats *hstats;
+       int err;
+
+       *p_rehash_needed = false;
+
+       ostats = objagg_stats_get(objagg);
+       if (IS_ERR(ostats)) {
+               dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP stats\n");
+               return PTR_ERR(ostats);
+       }
+
+       hstats = objagg_hints_stats_get(hints);
+       if (IS_ERR(hstats)) {
+               dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP hints stats\n");
+               err = PTR_ERR(hstats);
+               goto err_hints_stats_get;
+       }
+
+       /* Very basic criterion for now. */
+       if (hstats->root_count < ostats->root_count)
+               *p_rehash_needed = true;
+
+       err = 0;
+
+       objagg_stats_put(hstats);
+err_hints_stats_get:
+       objagg_stats_put(ostats);
+       return err;
+}
+
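
The "very basic criterion" compares nothing but root counts. For illustration (numbers invented):

    /* If the live objagg uses 6 root ERPs (ostats->root_count == 6) and
     * the recomputed hints would need only 4 (hstats->root_count == 4),
     * a rehash is worthwhile; an equal or higher hinted count leaves
     * *p_rehash_needed false.
     */
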
+void *
+mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp;
+       struct objagg_hints *hints;
+       bool rehash_needed;
+       int err;
+
+       hints = objagg_hints_get(aregion->erp_table->objagg,
+                                OBJAGG_OPT_ALGO_SIMPLE_GREEDY);
+       if (IS_ERR(hints)) {
+               dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to create ERP hints\n");
+               return ERR_CAST(hints);
+       }
+       err = mlxsw_sp_acl_erp_hints_check(mlxsw_sp, aregion, hints,
+                                          &rehash_needed);
+       if (err)
+               goto errout;
+
+       if (!rehash_needed) {
+               err = -EAGAIN;
+               goto errout;
+       }
+       return hints;
+
+errout:
+       objagg_hints_put(hints);
+       return ERR_PTR(err);
+}
+
+void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv)
+{
+       struct objagg_hints *hints = hints_priv;
+
+       objagg_hints_put(hints);
+}
+
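
Note the sentinel used by the getter above: -EAGAIN means "hints computed, but no improvement found", not a failure. The rehash worker added further down relies on exactly this convention:

    /* hints_priv = mlxsw_sp_acl_erp_rehash_hints_get(aregion);
     * PTR_ERR(hints_priv) == -EAGAIN -> skip quietly and retry on the
     * next interval; any other error is logged.
     */
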
+int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion,
+                                void *hints_priv)
 {
        struct mlxsw_sp_acl_erp_table *erp_table;
+       struct objagg_hints *hints = hints_priv;
        int err;
 
-       erp_table = mlxsw_sp_acl_erp_table_create(aregion);
+       erp_table = mlxsw_sp_acl_erp_table_create(aregion, hints);
        if (IS_ERR(erp_table))
                return PTR_ERR(erp_table);
        aregion->erp_table = erp_table;
index 11456e1..7e225a8 100644
@@ -8,6 +8,7 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
+#include <trace/events/mlxsw.h>
 
 #include "reg.h"
 #include "core.h"
@@ -23,6 +24,9 @@ size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp)
        return ops->priv_size;
 }
 
+#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT 5000 /* ms */
+#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN 3000 /* ms */
+
 int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
                           struct mlxsw_sp_acl_tcam *tcam)
 {
@@ -33,6 +37,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
        size_t alloc_size;
        int err;
 
+       tcam->vregion_rehash_intrvl =
+                       MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT;
+       INIT_LIST_HEAD(&tcam->vregion_list);
+
        max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core,
                                              ACL_MAX_TCAM_REGIONS);
        max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS);
@@ -153,9 +161,9 @@ struct mlxsw_sp_acl_tcam_pattern {
 struct mlxsw_sp_acl_tcam_group {
        struct mlxsw_sp_acl_tcam *tcam;
        u16 id;
-       struct list_head region_list;
+       struct list_head vregion_list;
        unsigned int region_count;
-       struct rhashtable chunk_ht;
+       struct rhashtable vchunk_ht;
        struct mlxsw_sp_acl_tcam_group_ops *ops;
        const struct mlxsw_sp_acl_tcam_pattern *patterns;
        unsigned int patterns_count;
@@ -163,40 +171,77 @@ struct mlxsw_sp_acl_tcam_group {
        struct mlxsw_afk_element_usage tmplt_elusage;
 };
 
-struct mlxsw_sp_acl_tcam_chunk {
-       struct list_head list; /* Member of a TCAM region */
-       struct rhash_head ht_node; /* Member of a chunk HT */
-       unsigned int priority; /* Priority within the region and group */
+struct mlxsw_sp_acl_tcam_vregion {
+       struct mlxsw_sp_acl_tcam_region *region;
+       struct mlxsw_sp_acl_tcam_region *region2; /* Used during migration */
+       struct list_head list; /* Member of a TCAM group */
+       struct list_head tlist; /* Member of a TCAM */
+       struct list_head vchunk_list; /* List of vchunks under this vregion */
        struct mlxsw_sp_acl_tcam_group *group;
+       struct mlxsw_afk_key_info *key_info;
+       struct mlxsw_sp_acl_tcam *tcam;
+       struct delayed_work rehash_dw;
+       struct mlxsw_sp *mlxsw_sp;
+       bool failed_rollback; /* Indicates failed rollback during migration */
+};
+
+struct mlxsw_sp_acl_tcam_vchunk;
+
+struct mlxsw_sp_acl_tcam_chunk {
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
        struct mlxsw_sp_acl_tcam_region *region;
-       unsigned int ref_count;
        unsigned long priv[0];
        /* priv has to be always the last item */
 };
 
+struct mlxsw_sp_acl_tcam_vchunk {
+       struct mlxsw_sp_acl_tcam_chunk *chunk;
+       struct mlxsw_sp_acl_tcam_chunk *chunk2; /* Used during migration */
+       struct list_head list; /* Member of a TCAM vregion */
+       struct rhash_head ht_node; /* Member of a chunk HT */
+       struct list_head ventry_list;
+       unsigned int priority; /* Priority within the vregion and group */
+       struct mlxsw_sp_acl_tcam_group *group;
+       struct mlxsw_sp_acl_tcam_vregion *vregion;
+       unsigned int ref_count;
+};
+
 struct mlxsw_sp_acl_tcam_entry {
+       struct mlxsw_sp_acl_tcam_ventry *ventry;
        struct mlxsw_sp_acl_tcam_chunk *chunk;
        unsigned long priv[0];
        /* priv has to be always the last item */
 };
 
-static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = {
+struct mlxsw_sp_acl_tcam_ventry {
+       struct mlxsw_sp_acl_tcam_entry *entry;
+       struct list_head list; /* Member of a TCAM vchunk */
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+       struct mlxsw_sp_acl_rule_info *rulei;
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_tcam_vchunk_ht_params = {
        .key_len = sizeof(unsigned int),
-       .key_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, priority),
-       .head_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, ht_node),
+       .key_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, priority),
+       .head_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, ht_node),
        .automatic_shrinking = true,
 };
 
 static int mlxsw_sp_acl_tcam_group_update(struct mlxsw_sp *mlxsw_sp,
                                          struct mlxsw_sp_acl_tcam_group *group)
 {
-       struct mlxsw_sp_acl_tcam_region *region;
+       struct mlxsw_sp_acl_tcam_vregion *vregion;
        char pagt_pl[MLXSW_REG_PAGT_LEN];
        int acl_index = 0;
 
        mlxsw_reg_pagt_pack(pagt_pl, group->id);
-       list_for_each_entry(region, &group->region_list, list)
-               mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, region->id);
+       list_for_each_entry(vregion, &group->vregion_list, list) {
+               if (vregion->region2)
+                       mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++,
+                                                  vregion->region2->id, true);
+               mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++,
+                                          vregion->region->id, false);
+       }
        mlxsw_reg_pagt_size_set(pagt_pl, acl_index);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl);
 }
@@ -219,13 +264,13 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
                memcpy(&group->tmplt_elusage, tmplt_elusage,
                       sizeof(group->tmplt_elusage));
        }
-       INIT_LIST_HEAD(&group->region_list);
+       INIT_LIST_HEAD(&group->vregion_list);
        err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id);
        if (err)
                return err;
 
-       err = rhashtable_init(&group->chunk_ht,
-                             &mlxsw_sp_acl_tcam_chunk_ht_params);
+       err = rhashtable_init(&group->vchunk_ht,
+                             &mlxsw_sp_acl_tcam_vchunk_ht_params);
        if (err)
                goto err_rhashtable_init;
 
@@ -241,9 +286,9 @@ static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp *mlxsw_sp,
 {
        struct mlxsw_sp_acl_tcam *tcam = group->tcam;
 
-       rhashtable_destroy(&group->chunk_ht);
+       rhashtable_destroy(&group->vchunk_ht);
        mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
-       WARN_ON(!list_empty(&group->region_list));
+       WARN_ON(!list_empty(&group->vregion_list));
 }
 
 static int
@@ -283,140 +328,153 @@ mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group)
 }
 
 static unsigned int
-mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region)
+mlxsw_sp_acl_tcam_vregion_prio(struct mlxsw_sp_acl_tcam_vregion *vregion)
 {
-       struct mlxsw_sp_acl_tcam_chunk *chunk;
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
 
-       if (list_empty(&region->chunk_list))
+       if (list_empty(&vregion->vchunk_list))
                return 0;
-       /* As a priority of a region, return priority of the first chunk */
-       chunk = list_first_entry(&region->chunk_list, typeof(*chunk), list);
-       return chunk->priority;
+       /* As a priority of a vregion, return priority of the first vchunk */
+       vchunk = list_first_entry(&vregion->vchunk_list,
+                                 typeof(*vchunk), list);
+       return vchunk->priority;
 }
 
 static unsigned int
-mlxsw_sp_acl_tcam_region_max_prio(struct mlxsw_sp_acl_tcam_region *region)
+mlxsw_sp_acl_tcam_vregion_max_prio(struct mlxsw_sp_acl_tcam_vregion *vregion)
 {
-       struct mlxsw_sp_acl_tcam_chunk *chunk;
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
 
-       if (list_empty(&region->chunk_list))
+       if (list_empty(&vregion->vchunk_list))
                return 0;
-       chunk = list_last_entry(&region->chunk_list, typeof(*chunk), list);
-       return chunk->priority;
+       vchunk = list_last_entry(&vregion->vchunk_list,
+                                typeof(*vchunk), list);
+       return vchunk->priority;
 }
 
-static void
-mlxsw_sp_acl_tcam_group_list_add(struct mlxsw_sp_acl_tcam_group *group,
-                                struct mlxsw_sp_acl_tcam_region *region)
+static int
+mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp,
+                                     struct mlxsw_sp_acl_tcam_region *region)
 {
-       struct mlxsw_sp_acl_tcam_region *region2;
-       struct list_head *pos;
+       struct mlxsw_sp_acl_tcam_group *group = region->vregion->group;
+       int err;
+
+       if (group->region_count == group->tcam->max_group_size)
+               return -ENOBUFS;
+
+       err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
+       if (err)
+               return err;
 
-       /* Position the region inside the list according to priority */
-       list_for_each(pos, &group->region_list) {
-               region2 = list_entry(pos, typeof(*region2), list);
-               if (mlxsw_sp_acl_tcam_region_prio(region2) >
-                   mlxsw_sp_acl_tcam_region_prio(region))
-                       break;
-       }
-       list_add_tail(&region->list, pos);
        group->region_count++;
+       return 0;
 }
 
 static void
-mlxsw_sp_acl_tcam_group_list_del(struct mlxsw_sp_acl_tcam_group *group,
-                                struct mlxsw_sp_acl_tcam_region *region)
+mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp,
+                                     struct mlxsw_sp_acl_tcam_region *region)
 {
+       struct mlxsw_sp_acl_tcam_group *group = region->vregion->group;
+
        group->region_count--;
-       list_del(&region->list);
+       mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
 }
 
 static int
-mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp,
-                                     struct mlxsw_sp_acl_tcam_group *group,
-                                     struct mlxsw_sp_acl_tcam_region *region)
+mlxsw_sp_acl_tcam_group_vregion_attach(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_acl_tcam_group *group,
+                                      struct mlxsw_sp_acl_tcam_vregion *vregion)
 {
+       struct mlxsw_sp_acl_tcam_vregion *vregion2;
+       struct list_head *pos;
        int err;
 
-       if (group->region_count == group->tcam->max_group_size)
-               return -ENOBUFS;
-
-       mlxsw_sp_acl_tcam_group_list_add(group, region);
+       /* Position the vregion inside the list according to priority */
+       list_for_each(pos, &group->vregion_list) {
+               vregion2 = list_entry(pos, typeof(*vregion2), list);
+               if (mlxsw_sp_acl_tcam_vregion_prio(vregion2) >
+                   mlxsw_sp_acl_tcam_vregion_prio(vregion))
+                       break;
+       }
+       list_add_tail(&vregion->list, pos);
+       vregion->group = group;
 
-       err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
+       err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, vregion->region);
        if (err)
-               goto err_group_update;
-       region->group = group;
+               goto err_region_attach;
 
        return 0;
 
-err_group_update:
-       mlxsw_sp_acl_tcam_group_list_del(group, region);
-       mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
+err_region_attach:
+       list_del(&vregion->list);
        return err;
 }
 
 static void
-mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp,
-                                     struct mlxsw_sp_acl_tcam_region *region)
+mlxsw_sp_acl_tcam_group_vregion_detach(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_acl_tcam_vregion *vregion)
 {
-       struct mlxsw_sp_acl_tcam_group *group = region->group;
-
-       mlxsw_sp_acl_tcam_group_list_del(group, region);
-       mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
+       list_del(&vregion->list);
+       if (vregion->region2)
+               mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp,
+                                                     vregion->region2);
+       mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, vregion->region);
 }
 
-static struct mlxsw_sp_acl_tcam_region *
-mlxsw_sp_acl_tcam_group_region_find(struct mlxsw_sp_acl_tcam_group *group,
-                                   unsigned int priority,
-                                   struct mlxsw_afk_element_usage *elusage,
-                                   bool *p_need_split)
+static struct mlxsw_sp_acl_tcam_vregion *
+mlxsw_sp_acl_tcam_group_vregion_find(struct mlxsw_sp_acl_tcam_group *group,
+                                    unsigned int priority,
+                                    struct mlxsw_afk_element_usage *elusage,
+                                    bool *p_need_split)
 {
-       struct mlxsw_sp_acl_tcam_region *region, *region2;
+       struct mlxsw_sp_acl_tcam_vregion *vregion, *vregion2;
        struct list_head *pos;
        bool issubset;
 
-       list_for_each(pos, &group->region_list) {
-               region = list_entry(pos, typeof(*region), list);
+       list_for_each(pos, &group->vregion_list) {
+               vregion = list_entry(pos, typeof(*vregion), list);
 
                /* First, check if the requested priority does not rather belong
-                * under some of the next regions.
+                * under some of the next vregions.
                 */
-               if (pos->next != &group->region_list) { /* not last */
-                       region2 = list_entry(pos->next, typeof(*region2), list);
-                       if (priority >= mlxsw_sp_acl_tcam_region_prio(region2))
+               if (pos->next != &group->vregion_list) { /* not last */
+                       vregion2 = list_entry(pos->next, typeof(*vregion2),
+                                             list);
+                       if (priority >=
+                           mlxsw_sp_acl_tcam_vregion_prio(vregion2))
                                continue;
                }
 
-               issubset = mlxsw_afk_key_info_subset(region->key_info, elusage);
+               issubset = mlxsw_afk_key_info_subset(vregion->key_info,
+                                                    elusage);
 
                /* If requested element usage would not fit and the priority
-                * is lower than the currently inspected region we cannot
-                * use this region, so return NULL to indicate new region has
+                * is lower than the currently inspected vregion we cannot
+                * use this vregion, so return NULL to indicate a new vregion has
                 * to be created.
                 */
                if (!issubset &&
-                   priority < mlxsw_sp_acl_tcam_region_prio(region))
+                   priority < mlxsw_sp_acl_tcam_vregion_prio(vregion))
                        return NULL;
 
                /* If requested element usage would not fit and the priority
-                * is higher than the currently inspected region we cannot
-                * use this region. There is still some hope that the next
-                * region would be the fit. So let it be processed and
+                * is higher than the currently inspected vregion we cannot
+                * use this vregion. There is still some hope that the next
+                * vregion will fit. So let it be processed and
                 * eventually break at the check right above this.
                 */
                if (!issubset &&
-                   priority > mlxsw_sp_acl_tcam_region_max_prio(region))
+                   priority > mlxsw_sp_acl_tcam_vregion_max_prio(vregion))
                        continue;
 
-               /* Indicate if the region needs to be split in order to add
+               /* Indicate if the vregion needs to be split in order to add
                 * the requested priority. Split is needed when requested
-                * element usage won't fit into the found region.
+                * element usage won't fit into the found vregion.
                 */
                *p_need_split = !issubset;
-               return region;
+               return vregion;
        }
-       return NULL; /* New region has to be created. */
+       return NULL; /* New vregion has to be created. */
 }
 
 static void
@@ -511,24 +569,19 @@ mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp,
 static struct mlxsw_sp_acl_tcam_region *
 mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_acl_tcam *tcam,
-                               struct mlxsw_afk_element_usage *elusage)
+                               struct mlxsw_sp_acl_tcam_vregion *vregion,
+                               void *hints_priv)
 {
        const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-       struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
        struct mlxsw_sp_acl_tcam_region *region;
        int err;
 
        region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL);
        if (!region)
                return ERR_PTR(-ENOMEM);
-       INIT_LIST_HEAD(&region->chunk_list);
        region->mlxsw_sp = mlxsw_sp;
-
-       region->key_info = mlxsw_afk_key_info_get(afk, elusage);
-       if (IS_ERR(region->key_info)) {
-               err = PTR_ERR(region->key_info);
-               goto err_key_info_get;
-       }
+       region->vregion = vregion;
+       region->key_info = vregion->key_info;
 
        err = mlxsw_sp_acl_tcam_region_id_get(tcam, &region->id);
        if (err)
@@ -547,7 +600,8 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_tcam_region_enable;
 
-       err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, region);
+       err = ops->region_init(mlxsw_sp, region->priv, tcam->priv,
+                              region, hints_priv);
        if (err)
                goto err_tcam_region_init;
 
@@ -561,8 +615,6 @@ err_tcam_region_alloc:
 err_tcam_region_associate:
        mlxsw_sp_acl_tcam_region_id_put(tcam, region->id);
 err_region_id_get:
-       mlxsw_afk_key_info_put(region->key_info);
-err_key_info_get:
        kfree(region);
        return ERR_PTR(err);
 }
@@ -576,217 +628,372 @@ mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp,
        ops->region_fini(mlxsw_sp, region->priv);
        mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
        mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
-       mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id);
-       mlxsw_afk_key_info_put(region->key_info);
+       mlxsw_sp_acl_tcam_region_id_put(region->vregion->group->tcam,
+                                       region->id);
        kfree(region);
 }
 
+static void
+mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(struct mlxsw_sp_acl_tcam_vregion *vregion)
+{
+       unsigned long interval = vregion->tcam->vregion_rehash_intrvl;
+
+       if (!interval)
+               return;
+       mlxsw_core_schedule_dw(&vregion->rehash_dw,
+                              msecs_to_jiffies(interval));
+}
+
 static int
-mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp,
-                             struct mlxsw_sp_acl_tcam_group *group,
-                             unsigned int priority,
-                             struct mlxsw_afk_element_usage *elusage,
-                             struct mlxsw_sp_acl_tcam_chunk *chunk)
+mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_tcam_vregion *vregion);
+
+static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work)
 {
-       struct mlxsw_sp_acl_tcam_region *region;
-       bool region_created = false;
+       struct mlxsw_sp_acl_tcam_vregion *vregion =
+               container_of(work, struct mlxsw_sp_acl_tcam_vregion,
+                            rehash_dw.work);
+
+       /* TODO: Take rtnl lock here as the rest of the code counts on it
+        * now. Later, this should be replaced by a per-vregion lock.
+        */
+       rtnl_lock();
+       mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion);
+       rtnl_unlock();
+       mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion);
+}
+
+static struct mlxsw_sp_acl_tcam_vregion *
+mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_tcam *tcam,
+                                struct mlxsw_afk_element_usage *elusage)
+{
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+       struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+       struct mlxsw_sp_acl_tcam_vregion *vregion;
+       int err;
+
+       vregion = kzalloc(sizeof(*vregion), GFP_KERNEL);
+       if (!vregion)
+               return ERR_PTR(-ENOMEM);
+       INIT_LIST_HEAD(&vregion->vchunk_list);
+       vregion->tcam = tcam;
+       vregion->mlxsw_sp = mlxsw_sp;
+
+       vregion->key_info = mlxsw_afk_key_info_get(afk, elusage);
+       if (IS_ERR(vregion->key_info)) {
+               err = PTR_ERR(vregion->key_info);
+               goto err_key_info_get;
+       }
+
+       vregion->region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, tcam,
+                                                         vregion, NULL);
+       if (IS_ERR(vregion->region)) {
+               err = PTR_ERR(vregion->region);
+               goto err_region_create;
+       }
+
+       list_add_tail(&vregion->tlist, &tcam->vregion_list);
+
+       if (ops->region_rehash_hints_get) {
+               /* Create the delayed work for vregion periodic rehash */
+               INIT_DELAYED_WORK(&vregion->rehash_dw,
+                                 mlxsw_sp_acl_tcam_vregion_rehash_work);
+               mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion);
+       }
+
+       return vregion;
+
+err_region_create:
+       mlxsw_afk_key_info_put(vregion->key_info);
+err_key_info_get:
+       kfree(vregion);
+       return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_acl_tcam_vregion *vregion)
+{
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+       if (ops->region_rehash_hints_get)
+               cancel_delayed_work_sync(&vregion->rehash_dw);
+       list_del(&vregion->tlist);
+       if (vregion->region2)
+               mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region2);
+       mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region);
+       mlxsw_afk_key_info_put(vregion->key_info);
+       kfree(vregion);
+}
+
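
One ordering detail in the destroy path above is worth spelling out:

    /* cancel_delayed_work_sync() runs before anything is torn down, so
     * the rehash worker can no longer fire against a vregion whose
     * regions are being destroyed right below it.
     */
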
+u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp,
+                                               struct mlxsw_sp_acl_tcam *tcam)
+{
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+       u32 vregion_rehash_intrvl;
+
+       if (WARN_ON(!ops->region_rehash_hints_get))
+               return 0;
+       vregion_rehash_intrvl = tcam->vregion_rehash_intrvl;
+       return vregion_rehash_intrvl;
+}
+
+int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp,
+                                               struct mlxsw_sp_acl_tcam *tcam,
+                                               u32 val)
+{
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+       struct mlxsw_sp_acl_tcam_vregion *vregion;
+
+       if (val < MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN && val)
+               return -EINVAL;
+       if (WARN_ON(!ops->region_rehash_hints_get))
+               return -EOPNOTSUPP;
+       tcam->vregion_rehash_intrvl = val;
+       rtnl_lock();
+       list_for_each_entry(vregion, &tcam->vregion_list, tlist) {
+               if (val)
+                       mlxsw_core_schedule_dw(&vregion->rehash_dw, 0);
+               else
+                       cancel_delayed_work_sync(&vregion->rehash_dw);
+       }
+       rtnl_unlock();
+       return 0;
+}
+
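
Putting the interval rules together (a reading of the setter above; values in milliseconds):

    /* val == 0    -> accepted; pending rehash works are cancelled
     * val == 1000 -> -EINVAL (non-zero but below the 3000 ms minimum)
     * val == 5000 -> accepted; every vregion is rescheduled at once
     */
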
+static int
+mlxsw_sp_acl_tcam_vchunk_assoc(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_acl_tcam_group *group,
+                              unsigned int priority,
+                              struct mlxsw_afk_element_usage *elusage,
+                              struct mlxsw_sp_acl_tcam_vchunk *vchunk)
+{
+       struct mlxsw_sp_acl_tcam_vregion *vregion;
+       bool vregion_created = false;
        bool need_split;
        int err;
 
-       region = mlxsw_sp_acl_tcam_group_region_find(group, priority, elusage,
-                                                    &need_split);
-       if (region && need_split) {
-               /* According to priority, the chunk should belong to an
-                * existing region. However, this chunk needs elements
-                * that region does not contain. We need to split the existing
-                * region into two and create a new region for this chunk
+       vregion = mlxsw_sp_acl_tcam_group_vregion_find(group, priority, elusage,
+                                                      &need_split);
+       if (vregion && need_split) {
+               /* According to priority, the vchunk should belong to an
+                * existing vregion. However, this vchunk needs elements
+                * that vregion does not contain. We need to split the existing
+                * vregion into two and create a new vregion for this vchunk
                 * in between. This is not supported now.
                 */
                return -EOPNOTSUPP;
        }
-       if (!region) {
-               struct mlxsw_afk_element_usage region_elusage;
+       if (!vregion) {
+               struct mlxsw_afk_element_usage vregion_elusage;
 
                mlxsw_sp_acl_tcam_group_use_patterns(group, elusage,
-                                                    &region_elusage);
-               region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, group->tcam,
-                                                        &region_elusage);
-               if (IS_ERR(region))
-                       return PTR_ERR(region);
-               region_created = true;
+                                                    &vregion_elusage);
+               vregion = mlxsw_sp_acl_tcam_vregion_create(mlxsw_sp,
+                                                          group->tcam,
+                                                          &vregion_elusage);
+               if (IS_ERR(vregion))
+                       return PTR_ERR(vregion);
+               vregion_created = true;
        }
 
-       chunk->region = region;
-       list_add_tail(&chunk->list, &region->chunk_list);
+       vchunk->vregion = vregion;
+       list_add_tail(&vchunk->list, &vregion->vchunk_list);
 
-       if (!region_created)
+       if (!vregion_created)
                return 0;
 
-       err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, group, region);
+       err = mlxsw_sp_acl_tcam_group_vregion_attach(mlxsw_sp, group, vregion);
        if (err)
-               goto err_group_region_attach;
+               goto err_group_vregion_attach;
 
        return 0;
 
-err_group_region_attach:
-       mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region);
+err_group_vregion_attach:
+       mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion);
        return err;
 }
 
 static void
-mlxsw_sp_acl_tcam_chunk_deassoc(struct mlxsw_sp *mlxsw_sp,
-                               struct mlxsw_sp_acl_tcam_chunk *chunk)
+mlxsw_sp_acl_tcam_vchunk_deassoc(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_tcam_vchunk *vchunk)
 {
-       struct mlxsw_sp_acl_tcam_region *region = chunk->region;
+       struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion;
 
-       list_del(&chunk->list);
-       if (list_empty(&region->chunk_list)) {
-               mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, region);
-               mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region);
+       list_del(&vchunk->list);
+       if (list_empty(&vregion->vchunk_list)) {
+               mlxsw_sp_acl_tcam_group_vregion_detach(mlxsw_sp, vregion);
+               mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion);
        }
 }
 
 static struct mlxsw_sp_acl_tcam_chunk *
 mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
-                              struct mlxsw_sp_acl_tcam_group *group,
-                              unsigned int priority,
-                              struct mlxsw_afk_element_usage *elusage)
+                              struct mlxsw_sp_acl_tcam_vchunk *vchunk,
+                              struct mlxsw_sp_acl_tcam_region *region)
 {
        const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
        struct mlxsw_sp_acl_tcam_chunk *chunk;
+
+       chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL);
+       if (!chunk)
+               return ERR_PTR(-ENOMEM);
+       chunk->vchunk = vchunk;
+       chunk->region = region;
+
+       ops->chunk_init(region->priv, chunk->priv, vchunk->priority);
+       return chunk;
+}
+
+static void
+mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_acl_tcam_chunk *chunk)
+{
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+       ops->chunk_fini(chunk->priv);
+       kfree(chunk);
+}
+
+static struct mlxsw_sp_acl_tcam_vchunk *
+mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_acl_tcam_group *group,
+                               unsigned int priority,
+                               struct mlxsw_afk_element_usage *elusage)
+{
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
        int err;
 
        if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO)
                return ERR_PTR(-EINVAL);
 
-       chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL);
-       if (!chunk)
+       vchunk = kzalloc(sizeof(*vchunk), GFP_KERNEL);
+       if (!vchunk)
                return ERR_PTR(-ENOMEM);
-       chunk->priority = priority;
-       chunk->group = group;
-       chunk->ref_count = 1;
+       INIT_LIST_HEAD(&vchunk->ventry_list);
+       vchunk->priority = priority;
+       vchunk->group = group;
+       vchunk->ref_count = 1;
 
-       err = mlxsw_sp_acl_tcam_chunk_assoc(mlxsw_sp, group, priority,
-                                           elusage, chunk);
+       err = mlxsw_sp_acl_tcam_vchunk_assoc(mlxsw_sp, group, priority,
+                                            elusage, vchunk);
        if (err)
-               goto err_chunk_assoc;
+               goto err_vchunk_assoc;
 
-       ops->chunk_init(chunk->region->priv, chunk->priv, priority);
-
-       err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node,
-                                    mlxsw_sp_acl_tcam_chunk_ht_params);
+       err = rhashtable_insert_fast(&group->vchunk_ht, &vchunk->ht_node,
+                                    mlxsw_sp_acl_tcam_vchunk_ht_params);
        if (err)
                goto err_rhashtable_insert;
 
-       return chunk;
+       vchunk->chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk,
+                                                      vchunk->vregion->region);
+       if (IS_ERR(vchunk->chunk)) {
+               err = PTR_ERR(vchunk->chunk);
+               goto err_chunk_create;
+       }
+
+       return vchunk;
 
+err_chunk_create:
+       rhashtable_remove_fast(&group->vchunk_ht, &vchunk->ht_node,
+                              mlxsw_sp_acl_tcam_vchunk_ht_params);
 err_rhashtable_insert:
-       ops->chunk_fini(chunk->priv);
-       mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
-err_chunk_assoc:
-       kfree(chunk);
+       mlxsw_sp_acl_tcam_vchunk_deassoc(mlxsw_sp, vchunk);
+err_vchunk_assoc:
+       kfree(vchunk);
        return ERR_PTR(err);
 }
 
 static void
-mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp,
-                               struct mlxsw_sp_acl_tcam_chunk *chunk)
+mlxsw_sp_acl_tcam_vchunk_destroy(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_tcam_vchunk *vchunk)
 {
-       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-       struct mlxsw_sp_acl_tcam_group *group = chunk->group;
-
-       rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node,
-                              mlxsw_sp_acl_tcam_chunk_ht_params);
-       ops->chunk_fini(chunk->priv);
-       mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
-       kfree(chunk);
+       struct mlxsw_sp_acl_tcam_group *group = vchunk->group;
+
+       if (vchunk->chunk2)
+               mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2);
+       mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk);
+       rhashtable_remove_fast(&group->vchunk_ht, &vchunk->ht_node,
+                              mlxsw_sp_acl_tcam_vchunk_ht_params);
+       mlxsw_sp_acl_tcam_vchunk_deassoc(mlxsw_sp, vchunk);
+       kfree(vchunk);
 }
 
-static struct mlxsw_sp_acl_tcam_chunk *
-mlxsw_sp_acl_tcam_chunk_get(struct mlxsw_sp *mlxsw_sp,
-                           struct mlxsw_sp_acl_tcam_group *group,
-                           unsigned int priority,
-                           struct mlxsw_afk_element_usage *elusage)
+static struct mlxsw_sp_acl_tcam_vchunk *
+mlxsw_sp_acl_tcam_vchunk_get(struct mlxsw_sp *mlxsw_sp,
+                            struct mlxsw_sp_acl_tcam_group *group,
+                            unsigned int priority,
+                            struct mlxsw_afk_element_usage *elusage)
 {
-       struct mlxsw_sp_acl_tcam_chunk *chunk;
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
 
-       chunk = rhashtable_lookup_fast(&group->chunk_ht, &priority,
-                                      mlxsw_sp_acl_tcam_chunk_ht_params);
-       if (chunk) {
-               if (WARN_ON(!mlxsw_afk_key_info_subset(chunk->region->key_info,
+       vchunk = rhashtable_lookup_fast(&group->vchunk_ht, &priority,
+                                       mlxsw_sp_acl_tcam_vchunk_ht_params);
+       if (vchunk) {
+               if (WARN_ON(!mlxsw_afk_key_info_subset(vchunk->vregion->key_info,
                                                       elusage)))
                        return ERR_PTR(-EINVAL);
-               chunk->ref_count++;
-               return chunk;
+               vchunk->ref_count++;
+               return vchunk;
        }
-       return mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, group,
-                                             priority, elusage);
+       return mlxsw_sp_acl_tcam_vchunk_create(mlxsw_sp, group,
+                                              priority, elusage);
 }
 
-static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp,
-                                       struct mlxsw_sp_acl_tcam_chunk *chunk)
+static void
+mlxsw_sp_acl_tcam_vchunk_put(struct mlxsw_sp *mlxsw_sp,
+                            struct mlxsw_sp_acl_tcam_vchunk *vchunk)
 {
-       if (--chunk->ref_count)
+       if (--vchunk->ref_count)
                return;
-       mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk);
+       mlxsw_sp_acl_tcam_vchunk_destroy(mlxsw_sp, vchunk);
 }
 
-static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp)
-{
-       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-
-       return ops->entry_priv_size;
-}
-
-static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
-                                      struct mlxsw_sp_acl_tcam_group *group,
-                                      struct mlxsw_sp_acl_tcam_entry *entry,
-                                      struct mlxsw_sp_acl_rule_info *rulei)
+static struct mlxsw_sp_acl_tcam_entry *
+mlxsw_sp_acl_tcam_entry_create(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_acl_tcam_ventry *ventry,
+                              struct mlxsw_sp_acl_tcam_chunk *chunk)
 {
        const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-       struct mlxsw_sp_acl_tcam_chunk *chunk;
-       struct mlxsw_sp_acl_tcam_region *region;
+       struct mlxsw_sp_acl_tcam_entry *entry;
        int err;
 
-       chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, group, rulei->priority,
-                                           &rulei->values.elusage);
-       if (IS_ERR(chunk))
-               return PTR_ERR(chunk);
-
-       region = chunk->region;
+       entry = kzalloc(sizeof(*entry) + ops->entry_priv_size, GFP_KERNEL);
+       if (!entry)
+               return ERR_PTR(-ENOMEM);
+       entry->ventry = ventry;
+       entry->chunk = chunk;
 
-       err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv,
-                            entry->priv, rulei);
+       err = ops->entry_add(mlxsw_sp, chunk->region->priv, chunk->priv,
+                            entry->priv, ventry->rulei);
        if (err)
                goto err_entry_add;
-       entry->chunk = chunk;
 
-       return 0;
+       return entry;
 
 err_entry_add:
-       mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
-       return err;
+       kfree(entry);
+       return ERR_PTR(err);
 }
 
-static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
-                                       struct mlxsw_sp_acl_tcam_entry *entry)
+static void mlxsw_sp_acl_tcam_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+                                           struct mlxsw_sp_acl_tcam_entry *entry)
 {
        const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-       struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
-       struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
-       ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv);
-       mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
+       ops->entry_del(mlxsw_sp, entry->chunk->region->priv,
+                      entry->chunk->priv, entry->priv);
+       kfree(entry);
 }
 
 static int
 mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_acl_tcam_region *region,
                                       struct mlxsw_sp_acl_tcam_entry *entry,
                                       struct mlxsw_sp_acl_rule_info *rulei)
 {
        const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-       struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
-       struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
        return ops->entry_action_replace(mlxsw_sp, region->priv,
                                         entry->priv, rulei);
@@ -798,13 +1005,249 @@ mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
                                     bool *activity)
 {
        const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
-       struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
-       struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
-       return ops->entry_activity_get(mlxsw_sp, region->priv,
+       return ops->entry_activity_get(mlxsw_sp, entry->chunk->region->priv,
                                       entry->priv, activity);
 }
 
+static int mlxsw_sp_acl_tcam_ventry_add(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp_acl_tcam_group *group,
+                                       struct mlxsw_sp_acl_tcam_ventry *ventry,
+                                       struct mlxsw_sp_acl_rule_info *rulei)
+{
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+       int err;
+
+       vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, group, rulei->priority,
+                                             &rulei->values.elusage);
+       if (IS_ERR(vchunk))
+               return PTR_ERR(vchunk);
+
+       ventry->vchunk = vchunk;
+       ventry->rulei = rulei;
+       ventry->entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry,
+                                                      vchunk->chunk);
+       if (IS_ERR(ventry->entry)) {
+               err = PTR_ERR(ventry->entry);
+               goto err_entry_create;
+       }
+
+       list_add_tail(&ventry->list, &vchunk->ventry_list);
+
+       return 0;
+
+err_entry_create:
+       mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk);
+       return err;
+}
+
+static void mlxsw_sp_acl_tcam_ventry_del(struct mlxsw_sp *mlxsw_sp,
+                                        struct mlxsw_sp_acl_tcam_ventry *ventry)
+{
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk;
+
+       list_del(&ventry->list);
+       mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry);
+       mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk);
+}
+
+static int
+mlxsw_sp_acl_tcam_ventry_action_replace(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp_acl_tcam_ventry *ventry,
+                                       struct mlxsw_sp_acl_rule_info *rulei)
+{
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk;
+
+       return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp,
+                                                     vchunk->vregion->region,
+                                                     ventry->entry, rulei);
+}
+
+static int
+mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp,
+                                     struct mlxsw_sp_acl_tcam_ventry *ventry,
+                                     bool *activity)
+{
+       return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp,
+                                                   ventry->entry, activity);
+}
+
+static int
+mlxsw_sp_acl_tcam_ventry_migrate(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_tcam_ventry *ventry,
+                                struct mlxsw_sp_acl_tcam_chunk *chunk2)
+{
+       struct mlxsw_sp_acl_tcam_entry *entry2;
+
+       entry2 = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, chunk2);
+       if (IS_ERR(entry2))
+               return PTR_ERR(entry2);
+       mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry);
+       ventry->entry = entry2;
+       return 0;
+}
+
+static int
+mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp,
+                                    struct mlxsw_sp_acl_tcam_vchunk *vchunk,
+                                    struct mlxsw_sp_acl_tcam_region *region,
+                                    bool this_is_rollback)
+{
+       struct mlxsw_sp_acl_tcam_ventry *ventry;
+       struct mlxsw_sp_acl_tcam_chunk *chunk2;
+       int err;
+       int err2;
+
+       chunk2 = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region);
+       if (IS_ERR(chunk2)) {
+               if (this_is_rollback)
+                       vchunk->vregion->failed_rollback = true;
+               return PTR_ERR(chunk2);
+       }
+       vchunk->chunk2 = chunk2;
+       list_for_each_entry(ventry, &vchunk->ventry_list, list) {
+               err = mlxsw_sp_acl_tcam_ventry_migrate(mlxsw_sp, ventry,
+                                                      vchunk->chunk2);
+               if (err) {
+                       if (this_is_rollback) {
+                               vchunk->vregion->failed_rollback = true;
+                               return err;
+                       }
+                       goto rollback;
+               }
+       }
+       mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk);
+       vchunk->chunk = chunk2;
+       vchunk->chunk2 = NULL;
+       return 0;
+
+rollback:
+       /* Migrate the entries back to the original chunk. If some entry
+        * migration fails, there is no good way to proceed. Mark the
+        * vregion with the "failed_rollback" flag.
+        */
+       list_for_each_entry_continue_reverse(ventry, &vchunk->ventry_list,
+                                            list) {
+               err2 = mlxsw_sp_acl_tcam_ventry_migrate(mlxsw_sp, ventry,
+                                                       vchunk->chunk);
+               if (err2) {
+                       vchunk->vregion->failed_rollback = true;
+                       goto err_rollback;
+               }
+       }
+
+       mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2);
+       vchunk->chunk2 = NULL;
+
+err_rollback:
+       return err;
+}
+
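
The per-vchunk migration above follows a shadow-chunk pattern; in outline (a sketch of the control flow, not new behaviour):

    /* 1. Create chunk2 in the destination region.
     * 2. Re-create every ventry's entry inside chunk2.
     * 3. On success, destroy the old chunk and promote chunk2.
     * 4. On failure, migrate the entries back to the original chunk; if
     *    even that fails, failed_rollback is set and the vregion is
     *    abandoned (the rehash path below returns -EBUSY for it).
     */
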
+static int
+mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp,
+                                    struct mlxsw_sp_acl_tcam_vregion *vregion)
+{
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+       int err;
+
+       list_for_each_entry(vchunk, &vregion->vchunk_list, list) {
+               err = mlxsw_sp_acl_tcam_vchunk_migrate_one(mlxsw_sp, vchunk,
+                                                          vregion->region2,
+                                                          false);
+               if (err)
+                       goto rollback;
+       }
+       return 0;
+
+rollback:
+       list_for_each_entry_continue_reverse(vchunk, &vregion->vchunk_list,
+                                            list) {
+               mlxsw_sp_acl_tcam_vchunk_migrate_one(mlxsw_sp, vchunk,
+                                                    vregion->region, true);
+       }
+       return err;
+}
+
+static int
+mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_acl_tcam_vregion *vregion,
+                                 void *hints_priv)
+{
+       struct mlxsw_sp_acl_tcam_region *region2, *unused_region;
+       int err;
+
+       trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion);
+
+       region2 = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, vregion->tcam,
+                                                 vregion, hints_priv);
+       if (IS_ERR(region2))
+               return PTR_ERR(region2);
+
+       vregion->region2 = region2;
+       err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, region2);
+       if (err)
+               goto err_group_region_attach;
+
+       err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion);
+       if (!vregion->failed_rollback) {
+               if (!err) {
+                       /* In case of successful migration, region2 is used and
+                        * the original is unused.
+                        */
+                       unused_region = vregion->region;
+                       vregion->region = vregion->region2;
+               } else {
+                       /* In case of failure during migration, the original
+                        * region is still used.
+                        */
+                       unused_region = vregion->region2;
+               }
+               vregion->region2 = NULL;
+               mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, unused_region);
+               mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, unused_region);
+       }
+       return err;
+
+err_group_region_attach:
+       vregion->region2 = NULL;
+       mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region2);
+       return err;
+}
+
+static int
+mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_tcam_vregion *vregion)
+{
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+       void *hints_priv;
+       int err;
+
+       trace_mlxsw_sp_acl_tcam_vregion_rehash(mlxsw_sp, vregion);
+       if (vregion->failed_rollback)
+               return -EBUSY;
+
+       hints_priv = ops->region_rehash_hints_get(vregion->region->priv);
+       if (IS_ERR(hints_priv)) {
+               err = PTR_ERR(hints_priv);
+               if (err != -EAGAIN)
+                       dev_err(mlxsw_sp->bus_info->dev, "Failed get rehash hints\n");
+               return err;
+       }
+
+       err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, hints_priv);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n");
+               if (vregion->failed_rollback) {
+                       trace_mlxsw_sp_acl_tcam_vregion_rehash_dis(mlxsw_sp,
+                                                                  vregion);
+                       dev_err(mlxsw_sp->bus_info->dev, "Failed to roll back after vregion migration failure\n");
+               }
+       }
+
+       ops->region_rehash_hints_put(hints_priv);
+       return err;
+}
+
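
Editor's note: the rehash entry point above follows a strict get/use/put discipline around the opaque hints. region_rehash_hints_get() may return an ERR_PTR (with -EAGAIN meaning "nothing to do yet, stay quiet"), the hints must stay alive across the whole migration, and they are released on both the success and failure paths. A condensed sketch of that shape, with hypothetical get_hints/migrate/put_hints callbacks:

    #include <linux/err.h>

    void *get_hints(void *region);
    int migrate(void *region, void *hints);
    void put_hints(void *hints);

    static int rehash_once(void *region)
    {
            void *hints;
            int err;

            hints = get_hints(region);      /* may be ERR_PTR(-EAGAIN) */
            if (IS_ERR(hints))
                    return PTR_ERR(hints);  /* -EAGAIN: retry next interval */

            err = migrate(region, hints);   /* hints live for the migration */
            put_hints(hints);               /* released on success and error */
            return err;
    }
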
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
        MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
        MLXSW_AFK_ELEMENT_DMAC_32_47,
@@ -859,7 +1302,7 @@ struct mlxsw_sp_acl_tcam_flower_ruleset {
 };
 
 struct mlxsw_sp_acl_tcam_flower_rule {
-       struct mlxsw_sp_acl_tcam_entry entry;
+       struct mlxsw_sp_acl_tcam_ventry ventry;
 };
 
 static int
@@ -917,12 +1360,6 @@ mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv)
        return mlxsw_sp_acl_tcam_group_id(&ruleset->group);
 }
 
-static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp)
-{
-       return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) +
-              mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp);
-}
-
 static int
 mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp,
                                  void *ruleset_priv, void *rule_priv,
@@ -931,8 +1368,8 @@ mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp,
        struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
        struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
 
-       return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group,
-                                          &rule->entry, rulei);
+       return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->group,
+                                           &rule->ventry, rulei);
 }
 
 static void
@@ -940,7 +1377,7 @@ mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv)
 {
        struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
 
-       mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry);
+       mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry);
 }
 
 static int
@@ -957,8 +1394,8 @@ mlxsw_sp_acl_tcam_flower_rule_activity_get(struct mlxsw_sp *mlxsw_sp,
 {
        struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
 
-       return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry,
-                                                   activity);
+       return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry,
+                                                    activity);
 }
 
 static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
@@ -968,7 +1405,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
        .ruleset_bind           = mlxsw_sp_acl_tcam_flower_ruleset_bind,
        .ruleset_unbind         = mlxsw_sp_acl_tcam_flower_ruleset_unbind,
        .ruleset_group_id       = mlxsw_sp_acl_tcam_flower_ruleset_group_id,
-       .rule_priv_size         = mlxsw_sp_acl_tcam_flower_rule_priv_size,
+       .rule_priv_size         = sizeof(struct mlxsw_sp_acl_tcam_flower_rule),
        .rule_add               = mlxsw_sp_acl_tcam_flower_rule_add,
        .rule_del               = mlxsw_sp_acl_tcam_flower_rule_del,
        .rule_action_replace    = mlxsw_sp_acl_tcam_flower_rule_action_replace,
@@ -976,12 +1413,12 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
 };
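
Editor's note: the rule_priv_size change visible in this ops table (and in the header hunk below) demotes a per-chip callback to a plain size_t. With entries virtualized into ventries, a rule's private area no longer depends on the underlying A-TCAM/C-TCAM entry size, so it is known at compile time. A sketch of the consumer side, assuming a hypothetical allocation helper:

    #include <linux/slab.h>

    struct profile_ops {
            size_t rule_priv_size;          /* was: size_t (*)(...) */
    };

    /* Hypothetical caller: the rule and its profile-private area are
     * allocated in one block, sized directly from the constant.
     */
    static void *rule_alloc(const struct profile_ops *ops, size_t rule_hdr)
    {
            return kzalloc(rule_hdr + ops->rule_priv_size, GFP_KERNEL);
    }
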
 
 struct mlxsw_sp_acl_tcam_mr_ruleset {
-       struct mlxsw_sp_acl_tcam_chunk *chunk;
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
        struct mlxsw_sp_acl_tcam_group group;
 };
 
 struct mlxsw_sp_acl_tcam_mr_rule {
-       struct mlxsw_sp_acl_tcam_entry entry;
+       struct mlxsw_sp_acl_tcam_ventry ventry;
 };
 
 static int
@@ -1006,10 +1443,11 @@ mlxsw_sp_acl_tcam_mr_ruleset_add(struct mlxsw_sp *mlxsw_sp,
         * specific ACL Group ID which must exist in HW before the multicast
         * router is initialized.
         */
-       ruleset->chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, &ruleset->group,
-                                                    1, tmplt_elusage);
-       if (IS_ERR(ruleset->chunk)) {
-               err = PTR_ERR(ruleset->chunk);
+       ruleset->vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp,
+                                                      &ruleset->group, 1,
+                                                      tmplt_elusage);
+       if (IS_ERR(ruleset->vchunk)) {
+               err = PTR_ERR(ruleset->vchunk);
                goto err_chunk_get;
        }
 
@@ -1025,7 +1463,7 @@ mlxsw_sp_acl_tcam_mr_ruleset_del(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv)
 {
        struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv;
 
-       mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, ruleset->chunk);
+       mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, ruleset->vchunk);
        mlxsw_sp_acl_tcam_group_del(mlxsw_sp, &ruleset->group);
 }
 
@@ -1054,12 +1492,6 @@ mlxsw_sp_acl_tcam_mr_ruleset_group_id(void *ruleset_priv)
        return mlxsw_sp_acl_tcam_group_id(&ruleset->group);
 }
 
-static size_t mlxsw_sp_acl_tcam_mr_rule_priv_size(struct mlxsw_sp *mlxsw_sp)
-{
-       return sizeof(struct mlxsw_sp_acl_tcam_mr_rule) +
-              mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp);
-}
-
 static int
 mlxsw_sp_acl_tcam_mr_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
                              void *rule_priv,
@@ -1068,8 +1500,8 @@ mlxsw_sp_acl_tcam_mr_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
        struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv;
        struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv;
 
-       return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group,
-                                          &rule->entry, rulei);
+       return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->group,
+                                          &rule->ventry, rulei);
 }
 
 static void
@@ -1077,7 +1509,7 @@ mlxsw_sp_acl_tcam_mr_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv)
 {
        struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv;
 
-       mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry);
+       mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry);
 }
 
 static int
@@ -1087,8 +1519,8 @@ mlxsw_sp_acl_tcam_mr_rule_action_replace(struct mlxsw_sp *mlxsw_sp,
 {
        struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv;
 
-       return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp, &rule->entry,
-                                                     rulei);
+       return mlxsw_sp_acl_tcam_ventry_action_replace(mlxsw_sp, &rule->ventry,
+                                                      rulei);
 }
 
 static int
@@ -1097,8 +1529,8 @@ mlxsw_sp_acl_tcam_mr_rule_activity_get(struct mlxsw_sp *mlxsw_sp,
 {
        struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv;
 
-       return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry,
-                                                   activity);
+       return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry,
+                                                    activity);
 }
 
 static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_mr_ops = {
@@ -1108,7 +1540,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_mr_ops = {
        .ruleset_bind           = mlxsw_sp_acl_tcam_mr_ruleset_bind,
        .ruleset_unbind         = mlxsw_sp_acl_tcam_mr_ruleset_unbind,
        .ruleset_group_id       = mlxsw_sp_acl_tcam_mr_ruleset_group_id,
-       .rule_priv_size         = mlxsw_sp_acl_tcam_mr_rule_priv_size,
+       .rule_priv_size         = sizeof(struct mlxsw_sp_acl_tcam_mr_rule),
        .rule_add               = mlxsw_sp_acl_tcam_mr_rule_add,
        .rule_del               = mlxsw_sp_acl_tcam_mr_rule_del,
        .rule_action_replace    = mlxsw_sp_acl_tcam_mr_rule_action_replace,
index 0858d5b..96bd42a 100644 (file)
@@ -17,6 +17,8 @@ struct mlxsw_sp_acl_tcam {
        unsigned long *used_groups;  /* bit array */
        unsigned int max_groups;
        unsigned int max_group_size;
+       struct list_head vregion_list;
+       u32 vregion_rehash_intrvl;   /* ms */
        unsigned long priv[0];
        /* priv has to be always the last item */
 };
@@ -26,6 +28,11 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
                           struct mlxsw_sp_acl_tcam *tcam);
 void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_acl_tcam *tcam);
+u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp,
+                                               struct mlxsw_sp_acl_tcam *tcam);
+int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp,
+                                               struct mlxsw_sp_acl_tcam *tcam,
+                                               u32 val);
 int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_acl_rule_info *rulei,
                                   u32 *priority, bool fillup_priority);
@@ -43,7 +50,7 @@ struct mlxsw_sp_acl_profile_ops {
                               struct mlxsw_sp_port *mlxsw_sp_port,
                               bool ingress);
        u16 (*ruleset_group_id)(void *ruleset_priv);
-       size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp);
+       size_t rule_priv_size;
        int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
                        void *ruleset_priv, void *rule_priv,
                        struct mlxsw_sp_acl_rule_info *rulei);
@@ -67,11 +74,10 @@ mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
        (MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN * BITS_PER_BYTE)
 
 struct mlxsw_sp_acl_tcam_group;
+struct mlxsw_sp_acl_tcam_vregion;
 
 struct mlxsw_sp_acl_tcam_region {
-       struct list_head list; /* Member of a TCAM group */
-       struct list_head chunk_list; /* List of chunks under this region */
-       struct mlxsw_sp_acl_tcam_group *group;
+       struct mlxsw_sp_acl_tcam_vregion *vregion;
        enum mlxsw_reg_ptar_key_type key_type;
        u16 id; /* ACL ID and region ID - they are same */
        char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
@@ -207,6 +213,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_acl_atcam *atcam,
                               struct mlxsw_sp_acl_atcam_region *aregion,
                               struct mlxsw_sp_acl_tcam_region *region,
+                              void *hints_priv,
                               const struct mlxsw_sp_acl_ctcam_region_ops *ops);
 void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion);
 void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion,
@@ -230,6 +237,9 @@ int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_acl_atcam *atcam);
 void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp,
                             struct mlxsw_sp_acl_atcam *atcam);
+void *
+mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion);
+void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv);
 
 struct mlxsw_sp_acl_erp_delta;
 
@@ -260,7 +270,11 @@ void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_acl_atcam_region *aregion,
                                struct mlxsw_sp_acl_erp_mask *erp_mask,
                                struct mlxsw_sp_acl_atcam_entry *aentry);
-int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion);
+void *
+mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion);
+void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv);
+int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion,
+                                void *hints_priv);
 void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion);
 int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp,
                           struct mlxsw_sp_acl_atcam *atcam);
index 9af9f5c..15f8044 100644 (file)
@@ -102,8 +102,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
                                return err;
                        }
                        break;
-               case FLOW_ACTION_VLAN_PUSH:
-               case FLOW_ACTION_VLAN_POP: {
+               case FLOW_ACTION_VLAN_MANGLE: {
                        u16 proto = be16_to_cpu(act->vlan.proto);
                        u8 prio = act->vlan.prio;
                        u16 vid = act->vlan.vid;
index 6754061..52fed8c 100644 (file)
@@ -3814,13 +3814,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
        struct mlxsw_sp_nexthop_group *nh_grp;
        struct mlxsw_sp_nexthop *nh;
        struct fib_nh *fib_nh;
-       size_t alloc_size;
        int i;
        int err;
 
-       alloc_size = sizeof(*nh_grp) +
-                    fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
-       nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+       nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs),
+                        GFP_KERNEL);
        if (!nh_grp)
                return ERR_PTR(-ENOMEM);
        nh_grp->priv = fi;
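
Editor's note: struct_size() (from <linux/overflow.h>) replaces the open-coded sizeof arithmetic in both nexthop-group allocators here. Besides being shorter, it saturates to SIZE_MAX if the multiply or add overflows, so a corrupt element count produces a failed allocation instead of an undersized one. A minimal illustration with hypothetical types:

    #include <linux/overflow.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct group {
            int count;
            struct { u32 val; } entries[];  /* trailing flexible array */
    };

    static struct group *group_alloc(size_t n)
    {
            struct group *grp;

            /* sizeof(*grp) + n * sizeof(grp->entries[0]), overflow-checked;
             * the macro only inspects types, grp is not dereferenced here.
             */
            grp = kzalloc(struct_size(grp, entries, n), GFP_KERNEL);
            if (grp)
                    grp->count = n;
            return grp;
    }
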
@@ -5066,13 +5064,11 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
        struct mlxsw_sp_nexthop_group *nh_grp;
        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
        struct mlxsw_sp_nexthop *nh;
-       size_t alloc_size;
        int i = 0;
        int err;
 
-       alloc_size = sizeof(*nh_grp) +
-                    fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
-       nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+       nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
+                        GFP_KERNEL);
        if (!nh_grp)
                return ERR_PTR(-ENOMEM);
        INIT_LIST_HEAD(&nh_grp->fib_list);
@@ -6146,7 +6142,7 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
 
        mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
-       if (WARN_ON_ONCE(err))
+       if (err)
                return err;
 
        mlxsw_reg_ritr_enable_set(ritr_pl, false);
index 95e37de..1f492b7 100644 (file)
@@ -431,30 +431,10 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan)
                mlxsw_sp_bridge_vlan_destroy(bridge_vlan);
 }
 
-static void mlxsw_sp_port_bridge_flags_get(struct mlxsw_sp_bridge *bridge,
-                                          struct net_device *dev,
-                                          unsigned long *brport_flags)
-{
-       struct mlxsw_sp_bridge_port *bridge_port;
-
-       bridge_port = mlxsw_sp_bridge_port_find(bridge, dev);
-       if (WARN_ON(!bridge_port))
-               return;
-
-       memcpy(brport_flags, &bridge_port->flags, sizeof(*brport_flags));
-}
-
 static int mlxsw_sp_port_attr_get(struct net_device *dev,
                                  struct switchdev_attr *attr)
 {
-       struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-
        switch (attr->id) {
-       case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-               mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev,
-                                              &attr->u.brport_flags);
-               break;
        case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
                attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD |
                                               BR_MCAST_FLOOD;
index b881f5d..6006d47 100644 (file)
@@ -391,7 +391,7 @@ ks8695_tx_irq(int irq, void *dev_id)
                                         ksp->tx_buffers[buff_n].dma_ptr,
                                         ksp->tx_buffers[buff_n].length,
                                         DMA_TO_DEVICE);
-                       dev_kfree_skb_irq(ksp->tx_buffers[buff_n].skb);
+                       dev_consume_skb_irq(ksp->tx_buffers[buff_n].skb);
                        ksp->tx_buffers[buff_n].skb = NULL;
                        ksp->tx_ring_used--;
                }
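
Editor's note: this is the first of many hunks in this section converting successful TX completions from dev_kfree_skb_irq() to dev_consume_skb_irq(). Both free the skb from hardirq context; the difference is diagnostic only: kfree flags the skb as dropped (visible to drop monitors such as dropwatch), consume flags it as delivered. A sketch of the rule of thumb in a hypothetical completion handler:

    #include <linux/netdevice.h>

    /* Hypothetical TX-completion fragment. */
    static void tx_complete(struct sk_buff *skb, bool sent_ok)
    {
            if (sent_ok)
                    dev_consume_skb_irq(skb);  /* left the wire: not a drop */
            else
                    dev_kfree_skb_irq(skb);    /* error path: counts as drop */
    }
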
index b34055a..e165175 100644 (file)
@@ -81,11 +81,13 @@ static void moxart_mac_free_memory(struct net_device *ndev)
                                 priv->rx_buf_size, DMA_FROM_DEVICE);
 
        if (priv->tx_desc_base)
-               dma_free_coherent(NULL, TX_REG_DESC_SIZE * TX_DESC_NUM,
+               dma_free_coherent(&priv->pdev->dev,
+                                 TX_REG_DESC_SIZE * TX_DESC_NUM,
                                  priv->tx_desc_base, priv->tx_base);
 
        if (priv->rx_desc_base)
-               dma_free_coherent(NULL, RX_REG_DESC_SIZE * RX_DESC_NUM,
+               dma_free_coherent(&priv->pdev->dev,
+                                 RX_REG_DESC_SIZE * RX_DESC_NUM,
                                  priv->rx_desc_base, priv->rx_base);
 
        kfree(priv->tx_buf_base);
@@ -298,7 +300,7 @@ static void moxart_tx_finished(struct net_device *ndev)
                ndev->stats.tx_packets++;
                ndev->stats.tx_bytes += priv->tx_skb[tx_tail]->len;
 
-               dev_kfree_skb_irq(priv->tx_skb[tx_tail]);
+               dev_consume_skb_irq(priv->tx_skb[tx_tail]);
                priv->tx_skb[tx_tail] = NULL;
 
                tx_tail = TX_NEXT(tx_tail);
@@ -476,6 +478,7 @@ static int moxart_mac_probe(struct platform_device *pdev)
 
        priv = netdev_priv(ndev);
        priv->ndev = ndev;
+       priv->pdev = pdev;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        ndev->base_addr = res->start;
@@ -491,7 +494,7 @@ static int moxart_mac_probe(struct platform_device *pdev)
        priv->tx_buf_size = TX_BUF_SIZE;
        priv->rx_buf_size = RX_BUF_SIZE;
 
-       priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE *
+       priv->tx_desc_base = dma_alloc_coherent(&pdev->dev, TX_REG_DESC_SIZE *
                                                TX_DESC_NUM, &priv->tx_base,
                                                GFP_DMA | GFP_KERNEL);
        if (!priv->tx_desc_base) {
@@ -499,7 +502,7 @@ static int moxart_mac_probe(struct platform_device *pdev)
                goto init_fail;
        }
 
-       priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE *
+       priv->rx_desc_base = dma_alloc_coherent(&pdev->dev, RX_REG_DESC_SIZE *
                                                RX_DESC_NUM, &priv->rx_base,
                                                GFP_DMA | GFP_KERNEL);
        if (!priv->rx_desc_base) {
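
Editor's note: passing NULL as the struct device to the DMA API was long tolerated but is no longer accepted, since the device carries the DMA mask and dma_ops. That is why moxart (and meth, further below) now stash the platform device in their private struct and pass &pdev->dev throughout. A minimal sketch of the pattern, with hypothetical names:

    #include <linux/dma-mapping.h>
    #include <linux/platform_device.h>

    struct moxart_like_priv {               /* hypothetical reduced priv */
            struct platform_device *pdev;   /* saved at probe time */
            void *desc;
            dma_addr_t desc_dma;
    };

    static int ring_alloc(struct moxart_like_priv *p, size_t size)
    {
            /* A real device, not NULL, supplies the mask and ops. */
            p->desc = dma_alloc_coherent(&p->pdev->dev, size,
                                         &p->desc_dma, GFP_KERNEL);
            return p->desc ? 0 : -ENOMEM;
    }
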
index bee608b..bf4c302 100644 (file)
 #define LINK_STATUS            0x4
 
 struct moxart_mac_priv_t {
+       struct platform_device *pdev;
        void __iomem *base;
        unsigned int reg_maccr;
        unsigned int reg_imr;
index 19ce0e6..e0340f7 100644 (file)
@@ -1408,7 +1408,7 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
                if (skb) {
                        ss->stats.tx_bytes += skb->len;
                        ss->stats.tx_packets++;
-                       dev_kfree_skb_irq(skb);
+                       dev_consume_skb_irq(skb);
                        if (len)
                                pci_unmap_single(pdev,
                                                 dma_unmap_addr(&tx->info[idx],
index b9a1a9f..1a2634c 100644 (file)
@@ -2173,7 +2173,7 @@ static void netdev_tx_done(struct net_device *dev)
                                        np->tx_skbuff[entry]->len,
                                        PCI_DMA_TODEVICE);
                /* Free the original skb. */
-               dev_kfree_skb_irq(np->tx_skbuff[entry]);
+               dev_consume_skb_irq(np->tx_skbuff[entry]);
                np->tx_skbuff[entry] = NULL;
        }
        if (netif_queue_stopped(dev) &&
index 958fced..000009e 100644 (file)
@@ -1003,7 +1003,7 @@ static void do_tx_done(struct net_device *ndev)
                                        addr,
                                        len,
                                        PCI_DMA_TODEVICE);
-                       dev_kfree_skb_irq(skb);
+                       dev_consume_skb_irq(skb);
                        atomic_dec(&dev->nr_tx_skbs);
                } else
                        pci_unmap_page(dev->pci_dev,
index c805dcb..aaec009 100644 (file)
@@ -328,7 +328,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
                                }
 
                                /* We must free the original skb */
-                               dev_kfree_skb_irq(lp->tx_skb[entry]);
+                               dev_consume_skb_irq(lp->tx_skb[entry]);
                                lp->tx_skb[entry] = NULL;
                                /* and unmap DMA buffer */
                                dma_unmap_single(lp->device, lp->tx_laddr[entry], lp->tx_len[entry], DMA_TO_DEVICE);
index 82be900..feda964 100644 (file)
@@ -3055,7 +3055,7 @@ static void tx_intr_handler(struct fifo_info *fifo_data)
 
                /* Updating the statistics block */
                swstats->mem_freed += skb->truesize;
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
 
                get_info.offset++;
                if (get_info.offset == get_info.fifo_len + 1)
index 0da7393..b877ace 100644 (file)
@@ -114,7 +114,7 @@ static inline void VXGE_COMPLETE_VPATH_TX(struct vxge_fifo *fifo)
 
                /* free SKBs */
                for (temp = completed; temp != skb_ptr; temp++)
-                       dev_kfree_skb_irq(*temp);
+                       dev_consume_skb_irq(*temp);
        } while (more);
 }
 
index 583e97c..eeda4ed 100644 (file)
@@ -345,7 +345,7 @@ static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask)
 }
 
 static int
-nfp_fl_set_eth(const struct flow_action_entry *act, int idx, u32 off,
+nfp_fl_set_eth(const struct flow_action_entry *act, u32 off,
               struct nfp_fl_set_eth *set_eth)
 {
        u32 exact, mask;
@@ -376,7 +376,7 @@ struct ipv4_ttl_word {
 };
 
 static int
-nfp_fl_set_ip4(const struct flow_action_entry *act, int idx, u32 off,
+nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off,
               struct nfp_fl_set_ip4_addrs *set_ip_addr,
               struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos)
 {
@@ -505,7 +505,7 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask,
 }
 
 static int
-nfp_fl_set_ip6(const struct flow_action_entry *act, int idx, u32 off,
+nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off,
               struct nfp_fl_set_ipv6_addr *ip_dst,
               struct nfp_fl_set_ipv6_addr *ip_src,
               struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl)
@@ -541,7 +541,7 @@ nfp_fl_set_ip6(const struct flow_action_entry *act, int idx, u32 off,
 }
 
 static int
-nfp_fl_set_tport(const struct flow_action_entry *act, int idx, u32 off,
+nfp_fl_set_tport(const struct flow_action_entry *act, u32 off,
                 struct nfp_fl_set_tport *set_tport, int opcode)
 {
        u32 exact, mask;
@@ -598,8 +598,8 @@ nfp_fl_pedit(const struct flow_action_entry *act,
        struct nfp_fl_set_eth set_eth;
        size_t act_size = 0;
        u8 ip_proto = 0;
-       int idx, err;
        u32 offset;
+       int err;
 
        memset(&set_ip6_tc_hl_fl, 0, sizeof(set_ip6_tc_hl_fl));
        memset(&set_ip_ttl_tos, 0, sizeof(set_ip_ttl_tos));
@@ -614,22 +614,22 @@ nfp_fl_pedit(const struct flow_action_entry *act,
 
        switch (htype) {
        case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
-               err = nfp_fl_set_eth(act, idx, offset, &set_eth);
+               err = nfp_fl_set_eth(act, offset, &set_eth);
                break;
        case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
-               err = nfp_fl_set_ip4(act, idx, offset, &set_ip_addr,
+               err = nfp_fl_set_ip4(act, offset, &set_ip_addr,
                                     &set_ip_ttl_tos);
                break;
        case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
-               err = nfp_fl_set_ip6(act, idx, offset, &set_ip6_dst,
+               err = nfp_fl_set_ip6(act, offset, &set_ip6_dst,
                                     &set_ip6_src, &set_ip6_tc_hl_fl);
                break;
        case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
-               err = nfp_fl_set_tport(act, idx, offset, &set_tport,
+               err = nfp_fl_set_tport(act, offset, &set_tport,
                                       NFP_FL_ACTION_OPCODE_SET_TCP);
                break;
        case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
-               err = nfp_fl_set_tport(act, idx, offset, &set_tport,
+               err = nfp_fl_set_tport(act, offset, &set_tport,
                                       NFP_FL_ACTION_OPCODE_SET_UDP);
                break;
        default:
index 56b22ea..cf9e111 100644 (file)
@@ -45,11 +45,9 @@ nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports)
 {
        struct nfp_flower_cmsg_mac_repr *msg;
        struct sk_buff *skb;
-       unsigned int size;
 
-       size = sizeof(*msg) + num_ports * sizeof(msg->ports[0]);
-       skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR,
-                                   GFP_KERNEL);
+       skb = nfp_flower_cmsg_alloc(app, struct_size(msg, ports, num_ports),
+                                   NFP_FLOWER_CMSG_TYPE_MAC_REPR, GFP_KERNEL);
        if (!skb)
                return NULL;
 
index fe1469d..450d729 100644 (file)
@@ -187,7 +187,6 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
                if (ipv4_addrs.mask->dst != cpu_to_be32(~0))
                        return -EOPNOTSUPP;
 
-
                flow_rule_match_enc_ports(rule, &enc_ports);
                if (enc_ports.mask->dst != cpu_to_be16(~0))
                        return -EOPNOTSUPP;
index dddbb05..080a301 100644 (file)
@@ -178,7 +178,7 @@ static const struct nfp_devlink_versions_simple {
 } nfp_devlink_versions_hwinfo[] = {
        { DEVLINK_INFO_VERSION_GENERIC_BOARD_ID,        "assembly.partno", },
        { DEVLINK_INFO_VERSION_GENERIC_BOARD_REV,       "assembly.revision", },
-       { "board.vendor", /* fab */                     "assembly.vendor", },
+       { DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE, "assembly.vendor", },
        { "board.model", /* code name */                "assembly.model", },
 };
 
@@ -258,18 +258,33 @@ nfp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
                     struct netlink_ext_ack *extack)
 {
        struct nfp_pf *pf = devlink_priv(devlink);
+       const char *sn, *vendor, *part;
        struct nfp_nsp *nsp;
        char *buf = NULL;
-       const char *sn;
        int err;
 
        err = devlink_info_driver_name_put(req, "nfp");
        if (err)
                return err;
 
+       vendor = nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor");
+       part = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno");
        sn = nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial");
-       if (sn) {
-               err = devlink_info_serial_number_put(req, sn);
+       if (vendor && part && sn) {
+               char *buf;
+
+               buf = kmalloc(strlen(vendor) + strlen(part) + strlen(sn) + 1,
+                             GFP_KERNEL);
+               if (!buf)
+                       return -ENOMEM;
+
+               buf[0] = '\0';
+               strcat(buf, vendor);
+               strcat(buf, part);
+               strcat(buf, sn);
+
+               err = devlink_info_serial_number_put(req, buf);
+               kfree(buf);
                if (err)
                        return err;
        }
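
Editor's note: the new serial number is vendor, part and sn concatenated into a buffer sized from the same three strlen() calls plus a NUL, so the strcat() chain cannot overflow; note that the inner char *buf shadows the function-scope buf declared earlier in this function. kasprintf() would express the same construction in one step; a hedged alternative sketch using the same context variables, not what the driver does:

    #include <linux/slab.h>

    /* Allocation and formatting in one call, same GFP_KERNEL context. */
    buf = kasprintf(GFP_KERNEL, "%s%s%s", vendor, part, sn);
    if (!buf)
            return -ENOMEM;
    err = devlink_info_serial_number_put(req, buf);
    kfree(buf);
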
index c662c6f..67bf02b 100644 (file)
@@ -630,7 +630,7 @@ static int w90p910_ether_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        if (!(w90p910_send_frame(dev, skb->data, skb->len))) {
                ether->skb = skb;
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
                return 0;
        }
        return -EAGAIN;
index c9529c2..eee883a 100644 (file)
@@ -1337,7 +1337,7 @@ static irqreturn_t hamachi_interrupt(int irq, void *dev_instance)
                                                        leXX_to_cpu(hmp->tx_ring[entry].addr),
                                                        skb->len,
                                                        PCI_DMA_TODEVICE);
-                                               dev_kfree_skb_irq(skb);
+                                               dev_consume_skb_irq(skb);
                                                hmp->tx_skbuff[entry] = NULL;
                                        }
                                        hmp->tx_ring[entry].status_n_length = 0;
index 54224d1..6f8d658 100644 (file)
@@ -925,7 +925,7 @@ static irqreturn_t yellowfin_interrupt(int irq, void *dev_instance)
                        /* Free the original skb. */
                        pci_unmap_single(yp->pci_dev, le32_to_cpu(yp->tx_ring[entry].addr),
                                skb->len, PCI_DMA_TODEVICE);
-                       dev_kfree_skb_irq(skb);
+                       dev_consume_skb_irq(skb);
                        yp->tx_skbuff[entry] = NULL;
                }
                if (yp->tx_full &&
@@ -983,7 +983,7 @@ static irqreturn_t yellowfin_interrupt(int irq, void *dev_instance)
                                pci_unmap_single(yp->pci_dev,
                                        yp->tx_ring[entry<<1].addr, skb->len,
                                        PCI_DMA_TODEVICE);
-                               dev_kfree_skb_irq(skb);
+                               dev_consume_skb_irq(skb);
                                yp->tx_skbuff[entry] = 0;
                                /* Mark status as empty. */
                                yp->tx_status[entry].tx_errs = 0;
index 3b0955d..2d21c94 100644 (file)
@@ -53,7 +53,7 @@
 extern const struct qed_common_ops qed_common_ops_pass;
 
 #define QED_MAJOR_VERSION              8
-#define QED_MINOR_VERSION              33
+#define QED_MINOR_VERSION              37
 #define QED_REVISION_VERSION           0
 #define QED_ENGINEERING_VERSION                20
 
index 35c9f48..e61d1d9 100644 (file)
@@ -2135,12 +2135,12 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
                struct qed_eth_pf_params *p_params =
                    &p_hwfn->pf_params.eth_pf_params;
 
-                       if (!p_params->num_vf_cons)
-                               p_params->num_vf_cons =
-                                   ETH_PF_PARAMS_VF_CONS_DEFAULT;
-                       qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
-                                                   p_params->num_cons,
-                                                   p_params->num_vf_cons);
+               if (!p_params->num_vf_cons)
+                       p_params->num_vf_cons =
+                           ETH_PF_PARAMS_VF_CONS_DEFAULT;
+               qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
+                                           p_params->num_cons,
+                                           p_params->num_vf_cons);
                p_hwfn->p_cxt_mngr->arfs_count = p_params->num_arfs_filters;
                break;
        }
index 417121e..37edaa8 100644 (file)
@@ -12796,6 +12796,7 @@ struct public_drv_mb {
 #define FW_MB_PARAM_GET_PF_RDMA_BOTH           0x3
 
 /* get MFW feature support response */
+#define FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ  0x00000001
 #define FW_MB_PARAM_FEATURE_SUPPORT_EEE                0x00000002
 #define FW_MB_PARAM_FEATURE_SUPPORT_VLINK      0x00010000
 
index e68ca83..58be1c4 100644 (file)
@@ -2216,7 +2216,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
                        u16 num_queues = 0;
 
                        /* Since the feature controls only queue-zones,
-                        * make sure we have the contexts [rx, tx, xdp] to
+                        * make sure we have the contexts [rx, xdp, tcs] to
                         * match.
                         */
                        for_each_hwfn(cdev, i) {
@@ -2226,7 +2226,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
                                u16 cids;
 
                                cids = hwfn->pf_params.eth_pf_params.num_cons;
-                               num_queues += min_t(u16, l2_queues, cids / 3);
+                               cids /= (2 + info->num_tc);
+                               num_queues += min_t(u16, l2_queues, cids);
                        }
 
                        /* queues might theoretically be >256, but interrupts'
@@ -2870,7 +2871,8 @@ static int qed_get_coalesce(struct qed_dev *cdev, u16 *coal, void *handle)
        p_hwfn = p_cid->p_owner;
        rc = qed_get_queue_coalesce(p_hwfn, coal, handle);
        if (rc)
-               DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n");
+               DP_VERBOSE(cdev, QED_MSG_DEBUG,
+                          "Unable to read queue coalescing\n");
 
        return rc;
 }
index b473526..f164d4a 100644 (file)
@@ -281,6 +281,8 @@ int qed_fill_dev_info(struct qed_dev *cdev,
                if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME)
                        dev_info->wol_support = true;
 
+               dev_info->smart_an = qed_mcp_is_smart_an_supported(p_hwfn);
+
                dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id;
        } else {
                qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major,
index bb85418..cc27fd6 100644 (file)
@@ -3654,6 +3654,12 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
        }
 }
 
+bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn)
+{
+       return !!(p_hwfn->mcp_info->capabilities &
+                 FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ);
+}
+
 int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 mcp_resp;
index 6e1d72a..2799e67 100644 (file)
@@ -1148,6 +1148,16 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
                                    struct qed_resc_unlock_params *p_unlock,
                                    enum qed_resc_lock
                                    resource, bool b_is_permanent);
+
+/**
+ * @brief - Return whether the management firmware supports smart AN
+ *
+ * @param p_hwfn
+ *
+ * @return bool - true if feature is supported.
+ */
+bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn);
+
 /**
  * @brief Learn of supported MFW features; To be done during early init
  *
index 4179c90..96ab77a 100644 (file)
@@ -382,6 +382,7 @@ void qed_consq_setup(struct qed_hwfn *p_hwfn);
  * @param p_hwfn
  */
 void qed_consq_free(struct qed_hwfn *p_hwfn);
+int qed_spq_pend_post(struct qed_hwfn *p_hwfn);
 
 /**
  * @file
index 888274f..5a495fd 100644 (file)
@@ -604,6 +604,9 @@ int qed_sp_pf_update_stag(struct qed_hwfn *p_hwfn)
 
        p_ent->ramrod.pf_update.update_mf_vlan_flag = true;
        p_ent->ramrod.pf_update.mf_vlan = cpu_to_le16(p_hwfn->hw_info.ovlan);
+       if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
+               p_ent->ramrod.pf_update.mf_vlan |=
+                       cpu_to_le16(((u16)p_hwfn->ufp_info.tc << 13));
 
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
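
Editor's note: the `<< 13` places the UFP traffic class in the PCP field of the 16-bit VLAN TCI, whose layout is PCP[15:13], DEI[12], VID[11:0]. A worked example with hypothetical values:

    #include <linux/types.h>

    /* tc = 5, vid = 100: (5 << 13) | 100 == 0xa064. */
    static u16 build_tci(u8 tc, u16 vid)
    {
            return ((u16)tc << 13) | (vid & 0xfff);
    }
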
index 3e0f7c4..79b311b 100644 (file)
@@ -397,6 +397,11 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 
        qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain));
 
+       /* Attempt to post pending requests */
+       spin_lock_bh(&p_hwfn->p_spq->lock);
+       rc = qed_spq_pend_post(p_hwfn);
+       spin_unlock_bh(&p_hwfn->p_spq->lock);
+
        return rc;
 }
 
@@ -767,7 +772,7 @@ static int qed_spq_post_list(struct qed_hwfn *p_hwfn,
        return 0;
 }
 
-static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
+int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
 {
        struct qed_spq *p_spq = p_hwfn->p_spq;
        struct qed_spq_entry *p_ent = NULL;
@@ -927,7 +932,6 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
        struct qed_spq_entry    *p_ent = NULL;
        struct qed_spq_entry    *tmp;
        struct qed_spq_entry    *found = NULL;
-       int                     rc;
 
        if (!p_hwfn)
                return -EINVAL;
@@ -985,12 +989,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
                 */
                qed_spq_return_entry(p_hwfn, found);
 
-       /* Attempt to post pending requests */
-       spin_lock_bh(&p_spq->lock);
-       rc = qed_spq_pend_post(p_hwfn);
-       spin_unlock_bh(&p_spq->lock);
-
-       return rc;
+       return 0;
 }
 
 int qed_consq_alloc(struct qed_hwfn *p_hwfn)
index 8434164..63a7816 100644 (file)
@@ -56,7 +56,7 @@
 #include <net/tc_act/tc_gact.h>
 
 #define QEDE_MAJOR_VERSION             8
-#define QEDE_MINOR_VERSION             33
+#define QEDE_MINOR_VERSION             37
 #define QEDE_REVISION_VERSION          0
 #define QEDE_ENGINEERING_VERSION       20
 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \
@@ -497,6 +497,9 @@ struct qede_reload_args {
 
 /* Datapath functions definition */
 netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev);
+u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
+                     struct net_device *sb_dev,
+                     select_queue_fallback_t fallback);
 netdev_features_t qede_features_check(struct sk_buff *skb,
                                      struct net_device *dev,
                                      netdev_features_t features);
index 16331c6..c623808 100644 (file)
@@ -186,11 +186,13 @@ static const struct {
 
 enum {
        QEDE_PRI_FLAG_CMT,
+       QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */
        QEDE_PRI_FLAG_LEN,
 };
 
 static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = {
        "Coupled-Function",
+       "SmartAN capable",
 };
 
 enum qede_ethtool_tests {
@@ -404,8 +406,15 @@ static int qede_get_sset_count(struct net_device *dev, int stringset)
 static u32 qede_get_priv_flags(struct net_device *dev)
 {
        struct qede_dev *edev = netdev_priv(dev);
+       u32 flags = 0;
 
-       return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT;
+       if (edev->dev_info.common.num_hwfns > 1)
+               flags |= BIT(QEDE_PRI_FLAG_CMT);
+
+       if (edev->dev_info.common.smart_an)
+               flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT);
+
+       return flags;
 }
 
 struct qede_link_mode_mapping {
index bdf816f..31b046e 100644 (file)
@@ -1695,6 +1695,19 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        return NETDEV_TX_OK;
 }
 
+u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
+                     struct net_device *sb_dev,
+                     select_queue_fallback_t fallback)
+{
+       struct qede_dev *edev = netdev_priv(dev);
+       int total_txq;
+
+       total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc;
+
+       return QEDE_TSS_COUNT(edev) ?
+               fallback(dev, skb, NULL) % total_txq :  0;
+}
+
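
Editor's note: with num_tc traffic classes, qede now exposes QEDE_TSS_COUNT(edev) * num_tc TX queues, and the fallback hash result is folded modulo that total so every hash lands on a valid queue; the ternary guards the degenerate no-TSS case. A worked example with hypothetical counts:

    static unsigned int pick_queue(unsigned int hash)
    {
            unsigned int tss = 8, num_tc = 4;        /* hypothetical */
            unsigned int total_txq = tss * num_tc;   /* 32 queues */

            return hash % total_txq;                 /* 1234 -> 18 */
    }
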
 /* 8B udp header + 8B base tunnel header + 32B option length */
 #define QEDE_MAX_TUN_HDR_LEN 48
 
index 6b4d966..02a97c6 100644 (file)
@@ -621,6 +621,7 @@ static const struct net_device_ops qede_netdev_ops = {
        .ndo_open = qede_open,
        .ndo_stop = qede_close,
        .ndo_start_xmit = qede_start_xmit,
+       .ndo_select_queue = qede_select_queue,
        .ndo_set_rx_mode = qede_set_rx_mode,
        .ndo_set_mac_address = qede_set_mac_addr,
        .ndo_validate_addr = eth_validate_addr,
@@ -656,6 +657,7 @@ static const struct net_device_ops qede_netdev_vf_ops = {
        .ndo_open = qede_open,
        .ndo_stop = qede_close,
        .ndo_start_xmit = qede_start_xmit,
+       .ndo_select_queue = qede_select_queue,
        .ndo_set_rx_mode = qede_set_rx_mode,
        .ndo_set_mac_address = qede_set_mac_addr,
        .ndo_validate_addr = eth_validate_addr,
@@ -674,6 +676,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = {
        .ndo_open = qede_open,
        .ndo_stop = qede_close,
        .ndo_start_xmit = qede_start_xmit,
+       .ndo_select_queue = qede_select_queue,
        .ndo_set_rx_mode = qede_set_rx_mode,
        .ndo_set_mac_address = qede_set_mac_addr,
        .ndo_validate_addr = eth_validate_addr,
index 5edbd53..a6886cc 100644 (file)
@@ -249,8 +249,8 @@ static void ql_update_stats(struct ql_adapter *qdev)
 
        spin_lock(&qdev->stats_lock);
        if (ql_sem_spinlock(qdev, qdev->xg_sem_mask)) {
-                       netif_err(qdev, drv, qdev->ndev,
-                                 "Couldn't get xgmac sem.\n");
+               netif_err(qdev, drv, qdev->ndev,
+                         "Couldn't get xgmac sem.\n");
                goto quit;
        }
        /*
index 059ba94..096515c 100644 (file)
@@ -1159,10 +1159,10 @@ static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 
                        map = lbq_desc->p.pg_chunk.map +
                                lbq_desc->p.pg_chunk.offset;
-                               dma_unmap_addr_set(lbq_desc, mapaddr, map);
+                       dma_unmap_addr_set(lbq_desc, mapaddr, map);
                        dma_unmap_len_set(lbq_desc, maplen,
                                        rx_ring->lbq_buf_size);
-                               *lbq_desc->addr = cpu_to_le64(map);
+                       *lbq_desc->addr = cpu_to_le64(map);
 
                        pci_dma_sync_single_for_device(qdev->pdev, map,
                                                rx_ring->lbq_buf_size,
index 8d79031..20d2400 100644 (file)
@@ -1204,7 +1204,7 @@ void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q)
                if (tpbuf->skb) {
                        pkts_compl++;
                        bytes_compl += tpbuf->skb->len;
-                       dev_kfree_skb_irq(tpbuf->skb);
+                       dev_consume_skb_irq(tpbuf->skb);
                        tpbuf->skb = NULL;
                }
 
index 1dd7213..10bab1f 100644 (file)
@@ -1288,11 +1288,13 @@ static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
 static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
 {
        RTL_W16(tp, IntrStatus, bits);
+       mmiowb();
 }
 
 static void rtl_irq_disable(struct rtl8169_private *tp)
 {
        RTL_W16(tp, IntrMask, 0);
+       mmiowb();
 }
 
 #define RTL_EVENT_NAPI_RX      (RxOK | RxErr)
@@ -6192,7 +6194,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
        struct device *d = tp_to_dev(tp);
        dma_addr_t mapping;
        u32 opts[2], len;
-       bool stop_queue;
        int frags;
 
        if (unlikely(!rtl_tx_slots_avail(tp, skb_shinfo(skb)->nr_frags))) {
@@ -6234,6 +6235,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 
        txd->opts2 = cpu_to_le32(opts[1]);
 
+       netdev_sent_queue(dev, skb->len);
+
        skb_tx_timestamp(skb);
 
        /* Force memory writes to complete before releasing descriptor */
@@ -6246,14 +6249,16 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 
        tp->cur_tx += frags + 1;
 
-       stop_queue = !rtl_tx_slots_avail(tp, MAX_SKB_FRAGS);
-       if (unlikely(stop_queue))
-               netif_stop_queue(dev);
+       RTL_W8(tp, TxPoll, NPQ);
 
-       if (__netdev_sent_queue(dev, skb->len, skb->xmit_more))
-               RTL_W8(tp, TxPoll, NPQ);
+       mmiowb();
 
-       if (unlikely(stop_queue)) {
+       if (!rtl_tx_slots_avail(tp, MAX_SKB_FRAGS)) {
+               /* Avoid wrongly optimistic queue wake-up: rtl_tx thread must
+                * not miss a ring update when it notices a stopped queue.
+                */
+               smp_wmb();
+               netif_stop_queue(dev);
                /* Sync with rtl_tx:
                 * - publish queue status and cur_tx ring index (write barrier)
                 * - refresh dirty_tx ring index (read barrier).
@@ -6592,7 +6597,9 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 
        if (work_done < budget) {
                napi_complete_done(napi, work_done);
+
                rtl_irq_enable(tp);
+               mmiowb();
        }
 
        return work_done;
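
Editor's note: this rtl8169 hunk restores the pre-xmit_more stop/wake protocol: netdev_sent_queue() is logged unconditionally, the TxPoll doorbell is always rung, and an explicit smp_wmb() publishes cur_tx before the queue is marked stopped, so the completion path cannot observe the stop without also seeing the ring update; the mmiowb() calls order the MMIO writes on architectures that need them. A distilled sketch of the producer side of that pairing, with hypothetical helpers (slots_avail() stands in for rtl_tx_slots_avail()):

    /* Hypothetical xmit tail: publish ring state, then stop, then
     * re-check, pairing with barriers in the completion handler.
     */
    if (!slots_avail(tp)) {
            smp_wmb();                    /* cur_tx visible before stop */
            netif_stop_queue(dev);
            smp_mb();                     /* stop visible before re-check */
            if (slots_avail(tp))          /* completion may have freed slots */
                    netif_start_queue(dev);
    }
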
index 748fb12..2b2e1c4 100644 (file)
@@ -109,8 +109,6 @@ struct rocker_world_ops {
        int (*port_attr_bridge_flags_set)(struct rocker_port *rocker_port,
                                          unsigned long brport_flags,
                                          struct switchdev_trans *trans);
-       int (*port_attr_bridge_flags_get)(const struct rocker_port *rocker_port,
-                                         unsigned long *p_brport_flags);
        int (*port_attr_bridge_flags_support_get)(const struct rocker_port *
                                                  rocker_port,
                                                  unsigned long *
index 66f72f8..5ce8d5a 100644 (file)
@@ -1583,17 +1583,6 @@ rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port,
 }
 
 static int
-rocker_world_port_attr_bridge_flags_get(const struct rocker_port *rocker_port,
-                                       unsigned long *p_brport_flags)
-{
-       struct rocker_world_ops *wops = rocker_port->rocker->wops;
-
-       if (!wops->port_attr_bridge_flags_get)
-               return -EOPNOTSUPP;
-       return wops->port_attr_bridge_flags_get(rocker_port, p_brport_flags);
-}
-
-static int
 rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port *
                                                rocker_port,
                                                unsigned long *
@@ -2061,10 +2050,6 @@ static int rocker_port_attr_get(struct net_device *dev,
        int err = 0;
 
        switch (attr->id) {
-       case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-               err = rocker_world_port_attr_bridge_flags_get(rocker_port,
-                                                             &attr->u.brport_flags);
-               break;
        case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
                err = rocker_world_port_attr_bridge_flags_support_get(rocker_port,
                                                                      &attr->u.brport_flags_support);
index bea7895..fa296a7 100644 (file)
@@ -2512,16 +2512,6 @@ static int ofdpa_port_attr_bridge_flags_set(struct rocker_port *rocker_port,
 }
 
 static int
-ofdpa_port_attr_bridge_flags_get(const struct rocker_port *rocker_port,
-                                unsigned long *p_brport_flags)
-{
-       const struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
-
-       *p_brport_flags = ofdpa_port->brport_flags;
-       return 0;
-}
-
-static int
 ofdpa_port_attr_bridge_flags_support_get(const struct rocker_port *
                                         rocker_port,
                                         unsigned long *
@@ -2823,7 +2813,6 @@ struct rocker_world_ops rocker_ofdpa_ops = {
        .port_stop = ofdpa_port_stop,
        .port_attr_stp_state_set = ofdpa_port_attr_stp_state_set,
        .port_attr_bridge_flags_set = ofdpa_port_attr_bridge_flags_set,
-       .port_attr_bridge_flags_get = ofdpa_port_attr_bridge_flags_get,
        .port_attr_bridge_flags_support_get = ofdpa_port_attr_bridge_flags_support_get,
        .port_attr_bridge_ageing_time_set = ofdpa_port_attr_bridge_ageing_time_set,
        .port_obj_vlan_add = ofdpa_port_obj_vlan_add,
index 6062e99..e888b47 100644 (file)
@@ -6046,6 +6046,8 @@ static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = {
        { NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS, 0,    0, "sfc_dynamic_cfg_dflt" },
        { NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS, 0,    0, "sfc_exp_rom_cfg_dflt" },
        { NVRAM_PARTITION_TYPE_STATUS,             0,    0, "sfc_status" },
+       { NVRAM_PARTITION_TYPE_BUNDLE,             0,    0, "sfc_bundle" },
+       { NVRAM_PARTITION_TYPE_BUNDLE_METADATA,    0,    0, "sfc_bundle_metadata" },
 };
 #define EF10_NVRAM_PARTITION_COUNT     ARRAY_SIZE(efx_ef10_nvram_types)
 
@@ -6123,7 +6125,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx,
 static int efx_ef10_mtd_probe(struct efx_nic *efx)
 {
        MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX);
-       DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT);
+       DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT) = { 0 };
        struct efx_mcdi_mtd_partition *parts;
        size_t outlen, n_parts_total, i, n_parts;
        unsigned int type;
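
Editor's note: DECLARE_BITMAP() only declares an unsigned long array, so on the stack its bits start out as garbage; since this function only ever sets bits for partitions it finds, stale bits could make partitions look already found. The added `= { 0 }` zeroes the whole map. A minimal illustration:

    #include <linux/bitmap.h>
    #include <linux/bitops.h>

    #define NPARTS 16

    static unsigned int count_found(void)
    {
            /* Without "= { 0 }" this stack bitmap is uninitialized. */
            DECLARE_BITMAP(found, NPARTS) = { 0 };

            __set_bit(3, found);
            return bitmap_weight(found, NPARTS);  /* deterministically 1 */
    }
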
index 3643015..bc655ff 100644 (file)
@@ -915,7 +915,7 @@ rollback:
 
 void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
 {
-       mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
+       mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
 }
 
 static bool efx_default_channel_want_txqs(struct efx_channel *channel)
index 3839eec..20a5523 100644 (file)
  * subset of the information stored in this partition.
  */
 #define          NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00
+/* enum: Bundle image partition */
+#define          NVRAM_PARTITION_TYPE_BUNDLE 0x1e00
+/* enum: Bundle metadata partition that holds additional information related to
+ * a bundle update in TLV format
+ */
+#define          NVRAM_PARTITION_TYPE_BUNDLE_METADATA 0x1e01
+/* enum: Bundle update non-volatile log output partition */
+#define          NVRAM_PARTITION_TYPE_BUNDLE_LOG 0x1e02
 /* enum: Start of reserved value range (firmware may use for any purpose) */
 #define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00
 /* enum: End of reserved value range (firmware may use for any purpose) */
index 396ff01..8702ab4 100644 (file)
@@ -360,8 +360,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
                rc = efx_init_rx_buffers(rx_queue, atomic);
                if (unlikely(rc)) {
                        /* Ensure that we don't leave the rx queue empty */
-                       if (rx_queue->added_count == rx_queue->removed_count)
-                               efx_schedule_slow_fill(rx_queue);
+                       efx_schedule_slow_fill(rx_queue);
                        goto out;
                }
        } while ((space -= batch_size) >= batch_size);
index 22eb059..06c8f28 100644 (file)
@@ -471,7 +471,7 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue,
        if (IS_ERR(segments))
                return PTR_ERR(segments);
 
-       dev_kfree_skb_any(skb);
+       dev_consume_skb_any(skb);
        skb = segments;
 
        while (skb) {
index 3140999..358e66b 100644 (file)
@@ -666,7 +666,7 @@ static inline void ioc3_tx(struct net_device *dev)
                packets++;
                skb = ip->tx_skbs[o_entry];
                bytes += skb->len;
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
                ip->tx_skbs[o_entry] = NULL;
 
                o_entry = (o_entry + 1) & 127;          /* Next */
index 0e1b7e9..f425ab5 100644 (file)
@@ -68,6 +68,8 @@ module_param(timeout, int, 0);
  * packets in and out, so there is room for a packet
  */
 struct meth_private {
+       struct platform_device *pdev;
+
        /* in-memory copy of MAC Control register */
        u64 mac_ctrl;
 
@@ -211,8 +213,8 @@ static void meth_check_link(struct net_device *dev)
 static int meth_init_tx_ring(struct meth_private *priv)
 {
        /* Init TX ring */
-       priv->tx_ring = dma_alloc_coherent(NULL, TX_RING_BUFFER_SIZE,
-                                          &priv->tx_ring_dma, GFP_ATOMIC);
+       priv->tx_ring = dma_alloc_coherent(&priv->pdev->dev,
+                       TX_RING_BUFFER_SIZE, &priv->tx_ring_dma, GFP_KERNEL);
        if (!priv->tx_ring)
                return -ENOMEM;
 
@@ -236,7 +238,7 @@ static int meth_init_rx_ring(struct meth_private *priv)
                priv->rx_ring[i]=(rx_packet*)(priv->rx_skbs[i]->head);
                /* I'll need to re-sync it after each RX */
                priv->rx_ring_dmas[i] =
-                       dma_map_single(NULL, priv->rx_ring[i],
+                       dma_map_single(&priv->pdev->dev, priv->rx_ring[i],
                                       METH_RX_BUFF_SIZE, DMA_FROM_DEVICE);
                mace->eth.rx_fifo = priv->rx_ring_dmas[i];
        }
@@ -253,7 +255,7 @@ static void meth_free_tx_ring(struct meth_private *priv)
                        dev_kfree_skb(priv->tx_skbs[i]);
                priv->tx_skbs[i] = NULL;
        }
-       dma_free_coherent(NULL, TX_RING_BUFFER_SIZE, priv->tx_ring,
+       dma_free_coherent(&priv->pdev->dev, TX_RING_BUFFER_SIZE, priv->tx_ring,
                          priv->tx_ring_dma);
 }
 
@@ -263,7 +265,7 @@ static void meth_free_rx_ring(struct meth_private *priv)
        int i;
 
        for (i = 0; i < RX_RING_ENTRIES; i++) {
-               dma_unmap_single(NULL, priv->rx_ring_dmas[i],
+               dma_unmap_single(&priv->pdev->dev, priv->rx_ring_dmas[i],
                                 METH_RX_BUFF_SIZE, DMA_FROM_DEVICE);
                priv->rx_ring[i] = 0;
                priv->rx_ring_dmas[i] = 0;
@@ -393,7 +395,8 @@ static void meth_rx(struct net_device* dev, unsigned long int_status)
                fifo_rptr = (fifo_rptr - 1) & 0x0f;
        }
        while (priv->rx_write != fifo_rptr) {
-               dma_unmap_single(NULL, priv->rx_ring_dmas[priv->rx_write],
+               dma_unmap_single(&priv->pdev->dev,
+                                priv->rx_ring_dmas[priv->rx_write],
                                 METH_RX_BUFF_SIZE, DMA_FROM_DEVICE);
                status = priv->rx_ring[priv->rx_write]->status.raw;
 #if MFE_DEBUG
@@ -454,7 +457,8 @@ static void meth_rx(struct net_device* dev, unsigned long int_status)
                priv->rx_ring[priv->rx_write] = (rx_packet*)skb->head;
                priv->rx_ring[priv->rx_write]->status.raw = 0;
                priv->rx_ring_dmas[priv->rx_write] =
-                       dma_map_single(NULL, priv->rx_ring[priv->rx_write],
+                       dma_map_single(&priv->pdev->dev,
+                                      priv->rx_ring[priv->rx_write],
                                       METH_RX_BUFF_SIZE, DMA_FROM_DEVICE);
                mace->eth.rx_fifo = priv->rx_ring_dmas[priv->rx_write];
                ADVANCE_RX_PTR(priv->rx_write);
@@ -521,7 +525,7 @@ static void meth_tx_cleanup(struct net_device* dev, unsigned long int_status)
                        DPRINTK("RPTR points us here, but packet not done?\n");
                        break;
                }
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
                priv->tx_skbs[priv->tx_read] = NULL;
                priv->tx_ring[priv->tx_read].header.raw = 0;
                priv->tx_read = (priv->tx_read+1)&(TX_RING_ENTRIES-1);
@@ -637,7 +641,7 @@ static void meth_tx_1page_prepare(struct meth_private *priv,
        }
 
        /* first page */
-       catbuf = dma_map_single(NULL, buffer_data, buffer_len,
+       catbuf = dma_map_single(&priv->pdev->dev, buffer_data, buffer_len,
                                DMA_TO_DEVICE);
        desc->data.cat_buf[0].form.start_addr = catbuf >> 3;
        desc->data.cat_buf[0].form.len = buffer_len - 1;
@@ -663,12 +667,12 @@ static void meth_tx_2page_prepare(struct meth_private *priv,
        }
 
        /* first page */
-       catbuf1 = dma_map_single(NULL, buffer1_data, buffer1_len,
+       catbuf1 = dma_map_single(&priv->pdev->dev, buffer1_data, buffer1_len,
                                 DMA_TO_DEVICE);
        desc->data.cat_buf[0].form.start_addr = catbuf1 >> 3;
        desc->data.cat_buf[0].form.len = buffer1_len - 1;
        /* second page */
-       catbuf2 = dma_map_single(NULL, buffer2_data, buffer2_len,
+       catbuf2 = dma_map_single(&priv->pdev->dev, buffer2_data, buffer2_len,
                                 DMA_TO_DEVICE);
        desc->data.cat_buf[1].form.start_addr = catbuf2 >> 3;
        desc->data.cat_buf[1].form.len = buffer2_len - 1;
@@ -840,6 +844,7 @@ static int meth_probe(struct platform_device *pdev)
        memcpy(dev->dev_addr, o2meth_eaddr, ETH_ALEN);
 
        priv = netdev_priv(dev);
+       priv->pdev = pdev;
        spin_lock_init(&priv->meth_lock);
        SET_NETDEV_DEV(dev, &pdev->dev);
 
index 808cf98..5b351be 100644 (file)
@@ -714,7 +714,7 @@ static void sis190_tx_interrupt(struct net_device *dev,
 
                sis190_unmap_tx_skb(tp->pci_dev, skb, txd);
                tp->Tx_skbuff[entry] = NULL;
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
        }
 
        if (tp->dirty_tx != dirty_tx) {
index 4bb89f7..6073387 100644 (file)
@@ -1927,7 +1927,7 @@ static void sis900_finish_xmit (struct net_device *net_dev)
                pci_unmap_single(sis_priv->pci_dev,
                        sis_priv->tx_ring[entry].bufptr, skb->len,
                        PCI_DMA_TODEVICE);
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
                sis_priv->tx_skbuff[entry] = NULL;
                sis_priv->tx_ring[entry].bufptr = 0;
                sis_priv->tx_ring[entry].cmdsts = 0;
index 15c62c1..be47d86 100644
@@ -1037,7 +1037,7 @@ static void epic_tx(struct net_device *dev, struct epic_private *ep)
                skb = ep->tx_skbuff[entry];
                pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
                                 skb->len, PCI_DMA_TODEVICE);
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
                ep->tx_skbuff[entry] = NULL;
        }
 
index 8355dfb..b550e62 100644
@@ -1188,7 +1188,7 @@ smc911x_tx_dma_irq(void *data)
 
        DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, "TX DMA irq handler\n");
        BUG_ON(skb == NULL);
-       dma_unmap_single(NULL, tx_dmabuf, tx_dmalen, DMA_TO_DEVICE);
+       dma_unmap_single(lp->dev, tx_dmabuf, tx_dmalen, DMA_TO_DEVICE);
        netif_trans_update(dev);
        dev_kfree_skb_irq(skb);
        lp->current_tx_skb = NULL;
@@ -1219,7 +1219,7 @@ smc911x_rx_dma_irq(void *data)
 
        DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
        DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, dev, "RX DMA irq handler\n");
-       dma_unmap_single(NULL, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE);
+       dma_unmap_single(lp->dev, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE);
        BUG_ON(skb == NULL);
        lp->current_rx_skb = NULL;
        PRINT_PKT(skb->data, skb->len);
index 20299f6..90045ff 100644
@@ -268,7 +268,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
        int ret = -EINVAL;
 
        /* Get the status from normal w/b descriptor */
-       if (likely(p->des3 & TDES3_RS1V)) {
+       if (likely(le32_to_cpu(p->des3) & RDES3_RDES1_VALID)) {
                if (likely(le32_to_cpu(p->des1) & RDES1_TIMESTAMP_AVAILABLE)) {
                        int i = 0;
 
index d1f61c2..5d85742 100644
@@ -721,8 +721,11 @@ static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv)
 {
        unsigned long clk = clk_get_rate(priv->plat->stmmac_clk);
 
-       if (!clk)
-               return 0;
+       if (!clk) {
+               clk = priv->plat->clk_ref_rate;
+               if (!clk)
+                       return 0;
+       }
 
        return (usec * (clk / 1000000)) / 256;
 }
@@ -731,8 +734,11 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv)
 {
        unsigned long clk = clk_get_rate(priv->plat->stmmac_clk);
 
-       if (!clk)
-               return 0;
+       if (!clk) {
+               clk = priv->plat->clk_ref_rate;
+               if (!clk)
+                       return 0;
+       }
 
        return (riwt * 256) / (clk / 1000000);
 }
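The RX watchdog (riwt) counts in units of 256 clock cycles, which is where the two conversions above come from. When the main stmmac clock is not yet enabled, clk_get_rate() returns 0 and the old code silently reported a zero coalescing range; the fix falls back to the platform's clk_ref_rate instead. A worked example, assuming a 250 MHz clock:

	u32 clk_mhz = 250000000 / 1000000;	/* 250 cycles per microsecond */
	u32 riwt = (128 * clk_mhz) / 256;	/* 128 us   -> 125 riwt units */
	u32 usec = (riwt * 256) / clk_mhz;	/* 125 units -> 128 us again  */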
index effff17..f12dd59 100644
@@ -3039,10 +3039,22 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
        tx_q = &priv->tx_queue[queue];
 
+       if (priv->tx_path_in_lpi_mode)
+               stmmac_disable_eee_mode(priv);
+
        /* Manage oversized TCP frames for GMAC4 device */
        if (skb_is_gso(skb) && priv->tso) {
-               if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
+               if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
+                       /*
+                        * There is no way to determine the number of TSO
+                        * capable Queues. Let's always use Queue 0
+                        * because if TSO is supported then at least this
+                        * one will be capable.
+                        */
+                       skb_set_queue_mapping(skb, 0);
+
                        return stmmac_tso_xmit(skb, dev);
+               }
        }
 
        if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) {
@@ -3057,9 +3069,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                return NETDEV_TX_BUSY;
        }
 
-       if (priv->tx_path_in_lpi_mode)
-               stmmac_disable_eee_mode(priv);
-
        entry = tx_q->cur_tx;
        first_entry = entry;
        WARN_ON(tx_q->tx_skbuff[first_entry]);
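Two independent fixes in this hunk: the EEE/LPI exit is hoisted above the TSO branch, since stmmac_tso_xmit() was previously reachable with the TX path still in low-power mode, and GSO frames are pinned to queue 0 because the driver has no way to query which queues are TSO-capable. Post-patch control flow, abbreviated (stmmac_regular_xmit is a hypothetical stand-in for the remainder of stmmac_xmit()):

	if (priv->tx_path_in_lpi_mode)
		stmmac_disable_eee_mode(priv);	/* exit LPI before either path */

	if (skb_is_gso(skb) && priv->tso) {
		skb_set_queue_mapping(skb, 0);	/* queue 0 is TSO-capable */
		return stmmac_tso_xmit(skb, dev);
	}
	return stmmac_regular_xmit(skb, dev);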
index 7ec4eb7..6fc05c1 100644
@@ -1898,7 +1898,7 @@ static inline void cas_tx_ringN(struct cas *cp, int ring, int limit)
                cp->net_stats[ring].tx_packets++;
                cp->net_stats[ring].tx_bytes += skb->len;
                spin_unlock(&cp->stat_lock[ring]);
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
        }
        cp->tx_old[ring] = entry;
 
index 720b7ac..e9b757b 100644
@@ -781,7 +781,7 @@ static void bigmac_tx(struct bigmac *bp)
 
                DTX(("skb(%p) ", skb));
                bp->tx_skbs[elem] = NULL;
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
 
                elem = NEXT_TX(elem);
        }
index ff641cf..d007dfe 100644
@@ -1962,7 +1962,7 @@ static void happy_meal_tx(struct happy_meal *hp)
                        this = &txbase[elem];
                }
 
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
                dev->stats.tx_packets++;
        }
        hp->tx_old = elem;
index dc966dd..b24c111 100644
@@ -1739,7 +1739,7 @@ static void bdx_tx_cleanup(struct bdx_priv *priv)
                tx_level -= db->rptr->len;      /* '-' koz len is negative */
 
                /* now should come skb pointer - free it */
-               dev_kfree_skb_irq(db->rptr->addr.skb);
+               dev_consume_skb_irq(db->rptr->addr.skb);
                bdx_tx_db_inc_rptr(db);
        }
 
index 8241269..27f6cf1 100644
@@ -1740,7 +1740,7 @@ static void velocity_free_tx_buf(struct velocity_info *vptr,
                dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],
                                 le16_to_cpu(pktlen), DMA_TO_DEVICE);
        }
-       dev_kfree_skb_irq(skb);
+       dev_consume_skb_irq(skb);
        tdinfo->skb = NULL;
 }
 
index 15bb058..44efffb 100644
@@ -630,7 +630,7 @@ static void temac_start_xmit_done(struct net_device *ndev)
                dma_unmap_single(ndev->dev.parent, cur_p->phys, cur_p->len,
                                 DMA_TO_DEVICE);
                if (cur_p->app4)
-                       dev_kfree_skb_irq((struct sk_buff *)cur_p->app4);
+                       dev_consume_skb_irq((struct sk_buff *)cur_p->app4);
                cur_p->app0 = 0;
                cur_p->app1 = 0;
                cur_p->app2 = 0;
index 0789d8a..ec7e7ec 100644
@@ -595,7 +595,7 @@ static void axienet_start_xmit_done(struct net_device *ndev)
                                (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
                                DMA_TO_DEVICE);
                if (cur_p->app4)
-                       dev_kfree_skb_irq((struct sk_buff *)cur_p->app4);
+                       dev_consume_skb_irq((struct sk_buff *)cur_p->app4);
                /*cur_p->phys = 0;*/
                cur_p->app0 = 0;
                cur_p->app1 = 0;
index 639e3e9..b03a417 100644
@@ -581,7 +581,7 @@ static void xemaclite_tx_handler(struct net_device *dev)
                return;
 
        dev->stats.tx_bytes += lp->deferred_skb->len;
-       dev_kfree_skb_irq(lp->deferred_skb);
+       dev_consume_skb_irq(lp->deferred_skb);
        lp->deferred_skb = NULL;
        netif_trans_update(dev); /* prevent tx timeout */
        netif_wake_queue(dev);
index aee55c0..ed6623a 100644
 #ifdef __ARMEB__
 typedef struct sk_buff buffer_t;
 #define free_buffer dev_kfree_skb
-#define free_buffer_irq dev_kfree_skb_irq
+#define free_buffer_irq dev_consume_skb_irq
 #else
 typedef void buffer_t;
 #define free_buffer kfree
index 38ac8ef..56b7791 100644
@@ -3512,7 +3512,7 @@ static int dfx_xmt_done(DFX_board_t *bp)
                                 bp->descr_block_virt->xmt_data[comp].long_1,
                                 p_xmt_drv_descr->p_skb->len,
                                 DMA_TO_DEVICE);
-               dev_kfree_skb_irq(p_xmt_drv_descr->p_skb);
+               dev_consume_skb_irq(p_xmt_drv_descr->p_skb);
 
                /*
                 * Move to start of next packet by updating completion index
index 6ef44c4..f29f5a6 100644
@@ -851,6 +851,7 @@ static void pcm_fsm(struct s_smc *smc, struct s_phy *phy, int cmd)
 
        case ACTIONS(PC5_SIGNAL) :
                ACTIONS_DONE() ;
+               /* fall through */
        case PC5_SIGNAL :
                if ((cmd != PC_SIGNAL) && (cmd != PC_TIMEOUT_LCT))
                        break ;
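The bare /* fall through */ comment above is the kernel's annotation for an intentional missing break; it keeps GCC's -Wimplicit-fallthrough quiet without changing behaviour. The same shape in miniature, with hypothetical states:

	switch (state) {
	case STATE_SETUP:
		do_setup();
		/* fall through */	/* SETUP deliberately continues into RUN */
	case STATE_RUN:
		do_run();
		break;
	}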
index 58bbba8..3377ac6 100644
@@ -1512,9 +1512,13 @@ static void geneve_link_config(struct net_device *dev,
        }
 #if IS_ENABLED(CONFIG_IPV6)
        case AF_INET6: {
-               struct rt6_info *rt = rt6_lookup(geneve->net,
-                                                &info->key.u.ipv6.dst, NULL, 0,
-                                                NULL, 0);
+               struct rt6_info *rt;
+
+               if (!__in6_dev_get(dev))
+                       break;
+
+               rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
+                               NULL, 0);
 
                if (rt && rt->dst.dev)
                        ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
index 44de81e..c589f5a 100644
@@ -905,9 +905,9 @@ mcr20a_irq_clean_complete(void *context)
                }
                break;
        case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_SEQIRQ):
-                       /* rx is starting */
-                       dev_dbg(printdev(lp), "RX is starting\n");
-                       mcr20a_handle_rx(lp);
+               /* rx is starting */
+               dev_dbg(printdev(lp), "RX is starting\n");
+               mcr20a_handle_rx(lp);
                break;
        case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ):
                if (lp->is_tx) {
index 8a2c64d..3ee9536 100644
@@ -5,4 +5,5 @@
 obj-$(CONFIG_IPVLAN) += ipvlan.o
 obj-$(CONFIG_IPVTAP) += ipvtap.o
 
-ipvlan-objs := ipvlan_core.o ipvlan_main.o
+ipvlan-objs-$(CONFIG_IPVLAN_L3S) += ipvlan_l3s.o
+ipvlan-objs := ipvlan_core.o ipvlan_main.o $(ipvlan-objs-y)
index adb826f..b906d2f 100644
@@ -165,10 +165,9 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
                                   const void *iaddr, bool is_v6);
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
 void ipvlan_ht_addr_del(struct ipvl_addr *addr);
-struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
-                             u16 proto);
-unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
-                            const struct nf_hook_state *state);
+struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
+                                    int addr_type, bool use_dest);
+void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type);
 void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
                     unsigned int len, bool success, bool mcast);
 int ipvlan_link_new(struct net *src_net, struct net_device *dev,
@@ -177,6 +176,36 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 void ipvlan_link_delete(struct net_device *dev, struct list_head *head);
 void ipvlan_link_setup(struct net_device *dev);
 int ipvlan_link_register(struct rtnl_link_ops *ops);
+#ifdef CONFIG_IPVLAN_L3S
+int ipvlan_l3s_register(struct ipvl_port *port);
+void ipvlan_l3s_unregister(struct ipvl_port *port);
+void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet);
+int ipvlan_l3s_init(void);
+void ipvlan_l3s_cleanup(void);
+#else
+static inline int ipvlan_l3s_register(struct ipvl_port *port)
+{
+       return -ENOTSUPP;
+}
+
+static inline void ipvlan_l3s_unregister(struct ipvl_port *port)
+{
+}
+
+static inline void ipvlan_migrate_l3s_hook(struct net *oldnet,
+                                          struct net *newnet)
+{
+}
+
+static inline int ipvlan_l3s_init(void)
+{
+       return 0;
+}
+
+static inline void ipvlan_l3s_cleanup(void)
+{
+}
+#endif /* CONFIG_IPVLAN_L3S */
 
 static inline bool netif_is_ipvlan_port(const struct net_device *dev)
 {
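The #ifdef block above is the usual stub pattern for an optional sub-feature: with CONFIG_IPVLAN_L3S=n every L3S entry point collapses into a static inline no-op (or -ENOTSUPP), so ipvlan_main.c compiles unchanged either way and needs no conditionals of its own. The pattern in miniature, names hypothetical:

	#ifdef CONFIG_FOO_FEATURE
	int foo_feature_register(struct foo_port *port);
	#else
	static inline int foo_feature_register(struct foo_port *port)
	{
		return -ENOTSUPP;	/* feature compiled out */
	}
	#endif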
index 1a8132e..e0f5bc8 100644
@@ -138,7 +138,7 @@ bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
        return ret;
 }
 
-static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
+void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
 {
        void *lyr3h = NULL;
 
@@ -355,9 +355,8 @@ out:
        return ret;
 }
 
-static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
-                                           void *lyr3h, int addr_type,
-                                           bool use_dest)
+struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
+                                    int addr_type, bool use_dest)
 {
        struct ipvl_addr *addr = NULL;
 
@@ -647,7 +646,9 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
        case IPVLAN_MODE_L2:
                return ipvlan_xmit_mode_l2(skb, dev);
        case IPVLAN_MODE_L3:
+#ifdef CONFIG_IPVLAN_L3S
        case IPVLAN_MODE_L3S:
+#endif
                return ipvlan_xmit_mode_l3(skb, dev);
        }
 
@@ -743,8 +744,10 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
                return ipvlan_handle_mode_l2(pskb, port);
        case IPVLAN_MODE_L3:
                return ipvlan_handle_mode_l3(pskb, port);
+#ifdef CONFIG_IPVLAN_L3S
        case IPVLAN_MODE_L3S:
                return RX_HANDLER_PASS;
+#endif
        }
 
        /* Should not reach here */
@@ -753,97 +756,3 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
        kfree_skb(skb);
        return RX_HANDLER_CONSUMED;
 }
-
-static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
-                                           struct net_device *dev)
-{
-       struct ipvl_addr *addr = NULL;
-       struct ipvl_port *port;
-       void *lyr3h;
-       int addr_type;
-
-       if (!dev || !netif_is_ipvlan_port(dev))
-               goto out;
-
-       port = ipvlan_port_get_rcu(dev);
-       if (!port || port->mode != IPVLAN_MODE_L3S)
-               goto out;
-
-       lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
-       if (!lyr3h)
-               goto out;
-
-       addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
-out:
-       return addr;
-}
-
-struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
-                             u16 proto)
-{
-       struct ipvl_addr *addr;
-       struct net_device *sdev;
-
-       addr = ipvlan_skb_to_addr(skb, dev);
-       if (!addr)
-               goto out;
-
-       sdev = addr->master->dev;
-       switch (proto) {
-       case AF_INET:
-       {
-               int err;
-               struct iphdr *ip4h = ip_hdr(skb);
-
-               err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
-                                          ip4h->tos, sdev);
-               if (unlikely(err))
-                       goto out;
-               break;
-       }
-#if IS_ENABLED(CONFIG_IPV6)
-       case AF_INET6:
-       {
-               struct dst_entry *dst;
-               struct ipv6hdr *ip6h = ipv6_hdr(skb);
-               int flags = RT6_LOOKUP_F_HAS_SADDR;
-               struct flowi6 fl6 = {
-                       .flowi6_iif   = sdev->ifindex,
-                       .daddr        = ip6h->daddr,
-                       .saddr        = ip6h->saddr,
-                       .flowlabel    = ip6_flowinfo(ip6h),
-                       .flowi6_mark  = skb->mark,
-                       .flowi6_proto = ip6h->nexthdr,
-               };
-
-               skb_dst_drop(skb);
-               dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
-                                            skb, flags);
-               skb_dst_set(skb, dst);
-               break;
-       }
-#endif
-       default:
-               break;
-       }
-
-out:
-       return skb;
-}
-
-unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
-                            const struct nf_hook_state *state)
-{
-       struct ipvl_addr *addr;
-       unsigned int len;
-
-       addr = ipvlan_skb_to_addr(skb, skb->dev);
-       if (!addr)
-               goto out;
-
-       skb->dev = addr->master->dev;
-       len = skb->len + ETH_HLEN;
-       ipvlan_count_rx(addr->master, len, true, false);
-out:
-       return NF_ACCEPT;
-}
diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c
new file mode 100644
index 0000000..d17480a
--- /dev/null
+++ b/drivers/net/ipvlan/ipvlan_l3s.c
@@ -0,0 +1,227 @@
+/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+#include "ipvlan.h"
+
+static unsigned int ipvlan_netid __read_mostly;
+
+struct ipvlan_netns {
+       unsigned int ipvl_nf_hook_refcnt;
+};
+
+static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
+                                           struct net_device *dev)
+{
+       struct ipvl_addr *addr = NULL;
+       struct ipvl_port *port;
+       int addr_type;
+       void *lyr3h;
+
+       if (!dev || !netif_is_ipvlan_port(dev))
+               goto out;
+
+       port = ipvlan_port_get_rcu(dev);
+       if (!port || port->mode != IPVLAN_MODE_L3S)
+               goto out;
+
+       lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
+       if (!lyr3h)
+               goto out;
+
+       addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
+out:
+       return addr;
+}
+
+static struct sk_buff *ipvlan_l3_rcv(struct net_device *dev,
+                                    struct sk_buff *skb, u16 proto)
+{
+       struct ipvl_addr *addr;
+       struct net_device *sdev;
+
+       addr = ipvlan_skb_to_addr(skb, dev);
+       if (!addr)
+               goto out;
+
+       sdev = addr->master->dev;
+       switch (proto) {
+       case AF_INET:
+       {
+               struct iphdr *ip4h = ip_hdr(skb);
+               int err;
+
+               err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
+                                          ip4h->tos, sdev);
+               if (unlikely(err))
+                       goto out;
+               break;
+       }
+#if IS_ENABLED(CONFIG_IPV6)
+       case AF_INET6:
+       {
+               struct dst_entry *dst;
+               struct ipv6hdr *ip6h = ipv6_hdr(skb);
+               int flags = RT6_LOOKUP_F_HAS_SADDR;
+               struct flowi6 fl6 = {
+                       .flowi6_iif   = sdev->ifindex,
+                       .daddr        = ip6h->daddr,
+                       .saddr        = ip6h->saddr,
+                       .flowlabel    = ip6_flowinfo(ip6h),
+                       .flowi6_mark  = skb->mark,
+                       .flowi6_proto = ip6h->nexthdr,
+               };
+
+               skb_dst_drop(skb);
+               dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
+                                            skb, flags);
+               skb_dst_set(skb, dst);
+               break;
+       }
+#endif
+       default:
+               break;
+       }
+out:
+       return skb;
+}
+
+static const struct l3mdev_ops ipvl_l3mdev_ops = {
+       .l3mdev_l3_rcv = ipvlan_l3_rcv,
+};
+
+static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+                                   const struct nf_hook_state *state)
+{
+       struct ipvl_addr *addr;
+       unsigned int len;
+
+       addr = ipvlan_skb_to_addr(skb, skb->dev);
+       if (!addr)
+               goto out;
+
+       skb->dev = addr->master->dev;
+       len = skb->len + ETH_HLEN;
+       ipvlan_count_rx(addr->master, len, true, false);
+out:
+       return NF_ACCEPT;
+}
+
+static const struct nf_hook_ops ipvl_nfops[] = {
+       {
+               .hook     = ipvlan_nf_input,
+               .pf       = NFPROTO_IPV4,
+               .hooknum  = NF_INET_LOCAL_IN,
+               .priority = INT_MAX,
+       },
+#if IS_ENABLED(CONFIG_IPV6)
+       {
+               .hook     = ipvlan_nf_input,
+               .pf       = NFPROTO_IPV6,
+               .hooknum  = NF_INET_LOCAL_IN,
+               .priority = INT_MAX,
+       },
+#endif
+};
+
+static int ipvlan_register_nf_hook(struct net *net)
+{
+       struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
+       int err = 0;
+
+       if (!vnet->ipvl_nf_hook_refcnt) {
+               err = nf_register_net_hooks(net, ipvl_nfops,
+                                           ARRAY_SIZE(ipvl_nfops));
+               if (!err)
+                       vnet->ipvl_nf_hook_refcnt = 1;
+       } else {
+               vnet->ipvl_nf_hook_refcnt++;
+       }
+
+       return err;
+}
+
+static void ipvlan_unregister_nf_hook(struct net *net)
+{
+       struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
+
+       if (WARN_ON(!vnet->ipvl_nf_hook_refcnt))
+               return;
+
+       vnet->ipvl_nf_hook_refcnt--;
+       if (!vnet->ipvl_nf_hook_refcnt)
+               nf_unregister_net_hooks(net, ipvl_nfops,
+                                       ARRAY_SIZE(ipvl_nfops));
+}
+
+void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet)
+{
+       struct ipvlan_netns *old_vnet;
+
+       ASSERT_RTNL();
+
+       old_vnet = net_generic(oldnet, ipvlan_netid);
+       if (!old_vnet->ipvl_nf_hook_refcnt)
+               return;
+
+       ipvlan_register_nf_hook(newnet);
+       ipvlan_unregister_nf_hook(oldnet);
+}
+
+static void ipvlan_ns_exit(struct net *net)
+{
+       struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
+
+       if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) {
+               vnet->ipvl_nf_hook_refcnt = 0;
+               nf_unregister_net_hooks(net, ipvl_nfops,
+                                       ARRAY_SIZE(ipvl_nfops));
+       }
+}
+
+static struct pernet_operations ipvlan_net_ops = {
+       .id   = &ipvlan_netid,
+       .size = sizeof(struct ipvlan_netns),
+       .exit = ipvlan_ns_exit,
+};
+
+int ipvlan_l3s_init(void)
+{
+       return register_pernet_subsys(&ipvlan_net_ops);
+}
+
+void ipvlan_l3s_cleanup(void)
+{
+       unregister_pernet_subsys(&ipvlan_net_ops);
+}
+
+int ipvlan_l3s_register(struct ipvl_port *port)
+{
+       struct net_device *dev = port->dev;
+       int ret;
+
+       ASSERT_RTNL();
+
+       ret = ipvlan_register_nf_hook(read_pnet(&port->pnet));
+       if (!ret) {
+               dev->l3mdev_ops = &ipvl_l3mdev_ops;
+               dev->priv_flags |= IFF_L3MDEV_RX_HANDLER;
+       }
+
+       return ret;
+}
+
+void ipvlan_l3s_unregister(struct ipvl_port *port)
+{
+       struct net_device *dev = port->dev;
+
+       ASSERT_RTNL();
+
+       dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
+       ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
+       dev->l3mdev_ops = NULL;
+}
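Worth noting in the new file: the netfilter hooks are registered once per network namespace and reference-counted, so any number of L3S ports in one netns share a single hook pair, and ipvlan_migrate_l3s_hook() registers in the new netns before unregistering in the old one, leaving no window with the hooks absent. A sketch of the refcount behaviour, using the helpers defined above:

	ipvlan_register_nf_hook(net);	/* refcnt 0 -> 1, hooks installed */
	ipvlan_register_nf_hook(net);	/* refcnt 1 -> 2, nothing to do   */
	ipvlan_unregister_nf_hook(net);	/* refcnt 2 -> 1                  */
	ipvlan_unregister_nf_hook(net);	/* refcnt 1 -> 0, hooks removed   */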
index 19bdde6..8ec73d9 100644
@@ -9,73 +9,10 @@
 
 #include "ipvlan.h"
 
-static unsigned int ipvlan_netid __read_mostly;
-
-struct ipvlan_netns {
-       unsigned int ipvl_nf_hook_refcnt;
-};
-
-static const struct nf_hook_ops ipvl_nfops[] = {
-       {
-               .hook     = ipvlan_nf_input,
-               .pf       = NFPROTO_IPV4,
-               .hooknum  = NF_INET_LOCAL_IN,
-               .priority = INT_MAX,
-       },
-#if IS_ENABLED(CONFIG_IPV6)
-       {
-               .hook     = ipvlan_nf_input,
-               .pf       = NFPROTO_IPV6,
-               .hooknum  = NF_INET_LOCAL_IN,
-               .priority = INT_MAX,
-       },
-#endif
-};
-
-static const struct l3mdev_ops ipvl_l3mdev_ops = {
-       .l3mdev_l3_rcv = ipvlan_l3_rcv,
-};
-
-static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
-{
-       ipvlan->dev->mtu = dev->mtu;
-}
-
-static int ipvlan_register_nf_hook(struct net *net)
-{
-       struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
-       int err = 0;
-
-       if (!vnet->ipvl_nf_hook_refcnt) {
-               err = nf_register_net_hooks(net, ipvl_nfops,
-                                           ARRAY_SIZE(ipvl_nfops));
-               if (!err)
-                       vnet->ipvl_nf_hook_refcnt = 1;
-       } else {
-               vnet->ipvl_nf_hook_refcnt++;
-       }
-
-       return err;
-}
-
-static void ipvlan_unregister_nf_hook(struct net *net)
-{
-       struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
-
-       if (WARN_ON(!vnet->ipvl_nf_hook_refcnt))
-               return;
-
-       vnet->ipvl_nf_hook_refcnt--;
-       if (!vnet->ipvl_nf_hook_refcnt)
-               nf_unregister_net_hooks(net, ipvl_nfops,
-                                       ARRAY_SIZE(ipvl_nfops));
-}
-
 static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval,
                                struct netlink_ext_ack *extack)
 {
        struct ipvl_dev *ipvlan;
-       struct net_device *mdev = port->dev;
        unsigned int flags;
        int err;
 
@@ -97,17 +34,12 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval,
                }
                if (nval == IPVLAN_MODE_L3S) {
                        /* New mode is L3S */
-                       err = ipvlan_register_nf_hook(read_pnet(&port->pnet));
-                       if (!err) {
-                               mdev->l3mdev_ops = &ipvl_l3mdev_ops;
-                               mdev->priv_flags |= IFF_L3MDEV_MASTER;
-                       } else
+                       err = ipvlan_l3s_register(port);
+                       if (err)
                                goto fail;
                } else if (port->mode == IPVLAN_MODE_L3S) {
                        /* Old mode was L3S */
-                       mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
-                       ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
-                       mdev->l3mdev_ops = NULL;
+                       ipvlan_l3s_unregister(port);
                }
                port->mode = nval;
        }
@@ -166,11 +98,8 @@ static void ipvlan_port_destroy(struct net_device *dev)
        struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
        struct sk_buff *skb;
 
-       if (port->mode == IPVLAN_MODE_L3S) {
-               dev->priv_flags &= ~IFF_L3MDEV_MASTER;
-               ipvlan_unregister_nf_hook(dev_net(dev));
-               dev->l3mdev_ops = NULL;
-       }
+       if (port->mode == IPVLAN_MODE_L3S)
+               ipvlan_l3s_unregister(port);
        netdev_rx_handler_unregister(dev);
        cancel_work_sync(&port->wq);
        while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
@@ -446,6 +375,11 @@ static const struct header_ops ipvlan_header_ops = {
        .cache_update   = eth_header_cache_update,
 };
 
+static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
+{
+       ipvlan->dev->mtu = dev->mtu;
+}
+
 static bool netif_is_ipvlan(const struct net_device *dev)
 {
        /* both ipvlan and ipvtap devices use the same netdev_ops */
@@ -781,7 +715,6 @@ static int ipvlan_device_event(struct notifier_block *unused,
 
        case NETDEV_REGISTER: {
                struct net *oldnet, *newnet = dev_net(dev);
-               struct ipvlan_netns *old_vnet;
 
                oldnet = read_pnet(&port->pnet);
                if (net_eq(newnet, oldnet))
@@ -789,12 +722,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
 
                write_pnet(&port->pnet, newnet);
 
-               old_vnet = net_generic(oldnet, ipvlan_netid);
-               if (!old_vnet->ipvl_nf_hook_refcnt)
-                       break;
-
-               ipvlan_register_nf_hook(newnet);
-               ipvlan_unregister_nf_hook(oldnet);
+               ipvlan_migrate_l3s_hook(oldnet, newnet);
                break;
        }
        case NETDEV_UNREGISTER:
@@ -1068,23 +996,6 @@ static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
 };
 #endif
 
-static void ipvlan_ns_exit(struct net *net)
-{
-       struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
-
-       if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) {
-               vnet->ipvl_nf_hook_refcnt = 0;
-               nf_unregister_net_hooks(net, ipvl_nfops,
-                                       ARRAY_SIZE(ipvl_nfops));
-       }
-}
-
-static struct pernet_operations ipvlan_net_ops = {
-       .id = &ipvlan_netid,
-       .size = sizeof(struct ipvlan_netns),
-       .exit = ipvlan_ns_exit,
-};
-
 static int __init ipvlan_init_module(void)
 {
        int err;
@@ -1099,13 +1010,13 @@ static int __init ipvlan_init_module(void)
        register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
        register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
 
-       err = register_pernet_subsys(&ipvlan_net_ops);
+       err = ipvlan_l3s_init();
        if (err < 0)
                goto error;
 
        err = ipvlan_link_register(&ipvlan_link_ops);
        if (err < 0) {
-               unregister_pernet_subsys(&ipvlan_net_ops);
+               ipvlan_l3s_cleanup();
                goto error;
        }
 
@@ -1126,7 +1037,7 @@ error:
 static void __exit ipvlan_cleanup_module(void)
 {
        rtnl_link_unregister(&ipvlan_link_ops);
-       unregister_pernet_subsys(&ipvlan_net_ops);
+       ipvlan_l3s_cleanup();
        unregister_netdevice_notifier(&ipvlan_notifier_block);
        unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
        unregister_inetaddr_validator_notifier(
index 482004e..0f772a4 100644
@@ -17,6 +17,7 @@
 #define PHY_ID_AQR105  0x03a1b4a2
 #define PHY_ID_AQR106  0x03a1b4d0
 #define PHY_ID_AQR107  0x03a1b4e0
+#define PHY_ID_AQCS109 0x03a1b5c2
 #define PHY_ID_AQR405  0x03a1b4b0
 
 #define MDIO_AN_TX_VEND_STATUS1                        0xc800
@@ -203,6 +204,16 @@ static struct phy_driver aqr_driver[] = {
        .read_status    = aqr_read_status,
 },
 {
+       PHY_ID_MATCH_MODEL(PHY_ID_AQCS109),
+       .name           = "Aquantia AQCS109",
+       .features       = PHY_10GBIT_FULL_FEATURES,
+       .aneg_done      = genphy_c45_aneg_done,
+       .config_aneg    = aqr_config_aneg,
+       .config_intr    = aqr_config_intr,
+       .ack_interrupt  = aqr_ack_interrupt,
+       .read_status    = aqr_read_status,
+},
+{
        PHY_ID_MATCH_MODEL(PHY_ID_AQR405),
        .name           = "Aquantia AQR405",
        .features       = PHY_10GBIT_FULL_FEATURES,
@@ -222,6 +233,7 @@ static struct mdio_device_id __maybe_unused aqr_tbl[] = {
        { PHY_ID_MATCH_MODEL(PHY_ID_AQR105) },
        { PHY_ID_MATCH_MODEL(PHY_ID_AQR106) },
        { PHY_ID_MATCH_MODEL(PHY_ID_AQR107) },
+       { PHY_ID_MATCH_MODEL(PHY_ID_AQCS109) },
        { PHY_ID_MATCH_MODEL(PHY_ID_AQR405) },
        { }
 };
index 25ef483..2fe2eba 100644
@@ -885,14 +885,14 @@ static void decode_txts(struct dp83640_private *dp83640,
                        struct phy_txts *phy_txts)
 {
        struct skb_shared_hwtstamps shhwtstamps;
+       struct dp83640_skb_info *skb_info;
        struct sk_buff *skb;
-       u64 ns;
        u8 overflow;
+       u64 ns;
 
        /* We must already have the skb that triggered this. */
-
+again:
        skb = skb_dequeue(&dp83640->tx_queue);
-
        if (!skb) {
                pr_debug("have timestamp but tx_queue empty\n");
                return;
@@ -907,6 +907,11 @@ static void decode_txts(struct dp83640_private *dp83640,
                }
                return;
        }
+       skb_info = (struct dp83640_skb_info *)skb->cb;
+       if (time_after(jiffies, skb_info->tmo)) {
+               kfree_skb(skb);
+               goto again;
+       }
 
        ns = phy2txts(phy_txts);
        memset(&shhwtstamps, 0, sizeof(shhwtstamps));
@@ -1459,6 +1464,7 @@ static bool dp83640_rxtstamp(struct phy_device *phydev,
 static void dp83640_txtstamp(struct phy_device *phydev,
                             struct sk_buff *skb, int type)
 {
+       struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb;
        struct dp83640_private *dp83640 = phydev->priv;
 
        switch (dp83640->hwts_tx_en) {
@@ -1471,6 +1477,7 @@ static void dp83640_txtstamp(struct phy_device *phydev,
                /* fall through */
        case HWTSTAMP_TX_ON:
                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+               skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT;
                skb_queue_tail(&dp83640->tx_queue, skb);
                break;
 
index 90f44ba..3ccba37 100644
@@ -842,7 +842,6 @@ static int m88e1510_config_init(struct phy_device *phydev)
 
        /* SGMII-to-Copper mode initialization */
        if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
-
                /* Select page 18 */
                err = marvell_set_page(phydev, 18);
                if (err < 0)
@@ -865,21 +864,6 @@ static int m88e1510_config_init(struct phy_device *phydev)
                err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
                if (err < 0)
                        return err;
-
-               /* There appears to be a bug in the 88e1512 when used in
-                * SGMII to copper mode, where the AN advertisement register
-                * clears the pause bits each time a negotiation occurs.
-                * This means we can never be truely sure what was advertised,
-                * so disable Pause support.
-                */
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-                                  phydev->supported);
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT,
-                                  phydev->supported);
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-                                  phydev->advertising);
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT,
-                                  phydev->advertising);
        }
 
        return m88e1318_config_init(phydev);
index 296a537..496805c 100644
@@ -141,10 +141,9 @@ static int mv3310_hwmon_config(struct phy_device *phydev, bool enable)
                return ret;
 
        val = enable ? MV_V2_TEMP_CTRL_SAMPLE : MV_V2_TEMP_CTRL_DISABLE;
-       ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP_CTRL,
-                            MV_V2_TEMP_CTRL_MASK, val);
 
-       return ret < 0 ? ret : 0;
+       return phy_modify_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP_CTRL,
+                             MV_V2_TEMP_CTRL_MASK, val);
 }
 
 static void mv3310_hwmon_disable(void *data)
@@ -234,8 +233,7 @@ static int mv3310_resume(struct phy_device *phydev)
 
 static int mv3310_config_init(struct phy_device *phydev)
 {
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
-       int val;
+       int ret, val;
 
        /* Check that the PHY interface type is compatible */
        if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
@@ -244,8 +242,8 @@ static int mv3310_config_init(struct phy_device *phydev)
            phydev->interface != PHY_INTERFACE_MODE_10GKR)
                return -ENODEV;
 
-       __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
-       __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
+       __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported);
+       __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported);
 
        if (phydev->c45_ids.devices_in_package & MDIO_DEVS_AN) {
                val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
@@ -253,74 +251,13 @@ static int mv3310_config_init(struct phy_device *phydev)
                        return val;
 
                if (val & MDIO_AN_STAT1_ABLE)
-                       __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported);
+                       __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+                                 phydev->supported);
        }
 
-       val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2);
-       if (val < 0)
-               return val;
-
-       /* Ethtool does not support the WAN mode bits */
-       if (val & (MDIO_PMA_STAT2_10GBSR | MDIO_PMA_STAT2_10GBLR |
-                  MDIO_PMA_STAT2_10GBER | MDIO_PMA_STAT2_10GBLX4 |
-                  MDIO_PMA_STAT2_10GBSW | MDIO_PMA_STAT2_10GBLW |
-                  MDIO_PMA_STAT2_10GBEW))
-               __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
-       if (val & MDIO_PMA_STAT2_10GBSR)
-               __set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, supported);
-       if (val & MDIO_PMA_STAT2_10GBLR)
-               __set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, supported);
-       if (val & MDIO_PMA_STAT2_10GBER)
-               __set_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, supported);
-
-       if (val & MDIO_PMA_STAT2_EXTABLE) {
-               val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE);
-               if (val < 0)
-                       return val;
-
-               if (val & (MDIO_PMA_EXTABLE_10GBT | MDIO_PMA_EXTABLE_1000BT |
-                          MDIO_PMA_EXTABLE_100BTX | MDIO_PMA_EXTABLE_10BT))
-                       __set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported);
-               if (val & MDIO_PMA_EXTABLE_10GBLRM)
-                       __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
-               if (val & (MDIO_PMA_EXTABLE_10GBKX4 | MDIO_PMA_EXTABLE_10GBKR |
-                          MDIO_PMA_EXTABLE_1000BKX))
-                       __set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, supported);
-               if (val & MDIO_PMA_EXTABLE_10GBLRM)
-                       __set_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT,
-                                 supported);
-               if (val & MDIO_PMA_EXTABLE_10GBT)
-                       __set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
-                                 supported);
-               if (val & MDIO_PMA_EXTABLE_10GBKX4)
-                       __set_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
-                                 supported);
-               if (val & MDIO_PMA_EXTABLE_10GBKR)
-                       __set_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
-                                 supported);
-               if (val & MDIO_PMA_EXTABLE_1000BT)
-                       __set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
-                                 supported);
-               if (val & MDIO_PMA_EXTABLE_1000BKX)
-                       __set_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
-                                 supported);
-               if (val & MDIO_PMA_EXTABLE_100BTX) {
-                       __set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
-                                 supported);
-                       __set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
-                                 supported);
-               }
-               if (val & MDIO_PMA_EXTABLE_10BT) {
-                       __set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
-                                 supported);
-                       __set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
-                                 supported);
-               }
-       }
-
-       linkmode_copy(phydev->supported, supported);
-       linkmode_and(phydev->advertising, phydev->advertising,
-                    phydev->supported);
+       ret = genphy_c45_pma_read_abilities(phydev);
+       if (ret)
+               return ret;
 
        return 0;
 }
@@ -345,7 +282,7 @@ static int mv3310_config_aneg(struct phy_device *phydev)
        linkmode_and(phydev->advertising, phydev->advertising,
                     phydev->supported);
 
-       ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE,
+       ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE,
                             ADVERTISE_ALL | ADVERTISE_100BASE4 |
                             ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM,
                             linkmode_adv_to_mii_adv_t(phydev->advertising));
@@ -355,7 +292,7 @@ static int mv3310_config_aneg(struct phy_device *phydev)
                changed = true;
 
        reg = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
-       ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MV_AN_CTRL1000,
+       ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MV_AN_CTRL1000,
                             ADVERTISE_1000FULL | ADVERTISE_1000HALF, reg);
        if (ret < 0)
                return ret;
@@ -369,8 +306,8 @@ static int mv3310_config_aneg(struct phy_device *phydev)
        else
                reg = 0;
 
-       ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
-                            MDIO_AN_10GBT_CTRL_ADV10G, reg);
+       ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
+                                    MDIO_AN_10GBT_CTRL_ADV10G, reg);
        if (ret < 0)
                return ret;
        if (ret > 0)
@@ -428,16 +365,8 @@ static int mv3310_read_10gbr_status(struct phy_device *phydev)
 
 static int mv3310_read_status(struct phy_device *phydev)
 {
-       u32 mmd_mask = phydev->c45_ids.devices_in_package;
        int val;
 
-       /* The vendor devads do not report link status.  Avoid the PHYXS
-        * instance as there are three, and its status depends on the MAC
-        * being appropriately configured for the negotiated speed.
-        */
-       mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2) |
-                     BIT(MDIO_MMD_PHYXS));
-
        phydev->speed = SPEED_UNKNOWN;
        phydev->duplex = DUPLEX_UNKNOWN;
        linkmode_zero(phydev->lp_advertising);
@@ -453,7 +382,7 @@ static int mv3310_read_status(struct phy_device *phydev)
        if (val & MDIO_STAT1_LSTATUS)
                return mv3310_read_10gbr_status(phydev);
 
-       val = genphy_c45_read_link(phydev, mmd_mask);
+       val = genphy_c45_read_link(phydev);
        if (val < 0)
                return val;
 
index c92d0fb..7af5fa8 100644
@@ -47,6 +47,16 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev)
                /* Assume 1000base-T */
                ctrl2 |= MDIO_PMA_CTRL2_1000BT;
                break;
+       case SPEED_2500:
+               ctrl1 |= MDIO_CTRL1_SPEED2_5G;
+               /* Assume 2.5Gbase-T */
+               ctrl2 |= MDIO_PMA_CTRL2_2_5GBT;
+               break;
+       case SPEED_5000:
+               ctrl1 |= MDIO_CTRL1_SPEED5G;
+               /* Assume 5Gbase-T */
+               ctrl2 |= MDIO_PMA_CTRL2_5GBT;
+               break;
        case SPEED_10000:
                ctrl1 |= MDIO_CTRL1_SPEED10G;
                /* Assume 10Gbase-T */
@@ -118,25 +128,40 @@ EXPORT_SYMBOL_GPL(genphy_c45_aneg_done);
 /**
  * genphy_c45_read_link - read the overall link status from the MMDs
  * @phydev: target phy_device struct
- * @mmd_mask: MMDs to read status from
  *
  * Read the link status from the specified MMDs, and if they all indicate
  * that the link is up, set phydev->link to 1.  If an error is encountered,
  * a negative errno will be returned, otherwise zero.
  */
-int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask)
+int genphy_c45_read_link(struct phy_device *phydev)
 {
+       u32 mmd_mask = phydev->c45_ids.devices_in_package;
        int val, devad;
        bool link = true;
 
+       /* The vendor devads and C22EXT do not report link status. Avoid the
+        * PHYXS instance as its status may depend on the MAC being
+        * appropriately configured for the negotiated speed.
+        */
+       mmd_mask &= ~(MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2 | MDIO_DEVS_C22EXT |
+                     MDIO_DEVS_PHYXS);
+
        while (mmd_mask && link) {
                devad = __ffs(mmd_mask);
                mmd_mask &= ~BIT(devad);
 
                /* The link state is latched low so that momentary link
-                * drops can be detected.  Do not double-read the status
-                * register if the link is down.
+                * drops can be detected. Do not double-read the status
+                * in polling mode to detect such short link drops.
                 */
+               if (!phy_polling_mode(phydev)) {
+                       val = phy_read_mmd(phydev, devad, MDIO_STAT1);
+                       if (val < 0)
+                               return val;
+                       else if (val & MDIO_STAT1_LSTATUS)
+                               continue;
+               }
+
                val = phy_read_mmd(phydev, devad, MDIO_STAT1);
                if (val < 0)
                        return val;
@@ -179,6 +204,12 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
        if (val < 0)
                return val;
 
+       if (val & MDIO_AN_10GBT_STAT_LP2_5G)
+               linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+                                phydev->lp_advertising);
+       if (val & MDIO_AN_10GBT_STAT_LP5G)
+               linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+                                phydev->lp_advertising);
        if (val & MDIO_AN_10GBT_STAT_LP10G)
                linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
                                 phydev->lp_advertising);
@@ -209,6 +240,12 @@ int genphy_c45_read_pma(struct phy_device *phydev)
        case MDIO_PMA_CTRL1_SPEED1000:
                phydev->speed = SPEED_1000;
                break;
+       case MDIO_CTRL1_SPEED2_5G:
+               phydev->speed = SPEED_2500;
+               break;
+       case MDIO_CTRL1_SPEED5G:
+               phydev->speed = SPEED_5000;
+               break;
        case MDIO_CTRL1_SPEED10G:
                phydev->speed = SPEED_10000;
                break;
@@ -256,6 +293,95 @@ int genphy_c45_read_mdix(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
 
+/**
+ * genphy_c45_pma_read_abilities - read supported link modes from PMA
+ * @phydev: target phy_device struct
+ *
+ * Read the supported link modes from the PMA Status 2 (1.8) register. If bit
+ * 1.8.9 is set, the list of supported modes is built using the values in the
+ * PMA Extended Abilities (1.11) register, indicating 1000BASET and 10G related
+ * modes. If bit 1.11.14 is set, then the list is also extended with the modes
+ * in the 2.5G/5G PMA Extended register (1.21), indicating if 2.5GBASET and
+ * 5GBASET are supported.
+ */
+int genphy_c45_pma_read_abilities(struct phy_device *phydev)
+{
+       int val;
+
+       val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2);
+       if (val < 0)
+               return val;
+
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+                        phydev->supported,
+                        val & MDIO_PMA_STAT2_10GBSR);
+
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+                        phydev->supported,
+                        val & MDIO_PMA_STAT2_10GBLR);
+
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
+                        phydev->supported,
+                        val & MDIO_PMA_STAT2_10GBER);
+
+       if (val & MDIO_PMA_STAT2_EXTABLE) {
+               val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE);
+               if (val < 0)
+                       return val;
+
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_10GBLRM);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_10GBT);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_10GBKX4);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_10GBKR);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_1000BT);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_1000BKX);
+
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_100BTX);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_100BTX);
+
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_10BT);
+               linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+                                phydev->supported,
+                                val & MDIO_PMA_EXTABLE_10BT);
+
+               if (val & MDIO_PMA_EXTABLE_NBT) {
+                       val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD,
+                                          MDIO_PMA_NG_EXTABLE);
+                       if (val < 0)
+                               return val;
+
+                       linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+                                        phydev->supported,
+                                        val & MDIO_PMA_NG_EXTABLE_2_5GBT);
+
+                       linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+                                        phydev->supported,
+                                        val & MDIO_PMA_NG_EXTABLE_5GBT);
+               }
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_pma_read_abilities);
+
 /* The gen10g_* functions are the old Clause 45 stub */
 
 int gen10g_config_aneg(struct phy_device *phydev)
@@ -266,16 +392,11 @@ EXPORT_SYMBOL_GPL(gen10g_config_aneg);
 
 int gen10g_read_status(struct phy_device *phydev)
 {
-       u32 mmd_mask = phydev->c45_ids.devices_in_package;
-
        /* For now just lie and say it's 10G all the time */
        phydev->speed = SPEED_10000;
        phydev->duplex = DUPLEX_FULL;
 
-       /* Avoid reading the vendor MMDs */
-       mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2));
-
-       return genphy_c45_read_link(phydev, mmd_mask);
+       return genphy_c45_read_link(phydev);
 }
 EXPORT_SYMBOL_GPL(gen10g_read_status);
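The rework of genphy_c45_read_link() hinges on the MDIO_STAT1 link bit being latched low: once the link drops, the bit reads 0 until it has been read, even if the link has already recovered. In polling mode a single read therefore surfaces momentary drops to the state machine; in interrupt mode the function may read twice so that phydev->link reflects the current state rather than the latched one. In sketch form:

	val = phy_read_mmd(phydev, devad, MDIO_STAT1);	/* latched: 0 if the link */
							/* dropped since last read */
	val = phy_read_mmd(phydev, devad, MDIO_STAT1);	/* current link state */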
 
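genphy_c45_pma_read_abilities() leans on linkmode_mod_bit(), which sets the bit when the third argument is non-zero and clears it otherwise; that is why it can operate on a live phydev->supported mask instead of building a local mask and copying it over, as the deleted mv3310 code did. One call from above, open-coded for comparison:

	if (val & MDIO_PMA_EXTABLE_10GBT)
		linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
				 phydev->supported);
	else
		linkmode_clear_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
				   phydev->supported);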
index 7d6aad2..de58a59 100644
@@ -4,6 +4,7 @@
  */
 #include <linux/export.h>
 #include <linux/phy.h>
+#include <linux/of.h>
 
 const char *phy_speed_to_str(int speed)
 {
@@ -338,6 +339,77 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
        return count;
 }
 
+static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
+{
+       const struct phy_setting *p;
+       int i;
+
+       for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) {
+               if (p->speed > max_speed)
+                       linkmode_clear_bit(p->bit, phydev->supported);
+               else
+                       break;
+       }
+
+       return 0;
+}
+
+int phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
+{
+       int err;
+
+       err = __set_phy_supported(phydev, max_speed);
+       if (err)
+               return err;
+
+       linkmode_copy(phydev->advertising, phydev->supported);
+
+       return 0;
+}
+EXPORT_SYMBOL(phy_set_max_speed);
+
+void of_set_phy_supported(struct phy_device *phydev)
+{
+       struct device_node *node = phydev->mdio.dev.of_node;
+       u32 max_speed;
+
+       if (!IS_ENABLED(CONFIG_OF_MDIO))
+               return;
+
+       if (!node)
+               return;
+
+       if (!of_property_read_u32(node, "max-speed", &max_speed))
+               __set_phy_supported(phydev, max_speed);
+}
+
+void of_set_phy_eee_broken(struct phy_device *phydev)
+{
+       struct device_node *node = phydev->mdio.dev.of_node;
+       u32 broken = 0;
+
+       if (!IS_ENABLED(CONFIG_OF_MDIO))
+               return;
+
+       if (!node)
+               return;
+
+       if (of_property_read_bool(node, "eee-broken-100tx"))
+               broken |= MDIO_EEE_100TX;
+       if (of_property_read_bool(node, "eee-broken-1000t"))
+               broken |= MDIO_EEE_1000T;
+       if (of_property_read_bool(node, "eee-broken-10gt"))
+               broken |= MDIO_EEE_10GT;
+       if (of_property_read_bool(node, "eee-broken-1000kx"))
+               broken |= MDIO_EEE_1000KX;
+       if (of_property_read_bool(node, "eee-broken-10gkx4"))
+               broken |= MDIO_EEE_10GKX4;
+       if (of_property_read_bool(node, "eee-broken-10gkr"))
+               broken |= MDIO_EEE_10GKR;
+
+       phydev->eee_broken_modes = broken;
+}
+
 /**
  * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
  * @phydev: The phy_device struct
@@ -531,7 +603,7 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val)
 EXPORT_SYMBOL(phy_write_mmd);
 
 /**
- * __phy_modify() - Convenience function for modifying a PHY register
+ * __phy_modify_changed() - Convenience function for modifying a PHY register
  * @phydev: a pointer to a &struct phy_device
  * @regnum: register number
  * @mask: bit mask of bits to clear
@@ -539,16 +611,69 @@ EXPORT_SYMBOL(phy_write_mmd);
  *
  * Unlocked helper function which allows a PHY register to be modified as
  * new register value = (old register value & ~mask) | set
+ *
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
  */
-int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set)
+int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask,
+                        u16 set)
 {
-       int ret;
+       int new, ret;
 
        ret = __phy_read(phydev, regnum);
        if (ret < 0)
                return ret;
 
-       ret = __phy_write(phydev, regnum, (ret & ~mask) | set);
+       new = (ret & ~mask) | set;
+       if (new == ret)
+               return 0;
+
+       ret = __phy_write(phydev, regnum, new);
+
+       return ret < 0 ? ret : 1;
+}
+EXPORT_SYMBOL_GPL(__phy_modify_changed);
+
+/**
+ * phy_modify_changed - Function for modifying a PHY register
+ * @phydev: the phy_device struct
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ *
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
+ */
+int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, u16 set)
+{
+       int ret;
+
+       mutex_lock(&phydev->mdio.bus->mdio_lock);
+       ret = __phy_modify_changed(phydev, regnum, mask, set);
+       mutex_unlock(&phydev->mdio.bus->mdio_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(phy_modify_changed);
+
+/**
+ * __phy_modify - Convenience function for modifying a PHY register
+ * @phydev: the phy_device struct
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ */
+int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set)
+{
+       int ret;
+
+       ret = __phy_modify_changed(phydev, regnum, mask, set);
 
        return ret < 0 ? ret : 0;
 }
@@ -578,7 +703,7 @@ int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set)
 EXPORT_SYMBOL_GPL(phy_modify);
 
 /**
- * __phy_modify_mmd - Convenience function for modifying a register on MMD
+ * __phy_modify_mmd_changed - Function for modifying a register on MMD
  * @phydev: the phy_device struct
  * @devad: the MMD containing register to modify
  * @regnum: register number to modify
@@ -587,17 +712,73 @@ EXPORT_SYMBOL_GPL(phy_modify);
  *
  * Unlocked helper function which allows a MMD register to be modified as
  * new register value = (old register value & ~mask) | set
+ *
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
  */
-int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
-                    u16 mask, u16 set)
+int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum,
+                            u16 mask, u16 set)
 {
-       int ret;
+       int new, ret;
 
        ret = __phy_read_mmd(phydev, devad, regnum);
        if (ret < 0)
                return ret;
 
-       ret = __phy_write_mmd(phydev, devad, regnum, (ret & ~mask) | set);
+       new = (ret & ~mask) | set;
+       if (new == ret)
+               return 0;
+
+       ret = __phy_write_mmd(phydev, devad, regnum, new);
+
+       return ret < 0 ? ret : 1;
+}
+EXPORT_SYMBOL_GPL(__phy_modify_mmd_changed);
+
+/**
+ * phy_modify_mmd_changed - Function for modifying a register on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ *
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
+ */
+int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum,
+                          u16 mask, u16 set)
+{
+       int ret;
+
+       mutex_lock(&phydev->mdio.bus->mdio_lock);
+       ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set);
+       mutex_unlock(&phydev->mdio.bus->mdio_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(phy_modify_mmd_changed);
+
+/**
+ * __phy_modify_mmd - Convenience function for modifying a register on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ */
+int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
+                    u16 mask, u16 set)
+{
+       int ret;
+
+       ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set);
 
        return ret < 0 ? ret : 0;
 }
index 89ead29..69dc64a 100644 (file)
@@ -543,7 +543,7 @@ int phy_start_aneg(struct phy_device *phydev)
        if (err < 0)
                goto out_unlock;
 
-       if (__phy_is_started(phydev)) {
+       if (phy_is_started(phydev)) {
                if (phydev->autoneg == AUTONEG_ENABLE) {
                        err = phy_check_link_status(phydev);
                } else {
@@ -699,7 +699,7 @@ void phy_stop_machine(struct phy_device *phydev)
        cancel_delayed_work_sync(&phydev->state_queue);
 
        mutex_lock(&phydev->lock);
-       if (__phy_is_started(phydev))
+       if (phy_is_started(phydev))
                phydev->state = PHY_UP;
        mutex_unlock(&phydev->lock);
 }
@@ -752,9 +752,6 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 {
        struct phy_device *phydev = phy_dat;
 
-       if (!phy_is_started(phydev))
-               return IRQ_NONE;                /* It can't be ours.  */
-
        if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev))
                return IRQ_NONE;
 
@@ -813,15 +810,14 @@ EXPORT_SYMBOL(phy_request_interrupt);
  */
 void phy_stop(struct phy_device *phydev)
 {
-       mutex_lock(&phydev->lock);
-
-       if (!__phy_is_started(phydev)) {
+       if (!phy_is_started(phydev)) {
                WARN(1, "called from state %s\n",
                     phy_state_to_str(phydev->state));
-               mutex_unlock(&phydev->lock);
                return;
        }
 
+       mutex_lock(&phydev->lock);
+
        if (phy_interrupt_is_valid(phydev))
                phy_disable_interrupts(phydev);
 
@@ -961,8 +957,10 @@ void phy_state_machine(struct work_struct *work)
         * state machine would be pointless and possibly error prone when
         * called from phy_disconnect() synchronously.
         */
+       mutex_lock(&phydev->lock);
        if (phy_polling_mode(phydev) && phy_is_started(phydev))
                phy_queue_state_machine(phydev, PHY_STATE_TIME);
+       mutex_unlock(&phydev->lock);
 }
 
 /**
index 891e017..a752de2 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/mdio.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
-#include <linux/of.h>
 
 MODULE_DESCRIPTION("PHY library");
 MODULE_AUTHOR("Andy Fleming");
@@ -676,13 +675,16 @@ static int get_phy_c45_devs_in_pkg(struct mii_bus *bus, int addr, int dev_addr,
        phy_reg = mdiobus_read(bus, addr, reg_addr);
        if (phy_reg < 0)
                return -EIO;
-       *devices_in_package = (phy_reg & 0xffff) << 16;
+       *devices_in_package = phy_reg << 16;
 
        reg_addr = MII_ADDR_C45 | dev_addr << 16 | MDIO_DEVS1;
        phy_reg = mdiobus_read(bus, addr, reg_addr);
        if (phy_reg < 0)
                return -EIO;
-       *devices_in_package |= (phy_reg & 0xffff);
+       *devices_in_package |= phy_reg;
+
+       /* Bit 0 doesn't represent a device; it indicates c22 regs presence */
+       *devices_in_package &= ~BIT(0);
 
        return 0;
 }
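With bit 0 masked off, the value is a pure bitmap of present MMDs; a rough sketch of walking it using the static helper from the hunk above (example_probe_mmd() is a hypothetical function, and the loop bound follows the 802.3 MMD numbering):

	u32 devs;
	int i;

	if (get_phy_c45_devs_in_pkg(bus, addr, 0, &devs))
		return -EIO;

	for (i = 1; i < 32; i++)	/* bit 0 is no longer set */
		if (devs & BIT(i))
			example_probe_mmd(bus, addr, i);
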
@@ -743,13 +745,13 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id,
                phy_reg = mdiobus_read(bus, addr, reg_addr);
                if (phy_reg < 0)
                        return -EIO;
-               c45_ids->device_ids[i] = (phy_reg & 0xffff) << 16;
+               c45_ids->device_ids[i] = phy_reg << 16;
 
                reg_addr = MII_ADDR_C45 | i << 16 | MII_PHYSID2;
                phy_reg = mdiobus_read(bus, addr, reg_addr);
                if (phy_reg < 0)
                        return -EIO;
-               c45_ids->device_ids[i] |= (phy_reg & 0xffff);
+               c45_ids->device_ids[i] |= phy_reg;
        }
        *phy_id = 0;
        return 0;
@@ -786,14 +788,14 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id,
                return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO;
        }
 
-       *phy_id = (phy_reg & 0xffff) << 16;
+       *phy_id = phy_reg << 16;
 
        /* Grab the bits from PHYIR2, and put them in the lower half */
        phy_reg = mdiobus_read(bus, addr, MII_PHYSID2);
        if (phy_reg < 0)
                return -EIO;
 
-       *phy_id |= (phy_reg & 0xffff);
+       *phy_id |= phy_reg;
 
        return 0;
 }
@@ -1513,7 +1515,7 @@ EXPORT_SYMBOL(phy_reset_after_clk_enable);
 static int genphy_config_advert(struct phy_device *phydev)
 {
        u32 advertise;
-       int oldadv, adv, bmsr;
+       int bmsr, adv;
        int err, changed = 0;
 
        /* Only allow advertising what this PHY supports */
@@ -1526,22 +1528,14 @@ static int genphy_config_advert(struct phy_device *phydev)
                            phydev->advertising);
 
        /* Setup standard advertisement */
-       adv = phy_read(phydev, MII_ADVERTISE);
-       if (adv < 0)
-               return adv;
-
-       oldadv = adv;
-       adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP |
-                ADVERTISE_PAUSE_ASYM);
-       adv |= ethtool_adv_to_mii_adv_t(advertise);
-
-       if (adv != oldadv) {
-               err = phy_write(phydev, MII_ADVERTISE, adv);
-
-               if (err < 0)
-                       return err;
+       err = phy_modify_changed(phydev, MII_ADVERTISE,
+                                ADVERTISE_ALL | ADVERTISE_100BASE4 |
+                                ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM,
+                                ethtool_adv_to_mii_adv_t(advertise));
+       if (err < 0)
+               return err;
+       if (err > 0)
                changed = 1;
-       }
 
        bmsr = phy_read(phydev, MII_BMSR);
        if (bmsr < 0)
@@ -1555,25 +1549,20 @@ static int genphy_config_advert(struct phy_device *phydev)
                return changed;
 
        /* Configure gigabit if it's supported */
-       adv = phy_read(phydev, MII_CTRL1000);
-       if (adv < 0)
-               return adv;
-
-       oldadv = adv;
-       adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
-
+       adv = 0;
        if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
                              phydev->supported) ||
            linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
                              phydev->supported))
-               adv |= ethtool_adv_to_mii_ctrl1000_t(advertise);
+               adv = ethtool_adv_to_mii_ctrl1000_t(advertise);
 
-       if (adv != oldadv)
-               changed = 1;
-
-       err = phy_write(phydev, MII_CTRL1000, adv);
+       err = phy_modify_changed(phydev, MII_CTRL1000,
+                                ADVERTISE_1000FULL | ADVERTISE_1000HALF,
+                                adv);
        if (err < 0)
                return err;
+       if (err > 0)
+               changed = 1;
 
        return changed;
 }
@@ -1588,31 +1577,16 @@ static int genphy_config_advert(struct phy_device *phydev)
  */
 static int genphy_config_eee_advert(struct phy_device *phydev)
 {
-       int broken = phydev->eee_broken_modes;
-       int old_adv, adv;
+       int err;
 
        /* Nothing to disable */
-       if (!broken)
-               return 0;
-
-       /* If the following call fails, we assume that EEE is not
-        * supported by the phy. If we read 0, EEE is not advertised
-        * In both case, we don't need to continue
-        */
-       adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
-       if (adv <= 0)
+       if (!phydev->eee_broken_modes)
                return 0;
 
-       old_adv = adv;
-       adv &= ~broken;
-
-       /* Advertising remains unchanged with the broken mask */
-       if (old_adv == adv)
-               return 0;
-
-       phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, adv);
-
-       return 1;
+       err = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV,
+                                    phydev->eee_broken_modes, 0);
+       /* If the call failed, we assume that EEE is not supported */
+       return err < 0 ? 0 : err;
 }
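The mask/set convention also makes it easy to knock out a single mode: pass the bits to clear in @mask and 0 in @set. A hedged one-off example (the constants are from uapi/linux/mdio.h, the error handling is illustrative):

	/* Stop advertising EEE for 1000BASE-T; returns 1 only if the
	 * advertisement register actually changed.
	 */
	ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV,
				     MDIO_EEE_1000T, 0);
	if (ret < 0)
		phydev_warn(phydev, "EEE advert update failed: %d\n", ret);
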
 
 /**
@@ -1729,10 +1703,19 @@ int genphy_update_link(struct phy_device *phydev)
 {
        int status;
 
-       /* Do a fake read */
-       status = phy_read(phydev, MII_BMSR);
-       if (status < 0)
-               return status;
+       /* The link state is latched low so that momentary link
+        * drops can be detected. Do not double-read the status
+        * in polling mode to detect such short link drops.
+        */
+       if (!phy_polling_mode(phydev)) {
+               status = phy_read(phydev, MII_BMSR);
+               if (status < 0) {
+                       return status;
+               } else if (status & BMSR_LSTATUS) {
+                       phydev->link = 1;
+                       return 0;
+               }
+       }
 
        /* Read link and autonegotiation status */
        status = phy_read(phydev, MII_BMSR);
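For contrast, the classic pattern that the hunk above now restricts to non-polling mode: BMSR latches link-down until read, so one read flushes the latched value and a second returns the live state (sketch only):

	int bmsr;

	bmsr = phy_read(phydev, MII_BMSR);	/* flush latched link-down */
	if (bmsr < 0)
		return bmsr;

	bmsr = phy_read(phydev, MII_BMSR);	/* live link state */
	if (bmsr < 0)
		return bmsr;

	phydev->link = !!(bmsr & BMSR_LSTATUS);

In polling mode the double read would consume exactly the latched 0 that lets short link drops be noticed between polls, which is why it is skipped there.
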
@@ -1965,44 +1948,6 @@ int genphy_loopback(struct phy_device *phydev, bool enable)
 }
 EXPORT_SYMBOL(genphy_loopback);
 
-static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
-{
-       switch (max_speed) {
-       case SPEED_10:
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
-                                  phydev->supported);
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
-                                  phydev->supported);
-               /* fall through */
-       case SPEED_100:
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
-                                  phydev->supported);
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
-                                  phydev->supported);
-               break;
-       case SPEED_1000:
-               break;
-       default:
-               return -ENOTSUPP;
-       }
-
-       return 0;
-}
-
-int phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
-{
-       int err;
-
-       err = __set_phy_supported(phydev, max_speed);
-       if (err)
-               return err;
-
-       linkmode_copy(phydev->advertising, phydev->supported);
-
-       return 0;
-}
-EXPORT_SYMBOL(phy_set_max_speed);
-
 /**
  * phy_remove_link_mode - Remove a supported link mode
  * @phydev: phy_device structure to remove link mode from
@@ -2133,48 +2078,6 @@ bool phy_validate_pause(struct phy_device *phydev,
 }
 EXPORT_SYMBOL(phy_validate_pause);
 
-static void of_set_phy_supported(struct phy_device *phydev)
-{
-       struct device_node *node = phydev->mdio.dev.of_node;
-       u32 max_speed;
-
-       if (!IS_ENABLED(CONFIG_OF_MDIO))
-               return;
-
-       if (!node)
-               return;
-
-       if (!of_property_read_u32(node, "max-speed", &max_speed))
-               __set_phy_supported(phydev, max_speed);
-}
-
-static void of_set_phy_eee_broken(struct phy_device *phydev)
-{
-       struct device_node *node = phydev->mdio.dev.of_node;
-       u32 broken = 0;
-
-       if (!IS_ENABLED(CONFIG_OF_MDIO))
-               return;
-
-       if (!node)
-               return;
-
-       if (of_property_read_bool(node, "eee-broken-100tx"))
-               broken |= MDIO_EEE_100TX;
-       if (of_property_read_bool(node, "eee-broken-1000t"))
-               broken |= MDIO_EEE_1000T;
-       if (of_property_read_bool(node, "eee-broken-10gt"))
-               broken |= MDIO_EEE_10GT;
-       if (of_property_read_bool(node, "eee-broken-1000kx"))
-               broken |= MDIO_EEE_1000KX;
-       if (of_property_read_bool(node, "eee-broken-10gkx4"))
-               broken |= MDIO_EEE_10GKX4;
-       if (of_property_read_bool(node, "eee-broken-10gkr"))
-               broken |= MDIO_EEE_10GKR;
-
-       phydev->eee_broken_modes = broken;
-}
-
 static bool phy_drv_supports_irq(struct phy_driver *phydrv)
 {
        return phydrv->config_intr && phydrv->ack_interrupt;
@@ -2208,11 +2111,30 @@ static int phy_probe(struct device *dev)
 
        mutex_lock(&phydev->lock);
 
+       if (phydev->drv->probe) {
+               /* Deassert the reset signal */
+               phy_device_reset(phydev, 0);
+
+               err = phydev->drv->probe(phydev);
+               if (err) {
+                       /* Assert the reset signal */
+                       phy_device_reset(phydev, 1);
+                       goto out;
+               }
+       }
+
        /* Start out supporting everything. Eventually,
         * a controller will attach, and may modify one
         * or both of these values
         */
-       linkmode_copy(phydev->supported, phydrv->features);
+       if (phydrv->features) {
+               linkmode_copy(phydev->supported, phydrv->features);
+       } else {
+               err = phydrv->get_features(phydev);
+               if (err)
+                       goto out;
+       }
+
        of_set_phy_supported(phydev);
        linkmode_copy(phydev->advertising, phydev->supported);
 
@@ -2232,20 +2154,8 @@ static int phy_probe(struct device *dev)
         * (e.g. hardware erratum) where the driver wants to set only one
         * of these bits.
         */
-       if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features) ||
-           test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydrv->features)) {
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT,
-                                  phydev->supported);
-               linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-                                  phydev->supported);
-               if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features))
-                       linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT,
-                                        phydev->supported);
-               if (test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-                            phydrv->features))
-                       linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-                                        phydev->supported);
-       } else {
+       if (!test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported) &&
+           !test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported)) {
                linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT,
                                 phydev->supported);
                linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
@@ -2255,17 +2165,7 @@ static int phy_probe(struct device *dev)
        /* Set the state to READY by default */
        phydev->state = PHY_READY;
 
-       if (phydev->drv->probe) {
-               /* Deassert the reset signal */
-               phy_device_reset(phydev, 0);
-
-               err = phydev->drv->probe(phydev);
-               if (err) {
-                       /* Assert the reset signal */
-                       phy_device_reset(phydev, 1);
-               }
-       }
-
+out:
        mutex_unlock(&phydev->lock);
 
        return err;
@@ -2301,7 +2201,11 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
 {
        int retval;
 
-       if (WARN_ON(!new_driver->features)) {
+       /* Either the features are hard coded, or dynamically
+        * determined. It cannot be both or neither.
+        */
+       if (WARN_ON((!new_driver->features && !new_driver->get_features) ||
+                   (new_driver->features && new_driver->get_features))) {
                pr_err("%s: Driver features are missing\n", new_driver->name);
                return -EINVAL;
        }
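A hypothetical driver using run-time discovery would leave .features NULL and supply the callback instead; everything below is a sketch (the IDs are made up, and a real .get_features would read ability registers rather than hard-code a mode):

	static int example_get_features(struct phy_device *phydev)
	{
		/* Fill phydev->supported from the hardware here. */
		linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
				 phydev->supported);
		return 0;
	}

	static struct phy_driver example_driver = {
		.phy_id		= 0x01234567,	/* made-up ID */
		.phy_id_mask	= 0xfffffff0,
		.name		= "Example PHY",
		/* .features left unset: it and .get_features are
		 * mutually exclusive.
		 */
		.get_features	= example_get_features,
	};
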
index 2e21ce4..59d175a 100644 (file)
@@ -302,6 +302,13 @@ static void phylink_mac_config(struct phylink *pl,
        pl->ops->mac_config(pl->netdev, pl->link_an_mode, state);
 }
 
+static void phylink_mac_config_up(struct phylink *pl,
+                                 const struct phylink_link_state *state)
+{
+       if (state->link)
+               phylink_mac_config(pl, state);
+}
+
 static void phylink_mac_an_restart(struct phylink *pl)
 {
        if (pl->link_config.an_enabled &&
@@ -401,12 +408,12 @@ static void phylink_resolve(struct work_struct *w)
                case MLO_AN_PHY:
                        link_state = pl->phy_state;
                        phylink_resolve_flow(pl, &link_state);
-                       phylink_mac_config(pl, &link_state);
+                       phylink_mac_config_up(pl, &link_state);
                        break;
 
                case MLO_AN_FIXED:
                        phylink_get_fixed_state(pl, &link_state);
-                       phylink_mac_config(pl, &link_state);
+                       phylink_mac_config_up(pl, &link_state);
                        break;
 
                case MLO_AN_INBAND:
@@ -471,6 +478,17 @@ static void phylink_run_resolve(struct phylink *pl)
                queue_work(system_power_efficient_wq, &pl->resolve);
 }
 
+static void phylink_run_resolve_and_disable(struct phylink *pl, int bit)
+{
+       unsigned long state = pl->phylink_disable_state;
+
+       set_bit(bit, &pl->phylink_disable_state);
+       if (state == 0) {
+               queue_work(system_power_efficient_wq, &pl->resolve);
+               flush_work(&pl->resolve);
+       }
+}
+
 static void phylink_fixed_poll(struct timer_list *t)
 {
        struct phylink *pl = container_of(t, struct phylink, link_poll);
@@ -920,9 +938,7 @@ void phylink_stop(struct phylink *pl)
        if (pl->link_an_mode == MLO_AN_FIXED && !IS_ERR(pl->link_gpio))
                del_timer_sync(&pl->link_poll);
 
-       set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
-       queue_work(system_power_efficient_wq, &pl->resolve);
-       flush_work(&pl->resolve);
+       phylink_run_resolve_and_disable(pl, PHYLINK_DISABLE_STOPPED);
 }
 EXPORT_SYMBOL_GPL(phylink_stop);
 
@@ -1265,6 +1281,24 @@ int phylink_get_eee_err(struct phylink *pl)
 EXPORT_SYMBOL_GPL(phylink_get_eee_err);
 
 /**
+ * phylink_init_eee() - init and check the EEE features
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @clk_stop_enable: allow PHY to stop receive clock
+ *
+ * Must be called either with RTNL held or within mac_link_up()
+ */
+int phylink_init_eee(struct phylink *pl, bool clk_stop_enable)
+{
+       int ret = -EOPNOTSUPP;
+
+       if (pl->phydev)
+               ret = phy_init_eee(pl->phydev, clk_stop_enable);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_init_eee);
+
+/**
  * phylink_ethtool_get_eee() - read the energy efficient ethernet parameters
  * @pl: a pointer to a &struct phylink returned from phylink_create()
  * @eee: a pointer to a &struct ethtool_eee for the read parameters
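A sketch of the intended call site for phylink_init_eee() above: a MAC driver that brings up its own LPI logic only after the PHY side agreed. The priv layout and example_enable_lpi() are assumptions; phylink_ethtool_set_eee() is the existing phylink API:

	static int example_set_eee(struct net_device *ndev,
				   struct ethtool_eee *eee)
	{
		struct example_priv *priv = netdev_priv(ndev);
		int ret;

		/* true: the PHY may stop its receive clock in LPI */
		ret = phylink_init_eee(priv->phylink, true);
		if (ret)
			return ret;	/* -EOPNOTSUPP without a PHY */

		example_enable_lpi(priv);

		return phylink_ethtool_set_eee(priv->phylink, eee);
	}
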
@@ -1628,9 +1662,7 @@ static void phylink_sfp_link_down(void *upstream)
 
        ASSERT_RTNL();
 
-       set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
-       queue_work(system_power_efficient_wq, &pl->resolve);
-       flush_work(&pl->resolve);
+       phylink_run_resolve_and_disable(pl, PHYLINK_DISABLE_LINK);
 }
 
 static void phylink_sfp_link_up(void *upstream)
index ad9db65..fef701b 100644 (file)
@@ -347,6 +347,7 @@ static int sfp_register_bus(struct sfp_bus *bus)
                                return ret;
                }
        }
+       bus->socket_ops->attach(bus->sfp);
        if (bus->started)
                bus->socket_ops->start(bus->sfp);
        bus->netdev->sfp_bus = bus;
@@ -362,6 +363,7 @@ static void sfp_unregister_bus(struct sfp_bus *bus)
        if (bus->registered) {
                if (bus->started)
                        bus->socket_ops->stop(bus->sfp);
+               bus->socket_ops->detach(bus->sfp);
                if (bus->phydev && ops && ops->disconnect_phy)
                        ops->disconnect_phy(bus->upstream);
        }
index 298ab75..d4635c2 100644 (file)
@@ -185,6 +185,7 @@ struct sfp {
 
        struct gpio_desc *gpio[GPIO_MAX];
 
+       bool attached;
        unsigned int state;
        struct delayed_work poll;
        struct delayed_work timeout;
@@ -1476,7 +1477,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
         */
        switch (sfp->sm_mod_state) {
        default:
-               if (event == SFP_E_INSERT) {
+               if (event == SFP_E_INSERT && sfp->attached) {
                        sfp_module_tx_disable(sfp);
                        sfp_sm_ins_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT);
                }
@@ -1608,6 +1609,19 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
        mutex_unlock(&sfp->sm_mutex);
 }
 
+static void sfp_attach(struct sfp *sfp)
+{
+       sfp->attached = true;
+       if (sfp->state & SFP_F_PRESENT)
+               sfp_sm_event(sfp, SFP_E_INSERT);
+}
+
+static void sfp_detach(struct sfp *sfp)
+{
+       sfp->attached = false;
+       sfp_sm_event(sfp, SFP_E_REMOVE);
+}
+
 static void sfp_start(struct sfp *sfp)
 {
        sfp_sm_event(sfp, SFP_E_DEV_UP);
@@ -1668,6 +1682,8 @@ static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee,
 }
 
 static const struct sfp_socket_ops sfp_module_ops = {
+       .attach = sfp_attach,
+       .detach = sfp_detach,
        .start = sfp_start,
        .stop = sfp_stop,
        .module_info = sfp_module_info,
@@ -1835,10 +1851,6 @@ static int sfp_probe(struct platform_device *pdev)
        dev_info(sfp->dev, "Host maximum power %u.%uW\n",
                 sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
 
-       sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
-       if (!sfp->sfp_bus)
-               return -ENOMEM;
-
        /* Get the initial state, and always signal TX disable,
         * since the network interface will not be up.
         */
@@ -1849,10 +1861,6 @@ static int sfp_probe(struct platform_device *pdev)
                sfp->state |= SFP_F_RATE_SELECT;
        sfp_set_state(sfp, sfp->state);
        sfp_module_tx_disable(sfp);
-       rtnl_lock();
-       if (sfp->state & SFP_F_PRESENT)
-               sfp_sm_event(sfp, SFP_E_INSERT);
-       rtnl_unlock();
 
        for (i = 0; i < GPIO_MAX; i++) {
                if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i])
@@ -1885,6 +1893,10 @@ static int sfp_probe(struct platform_device *pdev)
                dev_warn(sfp->dev,
                         "No tx_disable pin: SFP modules will always be emitting.\n");
 
+       sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
+       if (!sfp->sfp_bus)
+               return -ENOMEM;
+
        return 0;
 }
 
index 31b0acf..64f54b0 100644 (file)
@@ -7,6 +7,8 @@
 struct sfp;
 
 struct sfp_socket_ops {
+       void (*attach)(struct sfp *sfp);
+       void (*detach)(struct sfp *sfp);
        void (*start)(struct sfp *sfp);
        void (*stop)(struct sfp *sfp);
        int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo);
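Socket drivers must now populate the two new hooks; a skeletal ops table for a hypothetical socket driver (struct sfp_socket_ops lives in the private drivers/net/phy/sfp.h, and only attach/detach are new here):

	static void example_attach(struct sfp *sfp)
	{
		/* Upstream bus exists; SFP_E_INSERT may be delivered now. */
	}

	static void example_detach(struct sfp *sfp)
	{
		/* Upstream is going away; quiesce the state machine. */
	}

	static const struct sfp_socket_ops example_socket_ops = {
		.attach	= example_attach,
		.detach	= example_detach,
		/* .start, .stop, .module_info, .module_eeprom as before */
	};
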
index afd9d25..958f1cf 100644 (file)
@@ -256,17 +256,6 @@ static void __team_option_inst_mark_removed_port(struct team *team,
        }
 }
 
-static bool __team_option_inst_tmp_find(const struct list_head *opts,
-                                       const struct team_option_inst *needle)
-{
-       struct team_option_inst *opt_inst;
-
-       list_for_each_entry(opt_inst, opts, tmp_list)
-               if (opt_inst == needle)
-                       return true;
-       return false;
-}
-
 static int __team_options_register(struct team *team,
                                   const struct team_option *option,
                                   size_t option_count)
@@ -2460,7 +2449,6 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
        int err = 0;
        int i;
        struct nlattr *nl_option;
-       LIST_HEAD(opt_inst_list);
 
        rtnl_lock();
 
@@ -2480,6 +2468,7 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
                struct nlattr *opt_attrs[TEAM_ATTR_OPTION_MAX + 1];
                struct nlattr *attr;
                struct nlattr *attr_data;
+               LIST_HEAD(opt_inst_list);
                enum team_option_type opt_type;
                int opt_port_ifindex = 0; /* != 0 for per-port options */
                u32 opt_array_index = 0;
@@ -2584,23 +2573,17 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
                        if (err)
                                goto team_put;
                        opt_inst->changed = true;
-
-                       /* dumb/evil user-space can send us duplicate opt,
-                        * keep only the last one
-                        */
-                       if (__team_option_inst_tmp_find(&opt_inst_list,
-                                                       opt_inst))
-                               continue;
-
                        list_add(&opt_inst->tmp_list, &opt_inst_list);
                }
                if (!opt_found) {
                        err = -ENOENT;
                        goto team_put;
                }
-       }
 
-       err = team_nl_send_event_options_get(team, &opt_inst_list);
+               err = team_nl_send_event_options_get(team, &opt_inst_list);
+               if (err)
+                       break;
+       }
 
 team_put:
        team_nl_team_put(team);
index 18656c4..fed298c 100644 (file)
@@ -866,8 +866,6 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
        if (rtnl_dereference(tun->xdp_prog))
                sock_set_flag(&tfile->sk, SOCK_XDP);
 
-       tun_set_real_num_queues(tun);
-
        /* device is allowed to go away first, so no need to hold extra
         * refcnt.
         */
@@ -879,6 +877,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
        rcu_assign_pointer(tfile->tun, tun);
        rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
        tun->numqueues++;
+       tun_set_real_num_queues(tun);
 out:
        return err;
 }
index 78b16eb..63aaae4 100644 (file)
@@ -361,8 +361,8 @@ static int usbpn_probe(struct usb_interface *intf, const struct usb_device_id *i
        else
                return -EINVAL;
 
-       dev = alloc_netdev(sizeof(*pnd) + sizeof(pnd->urbs[0]) * rxq_size,
-                          ifname, NET_NAME_UNKNOWN, usbpn_setup);
+       dev = alloc_netdev(struct_size(pnd, urbs, rxq_size), ifname,
+                          NET_NAME_UNKNOWN, usbpn_setup);
        if (!dev)
                return -ENOMEM;
 
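struct_size() from <linux/overflow.h> replaces the open-coded sizeof arithmetic and saturates to SIZE_MAX on overflow, so an attacker-influenced count cannot wrap into an undersized allocation. A standalone illustration with a made-up struct:

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct example {
		int	n;
		void	*slots[];	/* flexible array member */
	};

	/* Equivalent to sizeof(struct example) + n * sizeof(void *),
	 * but checked for overflow.
	 */
	struct example *e = kzalloc(struct_size(e, slots, n), GFP_KERNEL);
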
index f4247b2..63e44e7 100644 (file)
@@ -1011,6 +1011,7 @@ static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd)
        switch (cmd) {
        case SIOCDEVPRIVATE:
                data[0] = pegasus->phy;
+               /* fall through */
        case SIOCDEVPRIVATE + 1:
                read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]);
                res = 0;
index 3f145e4..59dbdbb 100644 (file)
@@ -847,6 +847,7 @@ static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
        switch (cmd) {
        case SIOCDEVPRIVATE:
                data[0] = dev->phy;
+               /* fall through */
        case SIOCDEVPRIVATE + 1:
                read_mii_word(dev, dev->phy, (data[1] & 0x1f), &data[3]);
                break;
index f412ea1..fbf890e 100644 (file)
@@ -540,8 +540,10 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
                        goto xdp_xmit;
                default:
                        bpf_warn_invalid_xdp_action(act);
+                       /* fall through */
                case XDP_ABORTED:
                        trace_xdp_exception(rq->dev, xdp_prog, act);
+                       /* fall through */
                case XDP_DROP:
                        goto err_xdp;
                }
@@ -661,8 +663,10 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
                goto xdp_xmit;
        default:
                bpf_warn_invalid_xdp_action(act);
+               /* fall through */
        case XDP_ABORTED:
                trace_xdp_exception(rq->dev, xdp_prog, act);
+               /* fall through */
        case XDP_DROP:
                goto drop;
        }
index 2a0edd4..7eb38ea 100644 (file)
@@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644);
 #define VIRTIO_XDP_TX          BIT(0)
 #define VIRTIO_XDP_REDIR       BIT(1)
 
+#define VIRTIO_XDP_FLAG        BIT(0)
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -252,6 +254,21 @@ struct padded_vnet_hdr {
        char padding[4];
 };
 
+static bool is_xdp_frame(void *ptr)
+{
+       return (unsigned long)ptr & VIRTIO_XDP_FLAG;
+}
+
+static void *xdp_to_ptr(struct xdp_frame *ptr)
+{
+       return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
+}
+
+static struct xdp_frame *ptr_to_xdp(void *ptr)
+{
+       return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
+}
+
 /* Converting between virtqueue no. and kernel tx/rx queue no.
  * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
  */
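Both sk_buff and xdp_frame allocations are at least word aligned, so bit 0 of the pointer is guaranteed free and can carry a type tag through the virtqueue. A quick round-trip demonstrating the encoding (pure pointer arithmetic, no virtio involved):

	static void example_round_trip(struct xdp_frame *xdpf)
	{
		void *token = xdp_to_ptr(xdpf);		/* sets bit 0 */

		WARN_ON(!is_xdp_frame(token));		/* tag is visible */
		WARN_ON(ptr_to_xdp(token) != xdpf);	/* tag strips clean */
	}

Untagged tokens coming back from the queue are plain sk_buffs, which is how the completion paths below tell the two apart.
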
@@ -462,7 +479,8 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 
        sg_init_one(sq->sg, xdpf->data, xdpf->len);
 
-       err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
+       err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
+                                  GFP_ATOMIC);
        if (unlikely(err))
                return -ENOSPC; /* Caller handle free/refcnt */
 
@@ -482,36 +500,47 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 {
        struct virtnet_info *vi = netdev_priv(dev);
        struct receive_queue *rq = vi->rq;
-       struct xdp_frame *xdpf_sent;
        struct bpf_prog *xdp_prog;
        struct send_queue *sq;
        unsigned int len;
+       int packets = 0;
+       int bytes = 0;
        int drops = 0;
        int kicks = 0;
        int ret, err;
+       void *ptr;
        int i;
 
-       sq = virtnet_xdp_sq(vi);
-
-       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
-               ret = -EINVAL;
-               drops = n;
-               goto out;
-       }
-
        /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
         * indicates XDP resources have been successfully allocated.
         */
        xdp_prog = rcu_dereference(rq->xdp_prog);
-       if (!xdp_prog) {
-               ret = -ENXIO;
+       if (!xdp_prog)
+               return -ENXIO;
+
+       sq = virtnet_xdp_sq(vi);
+
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+               ret = -EINVAL;
                drops = n;
                goto out;
        }
 
        /* Free up any pending old buffers before queueing new ones. */
-       while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
-               xdp_return_frame(xdpf_sent);
+       while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+               if (likely(is_xdp_frame(ptr))) {
+                       struct xdp_frame *frame = ptr_to_xdp(ptr);
+
+                       bytes += frame->len;
+                       xdp_return_frame(frame);
+               } else {
+                       struct sk_buff *skb = ptr;
+
+                       bytes += skb->len;
+                       napi_consume_skb(skb, false);
+               }
+               packets++;
+       }
 
        for (i = 0; i < n; i++) {
                struct xdp_frame *xdpf = frames[i];
@@ -530,6 +559,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,
        }
 out:
        u64_stats_update_begin(&sq->stats.syncp);
+       sq->stats.bytes += bytes;
+       sq->stats.packets += packets;
        sq->stats.xdp_tx += n;
        sq->stats.xdp_tx_drops += drops;
        sq->stats.kicks += kicks;
@@ -1333,18 +1364,26 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
 
 static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
 {
-       struct sk_buff *skb;
        unsigned int len;
        unsigned int packets = 0;
        unsigned int bytes = 0;
+       void *ptr;
 
-       while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-               pr_debug("Sent skb %p\n", skb);
+       while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+               if (likely(!is_xdp_frame(ptr))) {
+                       struct sk_buff *skb = ptr;
 
-               bytes += skb->len;
-               packets++;
+                       pr_debug("Sent skb %p\n", skb);
+
+                       bytes += skb->len;
+                       napi_consume_skb(skb, in_napi);
+               } else {
+                       struct xdp_frame *frame = ptr_to_xdp(ptr);
 
-               napi_consume_skb(skb, in_napi);
+                       bytes += frame->len;
+                       xdp_return_frame(frame);
+               }
+               packets++;
        }
 
        /* Avoid overhead when no packets have been processed
@@ -1359,6 +1398,16 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
        u64_stats_update_end(&sq->stats.syncp);
 }
 
+static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+{
+       if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+               return false;
+       else if (q < vi->curr_queue_pairs)
+               return true;
+       else
+               return false;
+}
+
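A worked example of the split this helper encodes: with curr_queue_pairs = 4 and xdp_queue_pairs = 2, send queues 0-1 remain skb queues while 2-3 carry raw XDP buffers:

	/* q:           0      1      2      3      4...
	 * returns:  false  false   true   true  false
	 */

The helper now gates the NAPI TX paths (virtnet_poll_cleantx() and virtnet_poll_tx() below) so they never touch queues that may hold raw XDP buffers.
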
 static void virtnet_poll_cleantx(struct receive_queue *rq)
 {
        struct virtnet_info *vi = rq->vq->vdev->priv;
@@ -1366,7 +1415,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
        struct send_queue *sq = &vi->sq[index];
        struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
 
-       if (!sq->napi.weight)
+       if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
                return;
 
        if (__netif_tx_trylock(txq)) {
@@ -1443,8 +1492,16 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
 {
        struct send_queue *sq = container_of(napi, struct send_queue, napi);
        struct virtnet_info *vi = sq->vq->vdev->priv;
-       struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
+       unsigned int index = vq2txq(sq->vq);
+       struct netdev_queue *txq;
 
+       if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
+               /* We don't need to enable cb for XDP */
+               napi_complete_done(napi, 0);
+               return 0;
+       }
+
+       txq = netdev_get_tx_queue(vi->dev, index);
        __netif_tx_lock(txq, raw_smp_processor_id());
        free_old_xmit_skbs(sq, true);
        __netif_tx_unlock(txq);
@@ -2396,6 +2453,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                return -ENOMEM;
        }
 
+       old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
+       if (!prog && !old_prog)
+               return 0;
+
        if (prog) {
                prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
                if (IS_ERR(prog))
@@ -2403,36 +2464,62 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
        }
 
        /* Make sure NAPI is not using any XDP TX queues for RX. */
-       if (netif_running(dev))
-               for (i = 0; i < vi->max_queue_pairs; i++)
+       if (netif_running(dev)) {
+               for (i = 0; i < vi->max_queue_pairs; i++) {
                        napi_disable(&vi->rq[i].napi);
+                       virtnet_napi_tx_disable(&vi->sq[i].napi);
+               }
+       }
+
+       if (!prog) {
+               for (i = 0; i < vi->max_queue_pairs; i++) {
+                       rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+                       if (i == 0)
+                               virtnet_restore_guest_offloads(vi);
+               }
+               synchronize_net();
+       }
 
-       netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
        err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
        if (err)
                goto err;
+       netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
        vi->xdp_queue_pairs = xdp_qp;
 
-       for (i = 0; i < vi->max_queue_pairs; i++) {
-               old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
-               rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
-               if (i == 0) {
-                       if (!old_prog)
+       if (prog) {
+               for (i = 0; i < vi->max_queue_pairs; i++) {
+                       rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+                       if (i == 0 && !old_prog)
                                virtnet_clear_guest_offloads(vi);
-                       if (!prog)
-                               virtnet_restore_guest_offloads(vi);
                }
+       }
+
+       for (i = 0; i < vi->max_queue_pairs; i++) {
                if (old_prog)
                        bpf_prog_put(old_prog);
-               if (netif_running(dev))
+               if (netif_running(dev)) {
                        virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+                       virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+                                              &vi->sq[i].napi);
+               }
        }
 
        return 0;
 
 err:
-       for (i = 0; i < vi->max_queue_pairs; i++)
-               virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+       if (!prog) {
+               virtnet_clear_guest_offloads(vi);
+               for (i = 0; i < vi->max_queue_pairs; i++)
+                       rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
+       }
+
+       if (netif_running(dev)) {
+               for (i = 0; i < vi->max_queue_pairs; i++) {
+                       virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+                       virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+                                              &vi->sq[i].napi);
+               }
+       }
        if (prog)
                bpf_prog_sub(prog, vi->max_queue_pairs - 1);
        return err;
@@ -2614,16 +2701,6 @@ static void free_receive_page_frags(struct virtnet_info *vi)
                        put_page(vi->rq[i].alloc_frag.page);
 }
 
-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
-{
-       if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
-               return false;
-       else if (q < vi->curr_queue_pairs)
-               return true;
-       else
-               return false;
-}
-
 static void free_unused_bufs(struct virtnet_info *vi)
 {
        void *buf;
@@ -2632,10 +2709,10 @@ static void free_unused_bufs(struct virtnet_info *vi)
        for (i = 0; i < vi->max_queue_pairs; i++) {
                struct virtqueue *vq = vi->sq[i].vq;
                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                       if (!is_xdp_raw_buffer_queue(vi, i))
+                       if (!is_xdp_frame(buf))
                                dev_kfree_skb(buf);
                        else
-                               put_page(virt_to_head_page(buf));
+                               xdp_return_frame(ptr_to_xdp(buf));
                }
        }
 
index c0cd1c0..33edc78 100644 (file)
@@ -2293,7 +2293,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
        struct pcpu_sw_netstats *tx_stats, *rx_stats;
        union vxlan_addr loopback;
        union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
-       struct net_device *dev = skb->dev;
+       struct net_device *dev;
        int len = skb->len;
 
        tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
@@ -2313,9 +2313,15 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
 #endif
        }
 
+       rcu_read_lock();
+       dev = skb->dev;
+       if (unlikely(!(dev->flags & IFF_UP))) {
+               kfree_skb(skb);
+               goto drop;
+       }
+
        if (dst_vxlan->cfg.flags & VXLAN_F_LEARN)
-               vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, 0,
-                           vni);
+               vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
 
        u64_stats_update_begin(&tx_stats->syncp);
        tx_stats->tx_packets++;
@@ -2328,8 +2334,10 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
                rx_stats->rx_bytes += len;
                u64_stats_update_end(&rx_stats->syncp);
        } else {
+drop:
                dev->stats.rx_dropped++;
        }
+       rcu_read_unlock();
 }
 
 static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
index d5dc823..fa78d2b 100644 (file)
@@ -1575,7 +1575,7 @@ try:
                                        dev->stats.tx_packets++;
                                        dev->stats.tx_bytes += skb->len;
                                }
-                               dev_kfree_skb_irq(skb);
+                               dev_consume_skb_irq(skb);
                                dpriv->tx_skbuff[cur] = NULL;
                                ++dpriv->tx_dirty;
                        } else {
index 66d889d..a08f04c 100644 (file)
@@ -482,7 +482,7 @@ static int hdlc_tx_done(struct ucc_hdlc_private *priv)
                memset(priv->tx_buffer +
                       (be32_to_cpu(bd->buf) - priv->dma_tx_addr),
                       0, skb->len);
-               dev_kfree_skb_irq(skb);
+               dev_consume_skb_irq(skb);
 
                priv->tx_skbuff[priv->skb_dirtytx] = NULL;
                priv->skb_dirtytx =
index d573a57..459ce52 100644 (file)
@@ -565,7 +565,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
        u32 plx_phy;            /* PLX PCI base address */
        u32 mem_phy;            /* memory PCI base addr */
        u8 __iomem *mem;        /* memory virtual base addr */
-       int i, ports, alloc_size;
+       int i, ports;
 
 #ifndef MODULE
        pr_info_once("%s\n", version);
@@ -601,8 +601,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
        default: ports = 4;
        }
 
-       alloc_size = sizeof(struct card) + ports * sizeof(struct port);
-       card = kzalloc(alloc_size, GFP_KERNEL);
+       card = kzalloc(struct_size(card, ports, ports), GFP_KERNEL);
        if (card == NULL) {
                pci_release_regions(pdev);
                pci_disable_device(pdev);
index 0b60295..d28b96d 100644 (file)
@@ -1260,8 +1260,8 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb)
                goto error_msg_hdr_check;
        result = -EIO;
        num_pls = le16_to_cpu(msg_hdr->num_pls);
-       pl_itr = sizeof(*msg_hdr) +     /* Check payload descriptor(s) */
-               num_pls * sizeof(msg_hdr->pld[0]);
+       /* Check payload descriptor(s) */
+       pl_itr = struct_size(msg_hdr, pld, num_pls);
        pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN);
        if (pl_itr > skb_len) { /* got all the payload descriptors? */
                dev_err(dev, "RX: HW BUG? message too short (%u bytes) for "
index f8eb66e..73842a8 100644 (file)
@@ -210,6 +210,7 @@ retry:
                        msleep(10);     /* give the device some time */
                        goto retry;
                }
+               /* fall through */
        case -EINVAL:                   /* while removing driver */
        case -ENODEV:                   /* dev disconnect ... */
        case -ENOENT:                   /* just ignore it */
index 399b501..e8891f5 100644 (file)
@@ -548,7 +548,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
        {
                .id = WCN3990_HW_1_0_DEV_VERSION,
                .dev_id = 0,
-               .bus = ATH10K_BUS_PCI,
+               .bus = ATH10K_BUS_SNOC,
                .name = "wcn3990 hw1.0",
                .continuous_frag_desc = true,
                .tx_chain_mask = 0x7,
index 491ca3c..83d5bce 100644 (file)
@@ -1,6 +1,6 @@
 config IWLWIFI
        tristate "Intel Wireless WiFi Next Gen AGN - Wireless-N/Advanced-N/Ultimate-N (iwlwifi) "
-       depends on PCI && HAS_IOMEM
+       depends on PCI && HAS_IOMEM && CFG80211
        select FW_LOADER
        ---help---
          Select to build the driver supporting the:
@@ -47,6 +47,7 @@ if IWLWIFI
 config IWLWIFI_LEDS
        bool
        depends on LEDS_CLASS=y || LEDS_CLASS=IWLWIFI
+       depends on IWLMVM || IWLDVM
        select LEDS_TRIGGERS
        select MAC80211_LEDS
        default y
index 497e762..b2cabce 100644 (file)
@@ -212,24 +212,24 @@ void mt76x0_get_tx_power_per_rate(struct mt76x02_dev *dev)
        mt76x02_add_rate_power_offset(t, delta);
 }
 
-void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info)
+void mt76x0_get_power_info(struct mt76x02_dev *dev, s8 *tp)
 {
        struct mt76x0_chan_map {
                u8 chan;
                u8 offset;
        } chan_map[] = {
-               {   2,  0 }, {   4,  1 }, {   6,  2 }, {   8,  3 },
-               {  10,  4 }, {  12,  5 }, {  14,  6 }, {  38,  0 },
-               {  44,  1 }, {  48,  2 }, {  54,  3 }, {  60,  4 },
-               {  64,  5 }, { 102,  6 }, { 108,  7 }, { 112,  8 },
-               { 118,  9 }, { 124, 10 }, { 128, 11 }, { 134, 12 },
-               { 140, 13 }, { 151, 14 }, { 157, 15 }, { 161, 16 },
-               { 167, 17 }, { 171, 18 }, { 173, 19 },
+               {   2,  0 }, {   4,  2 }, {   6,  4 }, {   8,  6 },
+               {  10,  8 }, {  12, 10 }, {  14, 12 }, {  38,  0 },
+               {  44,  2 }, {  48,  4 }, {  54,  6 }, {  60,  8 },
+               {  64, 10 }, { 102, 12 }, { 108, 14 }, { 112, 16 },
+               { 118, 18 }, { 124, 20 }, { 128, 22 }, { 134, 24 },
+               { 140, 26 }, { 151, 28 }, { 157, 30 }, { 161, 32 },
+               { 167, 34 }, { 171, 36 }, { 175, 38 },
        };
        struct ieee80211_channel *chan = dev->mt76.chandef.chan;
        u8 offset, addr;
+       int i, idx = 0;
        u16 data;
-       int i;
 
        if (mt76x0_tssi_enabled(dev)) {
                s8 target_power;
@@ -239,14 +239,14 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info)
                else
                        data = mt76x02_eeprom_get(dev, MT_EE_2G_TARGET_POWER);
                target_power = (data & 0xff) - dev->mt76.rate_power.ofdm[7];
-               info[0] = target_power + mt76x0_get_delta(dev);
-               info[1] = 0;
+               *tp = target_power + mt76x0_get_delta(dev);
 
                return;
        }
 
        for (i = 0; i < ARRAY_SIZE(chan_map); i++) {
-               if (chan_map[i].chan <= chan->hw_value) {
+               if (chan->hw_value <= chan_map[i].chan) {
+                       idx = (chan->hw_value == chan_map[i].chan);
                        offset = chan_map[i].offset;
                        break;
                }
@@ -258,13 +258,16 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info)
                addr = MT_EE_TX_POWER_DELTA_BW80 + offset;
        } else {
                switch (chan->hw_value) {
+               case 42:
+                       offset = 2;
+                       break;
                case 58:
                        offset = 8;
                        break;
                case 106:
                        offset = 14;
                        break;
-               case 112:
+               case 122:
                        offset = 20;
                        break;
                case 155:
@@ -277,14 +280,9 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info)
        }
 
        data = mt76x02_eeprom_get(dev, addr);
-
-       info[0] = data;
-       if (!info[0] || info[0] > 0x3f)
-               info[0] = 5;
-
-       info[1] = data >> 8;
-       if (!info[1] || info[1] > 0x3f)
-               info[1] = 5;
+       *tp = data >> (8 * idx);
+       if (*tp < 0 || *tp > 0x3f)
+               *tp = 5;
 }
 
 static int mt76x0_check_eeprom(struct mt76x02_dev *dev)
index ee9ade9..42b259f 100644 (file)
@@ -26,7 +26,7 @@ struct mt76x02_dev;
 int mt76x0_eeprom_init(struct mt76x02_dev *dev);
 void mt76x0_read_rx_gain(struct mt76x02_dev *dev);
 void mt76x0_get_tx_power_per_rate(struct mt76x02_dev *dev);
-void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info);
+void mt76x0_get_power_info(struct mt76x02_dev *dev, s8 *tp);
 
 static inline s8 s6_to_s8(u32 val)
 {
index 5a4c6f3..1117cdc 100644 (file)
@@ -845,17 +845,17 @@ static void mt76x0_phy_tssi_calibrate(struct mt76x02_dev *dev)
 void mt76x0_phy_set_txpower(struct mt76x02_dev *dev)
 {
        struct mt76_rate_power *t = &dev->mt76.rate_power;
-       u8 info[2];
+       s8 info;
 
        mt76x0_get_tx_power_per_rate(dev);
-       mt76x0_get_power_info(dev, info);
+       mt76x0_get_power_info(dev, &info);
 
-       mt76x02_add_rate_power_offset(t, info[0]);
+       mt76x02_add_rate_power_offset(t, info);
        mt76x02_limit_rate_power(t, dev->mt76.txpower_conf);
        dev->mt76.txpower_cur = mt76x02_get_max_rate_power(t);
-       mt76x02_add_rate_power_offset(t, -info[0]);
+       mt76x02_add_rate_power_offset(t, -info);
 
-       mt76x02_phy_set_txpower(dev, info[0], info[1]);
+       mt76x02_phy_set_txpower(dev, info, info);
 }
 
 void mt76x0_phy_calibrate(struct mt76x02_dev *dev, bool power_on)
index bd10165..4d4b077 100644 (file)
@@ -164,6 +164,12 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue)
        }
 
        sdio_claim_host(func);
+       /*
+        * To guarantee that the SDIO card is power cycled, as required for
+        * the FW programming to succeed, let's do a brute force HW reset.
+        */
+       mmc_hw_reset(card->host);
+
        sdio_enable_func(func);
        sdio_release_host(func);
 
@@ -174,20 +180,13 @@ static int wl12xx_sdio_power_off(struct wl12xx_sdio_glue *glue)
 {
        struct sdio_func *func = dev_to_sdio_func(glue->dev);
        struct mmc_card *card = func->card;
-       int error;
 
        sdio_claim_host(func);
        sdio_disable_func(func);
        sdio_release_host(func);
 
        /* Let runtime PM know the card is powered off */
-       error = pm_runtime_put(&card->dev);
-       if (error < 0 && error != -EBUSY) {
-               dev_err(&card->dev, "%s failed: %i\n", __func__, error);
-
-               return error;
-       }
-
+       pm_runtime_put(&card->dev);
        return 0;
 }
 
index 2625740..330ddb6 100644 (file)
@@ -655,7 +655,7 @@ static void frontend_changed(struct xenbus_device *dev,
                set_backend_state(be, XenbusStateClosed);
                if (xenbus_dev_is_online(dev))
                        break;
-               /* fall through if not online */
+               /* fall through if not online */
        case XenbusStateUnknown:
                set_backend_state(be, XenbusStateClosed);
                device_unregister(&dev->dev);
index 150e497..6a9dd68 100644 (file)
@@ -1253,6 +1253,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
         * effects say only one namespace is affected.
         */
        if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
+               mutex_lock(&ctrl->scan_lock);
                nvme_start_freeze(ctrl);
                nvme_wait_freeze(ctrl);
        }
@@ -1281,8 +1282,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
         */
        if (effects & NVME_CMD_EFFECTS_LBCC)
                nvme_update_formats(ctrl);
-       if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK))
+       if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
                nvme_unfreeze(ctrl);
+               mutex_unlock(&ctrl->scan_lock);
+       }
        if (effects & NVME_CMD_EFFECTS_CCC)
                nvme_init_identify(ctrl);
        if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
@@ -3401,6 +3404,7 @@ static void nvme_scan_work(struct work_struct *work)
        if (nvme_identify_ctrl(ctrl, &id))
                return;
 
+       mutex_lock(&ctrl->scan_lock);
        nn = le32_to_cpu(id->nn);
        if (ctrl->vs >= NVME_VS(1, 1, 0) &&
            !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
@@ -3409,6 +3413,7 @@ static void nvme_scan_work(struct work_struct *work)
        }
        nvme_scan_ns_sequential(ctrl, nn);
 out_free_id:
+       mutex_unlock(&ctrl->scan_lock);
        kfree(id);
        down_write(&ctrl->namespaces_rwsem);
        list_sort(NULL, &ctrl->namespaces, ns_cmp);
@@ -3652,6 +3657,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 
        ctrl->state = NVME_CTRL_NEW;
        spin_lock_init(&ctrl->lock);
+       mutex_init(&ctrl->scan_lock);
        INIT_LIST_HEAD(&ctrl->namespaces);
        init_rwsem(&ctrl->namespaces_rwsem);
        ctrl->dev = dev;
index ab961bd..c4a1bb4 100644 (file)
@@ -154,6 +154,7 @@ struct nvme_ctrl {
        enum nvme_ctrl_state state;
        bool identified;
        spinlock_t lock;
+       struct mutex scan_lock;
        const struct nvme_ctrl_ops *ops;
        struct request_queue *admin_q;
        struct request_queue *connect_q;
index 9bc5854..7fee665 100644 (file)
@@ -2557,27 +2557,18 @@ static void nvme_reset_work(struct work_struct *work)
        if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
                nvme_dev_disable(dev, false);
 
-       /*
-        * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
-        * initializing procedure here.
-        */
-       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
-               dev_warn(dev->ctrl.device,
-                       "failed to mark controller CONNECTING\n");
-               goto out;
-       }
-
+       mutex_lock(&dev->shutdown_lock);
        result = nvme_pci_enable(dev);
        if (result)
-               goto out;
+               goto out_unlock;
 
        result = nvme_pci_configure_admin_queue(dev);
        if (result)
-               goto out;
+               goto out_unlock;
 
        result = nvme_alloc_admin_tags(dev);
        if (result)
-               goto out;
+               goto out_unlock;
 
        /*
         * Limit the max command size to prevent iod->sg allocations going
@@ -2585,6 +2576,17 @@ static void nvme_reset_work(struct work_struct *work)
         */
        dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
        dev->ctrl.max_segments = NVME_MAX_SEGS;
+       mutex_unlock(&dev->shutdown_lock);
+
+       /*
+        * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
+        * initializing procedure here.
+        */
+       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
+               dev_warn(dev->ctrl.device,
+                       "failed to mark controller CONNECTING\n");
+               goto out;
+       }
 
        result = nvme_init_identify(&dev->ctrl);
        if (result)
@@ -2649,6 +2651,8 @@ static void nvme_reset_work(struct work_struct *work)
        nvme_start_ctrl(&dev->ctrl);
        return;
 
+ out_unlock:
+       mutex_unlock(&dev->shutdown_lock);
  out:
        nvme_remove_dead_ctrl(dev, result);
 }
index 52e47da..80f8430 100644 (file)
@@ -310,6 +310,9 @@ static int imx6_pcie_attach_pd(struct device *dev)
        imx6_pcie->pd_pcie = dev_pm_domain_attach_by_name(dev, "pcie");
        if (IS_ERR(imx6_pcie->pd_pcie))
                return PTR_ERR(imx6_pcie->pd_pcie);
+       /* Do nothing when the power domain is missing */
+       if (!imx6_pcie->pd_pcie)
+               return 0;
        link = device_link_add(dev, imx6_pcie->pd_pcie,
                        DL_FLAG_STATELESS |
                        DL_FLAG_PM_RUNTIME |
@@ -323,13 +326,13 @@ static int imx6_pcie_attach_pd(struct device *dev)
        if (IS_ERR(imx6_pcie->pd_pcie_phy))
                return PTR_ERR(imx6_pcie->pd_pcie_phy);
 
-       device_link_add(dev, imx6_pcie->pd_pcie_phy,
+       link = device_link_add(dev, imx6_pcie->pd_pcie_phy,
                        DL_FLAG_STATELESS |
                        DL_FLAG_PM_RUNTIME |
                        DL_FLAG_RPM_ACTIVE);
-       if (IS_ERR(link)) {
-               dev_err(dev, "Failed to add device_link to pcie_phy pd: %ld\n", PTR_ERR(link));
-               return PTR_ERR(link);
+       if (!link) {
+               dev_err(dev, "Failed to add device_link to pcie_phy pd.\n");
+               return -EINVAL;
        }
 
        return 0;
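Editorial note: both fixes in this hunk hinge on return conventions that are easy to mix up. dev_pm_domain_attach_by_name() returns an ERR_PTR on failure, NULL when the device simply has no such domain, and a valid pointer otherwise; device_link_add() returns a valid pointer or NULL and never an ERR_PTR, so the old IS_ERR() check could never fire. A sketch of the checks each call actually needs, flags as in the hunk:

static int attach_pd_sketch(struct device *dev)
{
	struct device *pd;
	struct device_link *link;

	pd = dev_pm_domain_attach_by_name(dev, "pcie_phy");
	if (IS_ERR(pd))
		return PTR_ERR(pd);
	if (!pd)
		return 0;		/* no power domain: nothing to link */

	link = device_link_add(dev, pd, DL_FLAG_STATELESS |
			       DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE);
	if (!link)
		return -EINVAL;		/* NULL on failure, never ERR_PTR */
	return 0;
}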
index b171b6b..0c389a3 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/resource.h>
 #include <linux/of_pci.h>
 #include <linux/of_irq.h>
-#include <linux/gpio/consumer.h>
 
 #include "pcie-designware.h"
 
@@ -30,7 +29,6 @@ struct armada8k_pcie {
        struct dw_pcie *pci;
        struct clk *clk;
        struct clk *clk_reg;
-       struct gpio_desc *reset_gpio;
 };
 
 #define PCIE_VENDOR_REGS_OFFSET                0x8000
@@ -139,12 +137,6 @@ static int armada8k_pcie_host_init(struct pcie_port *pp)
        struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
        struct armada8k_pcie *pcie = to_armada8k_pcie(pci);
 
-       if (pcie->reset_gpio) {
-               /* assert and then deassert the reset signal */
-               gpiod_set_value_cansleep(pcie->reset_gpio, 1);
-               msleep(100);
-               gpiod_set_value_cansleep(pcie->reset_gpio, 0);
-       }
        dw_pcie_setup_rc(pp);
        armada8k_pcie_establish_link(pcie);
 
@@ -257,14 +249,6 @@ static int armada8k_pcie_probe(struct platform_device *pdev)
                goto fail_clkreg;
        }
 
-       /* Get reset gpio signal and hold asserted (logically high) */
-       pcie->reset_gpio = devm_gpiod_get_optional(dev, "reset",
-                                                  GPIOD_OUT_HIGH);
-       if (IS_ERR(pcie->reset_gpio)) {
-               ret = PTR_ERR(pcie->reset_gpio);
-               goto fail_clkreg;
-       }
-
        platform_set_drvdata(pdev, pcie);
 
        ret = armada8k_add_pcie_port(pcie, pdev);
index b0a413f..e2a879e 100644 (file)
@@ -639,8 +639,9 @@ static void quirk_synopsys_haps(struct pci_dev *pdev)
                break;
        }
 }
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SYNOPSYS, PCI_ANY_ID,
-                        quirk_synopsys_haps);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, PCI_ANY_ID,
+                              PCI_CLASS_SERIAL_USB_XHCI, 0,
+                              quirk_synopsys_haps);
 
 /*
  * Let's make the southbridge information explicit instead of having to
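Editorial note: the class-qualified macro narrows when the quirk runs. The extra (class, class_shift) pair means the hook is called only when the device's class code, shifted right by class_shift, equals the given value; with a shift of 0 that is an exact match on the full 24-bit PCI_CLASS_SERIAL_USB_XHCI class, instead of every Synopsys function. For comparison, a broader, purely hypothetical registration matching any USB-class function would look like:

DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, PCI_ANY_ID,
			       PCI_CLASS_SERIAL_USB, 8,
			       quirk_synopsys_haps);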
index 05044e3..03ec7a5 100644 (file)
@@ -1513,7 +1513,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
                        DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"),
-                       DMI_MATCH(DMI_BOARD_VERSION, "1.0"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
                },
        },
        {
@@ -1521,7 +1521,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "HP"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"),
-                       DMI_MATCH(DMI_BOARD_VERSION, "1.0"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
                },
        },
        {
@@ -1529,7 +1529,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"),
-                       DMI_MATCH(DMI_BOARD_VERSION, "1.0"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
                },
        },
        {
@@ -1537,7 +1537,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Celes"),
-                       DMI_MATCH(DMI_BOARD_VERSION, "1.0"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
                },
        },
        {}
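Editorial note: the table itself only changes which DMI field is compared; these boards report the "1.0" string in the product version rather than the board version, so the old entries never matched. For reference, a sketch of how such a table is typically consumed at probe time (the flag name is hypothetical):

	/* dmi_check_system() returns the number of matching entries */
	if (dmi_check_system(chv_no_valid_mask))
		chv_pctrl->skip_valid_mask = true;	/* hypothetical quirk flag */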
index 1817786..a005cbc 100644 (file)
@@ -45,12 +45,14 @@ config PINCTRL_MT2701
 config PINCTRL_MT7623
        bool "Mediatek MT7623 pin control with generic binding"
        depends on MACH_MT7623 || COMPILE_TEST
+       depends on OF
        default MACH_MT7623
        select PINCTRL_MTK_MOORE
 
 config PINCTRL_MT7629
        bool "Mediatek MT7629 pin control"
        depends on MACH_MT7629 || COMPILE_TEST
+       depends on OF
        default MACH_MT7629
        select PINCTRL_MTK_MOORE
 
@@ -92,6 +94,7 @@ config PINCTRL_MT6797
 
 config PINCTRL_MT7622
        bool "MediaTek MT7622 pin control"
+       depends on OF
        depends on ARM64 || COMPILE_TEST
        default ARM64 && ARCH_MEDIATEK
        select PINCTRL_MTK_MOORE
index b03481e..98905d4 100644 (file)
@@ -832,8 +832,13 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
                break;
 
        case MCP_TYPE_S18:
+               one_regmap_config =
+                       devm_kmemdup(dev, &mcp23x17_regmap,
+                               sizeof(struct regmap_config), GFP_KERNEL);
+               if (!one_regmap_config)
+                       return -ENOMEM;
                mcp->regmap = devm_regmap_init(dev, &mcp23sxx_spi_regmap, mcp,
-                                              &mcp23x17_regmap);
+                                              one_regmap_config);
                mcp->reg_shift = 1;
                mcp->chip.ngpio = 16;
                mcp->chip.label = "mcp23s18";
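Editorial note: the devm_kmemdup() added above is the standard cure for a subtle class of bug: handing a shared const regmap_config to regmap and later patching it per instance mutates every chip built from the same template. The pattern in isolation, with a hypothetical per-instance tweak:

	struct regmap_config *cfg;
	struct regmap *map;

	cfg = devm_kmemdup(dev, &mcp23x17_regmap, sizeof(*cfg), GFP_KERNEL);
	if (!cfg)
		return -ENOMEM;

	cfg->name = dev_name(dev);	/* hypothetical per-instance field */
	map = devm_regmap_init(dev, &mcp23sxx_spi_regmap, mcp, cfg);
	if (IS_ERR(map))
		return PTR_ERR(map);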
index aa8b581..ef4268c 100644 (file)
@@ -588,7 +588,7 @@ static const unsigned int h6_irq_bank_map[] = { 1, 5, 6, 7 };
 static const struct sunxi_pinctrl_desc h6_pinctrl_data = {
        .pins = h6_pins,
        .npins = ARRAY_SIZE(h6_pins),
-       .irq_banks = 3,
+       .irq_banks = 4,
        .irq_bank_map = h6_irq_bank_map,
        .irq_read_needs_mux = true,
 };
index 5d9184d..0e7fa69 100644 (file)
@@ -698,26 +698,24 @@ static int sunxi_pmx_request(struct pinctrl_dev *pctldev, unsigned offset)
 {
        struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
        unsigned short bank = offset / PINS_PER_BANK;
-       struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank];
-       struct regulator *reg;
+       unsigned short bank_offset = bank - pctl->desc->pin_base /
+                                           PINS_PER_BANK;
+       struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank_offset];
+       struct regulator *reg = s_reg->regulator;
+       char supply[16];
        int ret;
 
-       reg = s_reg->regulator;
-       if (!reg) {
-               char supply[16];
-
-               snprintf(supply, sizeof(supply), "vcc-p%c", 'a' + bank);
-               reg = regulator_get(pctl->dev, supply);
-               if (IS_ERR(reg)) {
-                       dev_err(pctl->dev, "Couldn't get bank P%c regulator\n",
-                               'A' + bank);
-                       return PTR_ERR(reg);
-               }
-
-               s_reg->regulator = reg;
-               refcount_set(&s_reg->refcount, 1);
-       } else {
+       if (reg) {
                refcount_inc(&s_reg->refcount);
+               return 0;
+       }
+
+       snprintf(supply, sizeof(supply), "vcc-p%c", 'a' + bank);
+       reg = regulator_get(pctl->dev, supply);
+       if (IS_ERR(reg)) {
+               dev_err(pctl->dev, "Couldn't get bank P%c regulator\n",
+                       'A' + bank);
+               return PTR_ERR(reg);
        }
 
        ret = regulator_enable(reg);
@@ -727,13 +725,13 @@ static int sunxi_pmx_request(struct pinctrl_dev *pctldev, unsigned offset)
                goto out;
        }
 
+       s_reg->regulator = reg;
+       refcount_set(&s_reg->refcount, 1);
+
        return 0;
 
 out:
-       if (refcount_dec_and_test(&s_reg->refcount)) {
-               regulator_put(s_reg->regulator);
-               s_reg->regulator = NULL;
-       }
+       regulator_put(s_reg->regulator);
 
        return ret;
 }
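/* Editor's sketch of the refcount discipline after this rewrite: the
 * regulator pointer is published only once regulator_enable() has
 * succeeded, so a failed enable leaves no half-initialized state for
 * the free path to trip over. Condensed from the hunk above:
 */
static int sunxi_bank_supply_get(struct sunxi_pinctrl *pctl,
				 struct sunxi_pinctrl_regulator *s_reg,
				 const char *supply)
{
	struct regulator *reg = s_reg->regulator;
	int ret;

	if (reg) {				/* later users: just count */
		refcount_inc(&s_reg->refcount);
		return 0;
	}

	reg = regulator_get(pctl->dev, supply);	/* first user */
	if (IS_ERR(reg))
		return PTR_ERR(reg);

	ret = regulator_enable(reg);
	if (ret) {
		regulator_put(reg);		/* nothing was published */
		return ret;
	}

	s_reg->regulator = reg;			/* publish only on success */
	refcount_set(&s_reg->refcount, 1);
	return 0;
}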
@@ -742,7 +740,9 @@ static int sunxi_pmx_free(struct pinctrl_dev *pctldev, unsigned offset)
 {
        struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
        unsigned short bank = offset / PINS_PER_BANK;
-       struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank];
+       unsigned short bank_offset = bank - pctl->desc->pin_base /
+                                           PINS_PER_BANK;
+       struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank_offset];
 
        if (!refcount_dec_and_test(&s_reg->refcount))
                return 0;
index e340d2a..034c031 100644 (file)
@@ -136,7 +136,7 @@ struct sunxi_pinctrl {
        struct gpio_chip                *chip;
        const struct sunxi_pinctrl_desc *desc;
        struct device                   *dev;
-       struct sunxi_pinctrl_regulator  regulators[12];
+       struct sunxi_pinctrl_regulator  regulators[9];
        struct irq_domain               *domain;
        struct sunxi_pinctrl_function   *functions;
        unsigned                        nfunctions;
index 5e2109c..b5e9db8 100644 (file)
@@ -905,6 +905,7 @@ config TOSHIBA_WMI
 config ACPI_CMPC
        tristate "CMPC Laptop Extras"
        depends on ACPI && INPUT
+       depends on BACKLIGHT_LCD_SUPPORT
        depends on RFKILL || RFKILL=n
        select BACKLIGHT_CLASS_DEVICE
        help
@@ -1128,6 +1129,7 @@ config INTEL_OAKTRAIL
 config SAMSUNG_Q10
        tristate "Samsung Q10 Extras"
        depends on ACPI
+       depends on BACKLIGHT_LCD_SUPPORT
        select BACKLIGHT_CLASS_DEVICE
        ---help---
          This driver provides support for backlight control on Samsung Q10
index aeb4a8b..7fe1863 100644 (file)
@@ -43,7 +43,7 @@ config PTP_1588_CLOCK_DTE
 
 config PTP_1588_CLOCK_QORIQ
        tristate "Freescale QorIQ 1588 timer as PTP clock"
-       depends on GIANFAR || FSL_DPAA_ETH
+       depends on GIANFAR || FSL_DPAA_ETH || FSL_ENETC || FSL_ENETC_VF
        depends on PTP_1588_CLOCK
        default y
        help
index 43416b2..42d3654 100644 (file)
@@ -22,7 +22,6 @@
 
 #include <linux/device.h>
 #include <linux/hrtimer.h>
-#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
  * Register access functions
  */
 
-/* Caller must hold qoriq_ptp->lock. */
-static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
+/* Caller must hold ptp_qoriq->lock. */
+static u64 tmr_cnt_read(struct ptp_qoriq *ptp_qoriq)
 {
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        u64 ns;
        u32 lo, hi;
 
-       lo = qoriq_read(&regs->ctrl_regs->tmr_cnt_l);
-       hi = qoriq_read(&regs->ctrl_regs->tmr_cnt_h);
+       lo = ptp_qoriq->read(&regs->ctrl_regs->tmr_cnt_l);
+       hi = ptp_qoriq->read(&regs->ctrl_regs->tmr_cnt_h);
        ns = ((u64) hi) << 32;
        ns |= lo;
        return ns;
 }
 
-/* Caller must hold qoriq_ptp->lock. */
-static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns)
+/* Caller must hold ptp_qoriq->lock. */
+static void tmr_cnt_write(struct ptp_qoriq *ptp_qoriq, u64 ns)
 {
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        u32 hi = ns >> 32;
        u32 lo = ns & 0xffffffff;
 
-       qoriq_write(&regs->ctrl_regs->tmr_cnt_l, lo);
-       qoriq_write(&regs->ctrl_regs->tmr_cnt_h, hi);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_cnt_l, lo);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_cnt_h, hi);
 }
 
-/* Caller must hold qoriq_ptp->lock. */
-static void set_alarm(struct qoriq_ptp *qoriq_ptp)
+/* Caller must hold ptp_qoriq->lock. */
+static void set_alarm(struct ptp_qoriq *ptp_qoriq)
 {
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        u64 ns;
        u32 lo, hi;
 
-       ns = tmr_cnt_read(qoriq_ptp) + 1500000000ULL;
+       ns = tmr_cnt_read(ptp_qoriq) + 1500000000ULL;
        ns = div_u64(ns, 1000000000UL) * 1000000000ULL;
-       ns -= qoriq_ptp->tclk_period;
+       ns -= ptp_qoriq->tclk_period;
        hi = ns >> 32;
        lo = ns & 0xffffffff;
-       qoriq_write(&regs->alarm_regs->tmr_alarm1_l, lo);
-       qoriq_write(&regs->alarm_regs->tmr_alarm1_h, hi);
+       ptp_qoriq->write(&regs->alarm_regs->tmr_alarm1_l, lo);
+       ptp_qoriq->write(&regs->alarm_regs->tmr_alarm1_h, hi);
 }
 
-/* Caller must hold qoriq_ptp->lock. */
-static void set_fipers(struct qoriq_ptp *qoriq_ptp)
+/* Caller must hold ptp_qoriq->lock. */
+static void set_fipers(struct ptp_qoriq *ptp_qoriq)
 {
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 
-       set_alarm(qoriq_ptp);
-       qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-       qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
+       set_alarm(ptp_qoriq);
+       ptp_qoriq->write(&regs->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1);
+       ptp_qoriq->write(&regs->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2);
 }
 
-static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index,
+static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index,
                          bool update_event)
 {
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        struct ptp_clock_event event;
        void __iomem *reg_etts_l;
        void __iomem *reg_etts_h;
@@ -116,17 +115,17 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index,
        event.index = index;
 
        do {
-               lo = qoriq_read(reg_etts_l);
-               hi = qoriq_read(reg_etts_h);
+               lo = ptp_qoriq->read(reg_etts_l);
+               hi = ptp_qoriq->read(reg_etts_h);
 
                if (update_event) {
                        event.timestamp = ((u64) hi) << 32;
                        event.timestamp |= lo;
-                       ptp_clock_event(qoriq_ptp->clock, &event);
+                       ptp_clock_event(ptp_qoriq->clock, &event);
                }
 
-               stat = qoriq_read(&regs->ctrl_regs->tmr_stat);
-       } while (qoriq_ptp->extts_fifo_support && (stat & valid));
+               stat = ptp_qoriq->read(&regs->ctrl_regs->tmr_stat);
+       } while (ptp_qoriq->extts_fifo_support && (stat & valid));
 
        return 0;
 }
@@ -135,89 +134,90 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index,
  * Interrupt service routine
  */
 
-static irqreturn_t isr(int irq, void *priv)
+irqreturn_t ptp_qoriq_isr(int irq, void *priv)
 {
-       struct qoriq_ptp *qoriq_ptp = priv;
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq *ptp_qoriq = priv;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        struct ptp_clock_event event;
        u64 ns;
        u32 ack = 0, lo, hi, mask, val, irqs;
 
-       spin_lock(&qoriq_ptp->lock);
+       spin_lock(&ptp_qoriq->lock);
 
-       val = qoriq_read(&regs->ctrl_regs->tmr_tevent);
-       mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
+       val = ptp_qoriq->read(&regs->ctrl_regs->tmr_tevent);
+       mask = ptp_qoriq->read(&regs->ctrl_regs->tmr_temask);
 
-       spin_unlock(&qoriq_ptp->lock);
+       spin_unlock(&ptp_qoriq->lock);
 
        irqs = val & mask;
 
        if (irqs & ETS1) {
                ack |= ETS1;
-               extts_clean_up(qoriq_ptp, 0, true);
+               extts_clean_up(ptp_qoriq, 0, true);
        }
 
        if (irqs & ETS2) {
                ack |= ETS2;
-               extts_clean_up(qoriq_ptp, 1, true);
+               extts_clean_up(ptp_qoriq, 1, true);
        }
 
        if (irqs & ALM2) {
                ack |= ALM2;
-               if (qoriq_ptp->alarm_value) {
+               if (ptp_qoriq->alarm_value) {
                        event.type = PTP_CLOCK_ALARM;
                        event.index = 0;
-                       event.timestamp = qoriq_ptp->alarm_value;
-                       ptp_clock_event(qoriq_ptp->clock, &event);
+                       event.timestamp = ptp_qoriq->alarm_value;
+                       ptp_clock_event(ptp_qoriq->clock, &event);
                }
-               if (qoriq_ptp->alarm_interval) {
-                       ns = qoriq_ptp->alarm_value + qoriq_ptp->alarm_interval;
+               if (ptp_qoriq->alarm_interval) {
+                       ns = ptp_qoriq->alarm_value + ptp_qoriq->alarm_interval;
                        hi = ns >> 32;
                        lo = ns & 0xffffffff;
-                       qoriq_write(&regs->alarm_regs->tmr_alarm2_l, lo);
-                       qoriq_write(&regs->alarm_regs->tmr_alarm2_h, hi);
-                       qoriq_ptp->alarm_value = ns;
+                       ptp_qoriq->write(&regs->alarm_regs->tmr_alarm2_l, lo);
+                       ptp_qoriq->write(&regs->alarm_regs->tmr_alarm2_h, hi);
+                       ptp_qoriq->alarm_value = ns;
                } else {
-                       spin_lock(&qoriq_ptp->lock);
-                       mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
+                       spin_lock(&ptp_qoriq->lock);
+                       mask = ptp_qoriq->read(&regs->ctrl_regs->tmr_temask);
                        mask &= ~ALM2EN;
-                       qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
-                       spin_unlock(&qoriq_ptp->lock);
-                       qoriq_ptp->alarm_value = 0;
-                       qoriq_ptp->alarm_interval = 0;
+                       ptp_qoriq->write(&regs->ctrl_regs->tmr_temask, mask);
+                       spin_unlock(&ptp_qoriq->lock);
+                       ptp_qoriq->alarm_value = 0;
+                       ptp_qoriq->alarm_interval = 0;
                }
        }
 
        if (irqs & PP1) {
                ack |= PP1;
                event.type = PTP_CLOCK_PPS;
-               ptp_clock_event(qoriq_ptp->clock, &event);
+               ptp_clock_event(ptp_qoriq->clock, &event);
        }
 
        if (ack) {
-               qoriq_write(&regs->ctrl_regs->tmr_tevent, ack);
+               ptp_qoriq->write(&regs->ctrl_regs->tmr_tevent, ack);
                return IRQ_HANDLED;
        } else
                return IRQ_NONE;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_isr);
 
 /*
  * PTP clock operations
  */
 
-static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 {
        u64 adj, diff;
        u32 tmr_add;
        int neg_adj = 0;
-       struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 
        if (scaled_ppm < 0) {
                neg_adj = 1;
                scaled_ppm = -scaled_ppm;
        }
-       tmr_add = qoriq_ptp->tmr_add;
+       tmr_add = ptp_qoriq->tmr_add;
        adj = tmr_add;
 
        /* calculate diff as adj*(scaled_ppm/65536)/1000000
@@ -229,71 +229,74 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 
        tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
 
-       qoriq_write(&regs->ctrl_regs->tmr_add, tmr_add);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_add, tmr_add);
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_adjfine);
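/* Editor's note, a hedged sketch of the arithmetic behind the truncated
 * comment above: scaled_ppm carries ppm scaled by 2^16, so the driver
 * computes diff = tmr_add * scaled_ppm / (65536 * 10^6). The hidden
 * lines are paraphrased here from the upstream source; the
 * divide-by-8e6-then-shift form is the same quantity, since
 * 8,000,000 * 2^13 == 65536 * 10^6, with one extra bit used to round
 * to nearest. Worked example: tmr_add = 0x80000000 and scaled_ppm =
 * 65536 (exactly 1 ppm) give diff = 2147, i.e. tmr_add moves by ~1 ppm.
 */
static u32 adjfine_delta_sketch(u32 tmr_add, u64 scaled_ppm)
{
	u64 adj = (u64)tmr_add * scaled_ppm;
	u64 diff = div_u64(adj, 8000000);	/* 8e6 * 2^13 == 65536e6 */

	return (diff >> 13) + ((diff >> 12) & 1);	/* round to nearest */
}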
 
-static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta)
+int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
        s64 now;
        unsigned long flags;
-       struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+       struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
 
-       spin_lock_irqsave(&qoriq_ptp->lock, flags);
+       spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
-       now = tmr_cnt_read(qoriq_ptp);
+       now = tmr_cnt_read(ptp_qoriq);
        now += delta;
-       tmr_cnt_write(qoriq_ptp, now);
-       set_fipers(qoriq_ptp);
+       tmr_cnt_write(ptp_qoriq, now);
+       set_fipers(ptp_qoriq);
 
-       spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
+       spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_adjtime);
 
-static int ptp_qoriq_gettime(struct ptp_clock_info *ptp,
-                              struct timespec64 *ts)
+int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
        u64 ns;
        unsigned long flags;
-       struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+       struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
 
-       spin_lock_irqsave(&qoriq_ptp->lock, flags);
+       spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
-       ns = tmr_cnt_read(qoriq_ptp);
+       ns = tmr_cnt_read(ptp_qoriq);
 
-       spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
+       spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 
        *ts = ns_to_timespec64(ns);
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_gettime);
 
-static int ptp_qoriq_settime(struct ptp_clock_info *ptp,
-                              const struct timespec64 *ts)
+int ptp_qoriq_settime(struct ptp_clock_info *ptp,
+                     const struct timespec64 *ts)
 {
        u64 ns;
        unsigned long flags;
-       struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+       struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
 
        ns = timespec64_to_ns(ts);
 
-       spin_lock_irqsave(&qoriq_ptp->lock, flags);
+       spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
-       tmr_cnt_write(qoriq_ptp, ns);
-       set_fipers(qoriq_ptp);
+       tmr_cnt_write(ptp_qoriq, ns);
+       set_fipers(ptp_qoriq);
 
-       spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
+       spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_settime);
 
-static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
-                             struct ptp_clock_request *rq, int on)
+int ptp_qoriq_enable(struct ptp_clock_info *ptp,
+                    struct ptp_clock_request *rq, int on)
 {
-       struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        unsigned long flags;
        u32 bit, mask = 0;
 
@@ -311,7 +314,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
                }
 
                if (on)
-                       extts_clean_up(qoriq_ptp, rq->extts.index, false);
+                       extts_clean_up(ptp_qoriq, rq->extts.index, false);
 
                break;
        case PTP_CLK_REQ_PPS:
@@ -321,21 +324,22 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
                return -EOPNOTSUPP;
        }
 
-       spin_lock_irqsave(&qoriq_ptp->lock, flags);
+       spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
-       mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
+       mask = ptp_qoriq->read(&regs->ctrl_regs->tmr_temask);
        if (on) {
                mask |= bit;
-               qoriq_write(&regs->ctrl_regs->tmr_tevent, bit);
+               ptp_qoriq->write(&regs->ctrl_regs->tmr_tevent, bit);
        } else {
                mask &= ~bit;
        }
 
-       qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_temask, mask);
 
-       spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
+       spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
        return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_enable);
 
 static const struct ptp_clock_info ptp_qoriq_caps = {
        .owner          = THIS_MODULE,
@@ -354,7 +358,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = {
 };
 
 /**
- * qoriq_ptp_nominal_freq - calculate nominal frequency according to
+ * ptp_qoriq_nominal_freq - calculate nominal frequency according to
  *                         reference clock frequency
  *
  * @clk_src: reference clock frequency
@@ -365,7 +369,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = {
  *
  * Return the nominal frequency
  */
-static u32 qoriq_ptp_nominal_freq(u32 clk_src)
+static u32 ptp_qoriq_nominal_freq(u32 clk_src)
 {
        u32 remainder = 0;
 
@@ -385,9 +389,9 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src)
 }
 
 /**
- * qoriq_ptp_auto_config - calculate a set of default configurations
+ * ptp_qoriq_auto_config - calculate a set of default configurations
  *
- * @qoriq_ptp: pointer to qoriq_ptp
+ * @ptp_qoriq: pointer to ptp_qoriq
  * @node: pointer to device_node
  *
  * If the dts properties below are not provided, this function will be
@@ -401,7 +405,7 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src)
  *
  * Return 0 if success
  */
-static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
+static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq,
                                 struct device_node *node)
 {
        struct clk *clk;
@@ -411,7 +415,7 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
        u32 remainder = 0;
        u32 clk_src = 0;
 
-       qoriq_ptp->cksel = DEFAULT_CKSEL;
+       ptp_qoriq->cksel = DEFAULT_CKSEL;
 
        clk = of_clk_get(node, 0);
        if (!IS_ERR(clk)) {
@@ -424,12 +428,12 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
                return -EINVAL;
        }
 
-       nominal_freq = qoriq_ptp_nominal_freq(clk_src);
+       nominal_freq = ptp_qoriq_nominal_freq(clk_src);
        if (!nominal_freq)
                return -EINVAL;
 
-       qoriq_ptp->tclk_period = 1000000000UL / nominal_freq;
-       qoriq_ptp->tmr_prsc = DEFAULT_TMR_PRSC;
+       ptp_qoriq->tclk_period = 1000000000UL / nominal_freq;
+       ptp_qoriq->tmr_prsc = DEFAULT_TMR_PRSC;
 
        /* Calculate initial frequency compensation value for TMR_ADD register.
         * freq_comp = ceil(2^32 / freq_ratio)
@@ -440,172 +444,193 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
        if (remainder)
                freq_comp++;
 
-       qoriq_ptp->tmr_add = freq_comp;
-       qoriq_ptp->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - qoriq_ptp->tclk_period;
-       qoriq_ptp->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - qoriq_ptp->tclk_period;
+       ptp_qoriq->tmr_add = freq_comp;
+       ptp_qoriq->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - ptp_qoriq->tclk_period;
+       ptp_qoriq->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - ptp_qoriq->tclk_period;
 
        /* max_adj = 1000000000 * (freq_ratio - 1.0) - 1
         * freq_ratio = reference_clock_freq / nominal_freq
         */
        max_adj = 1000000000ULL * (clk_src - nominal_freq);
        max_adj = div_u64(max_adj, nominal_freq) - 1;
-       qoriq_ptp->caps.max_adj = max_adj;
+       ptp_qoriq->caps.max_adj = max_adj;
 
        return 0;
 }
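/* Editor's worked example for the two formulas above, with hypothetical
 * clock values: clk_src = 600 MHz reference and nominal_freq = 500 MHz
 * give
 *
 *	freq_comp = ceil(2^32 * 500000000 / 600000000)
 *	          = ceil(2^32 * 5 / 6) = 3579139414 (0xd5555556)
 *	max_adj   = 1000000000 * (600/500 - 1) - 1 = 199999999
 *
 * so TMR_ADD starts at ~5/6 of 2^32, and max_adj caps slewing requests
 * just under the 20% headroom between reference and nominal frequency.
 */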
 
-static int qoriq_ptp_probe(struct platform_device *dev)
+int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
+                  const struct ptp_clock_info caps)
 {
-       struct device_node *node = dev->dev.of_node;
-       struct qoriq_ptp *qoriq_ptp;
-       struct qoriq_ptp_registers *regs;
+       struct device_node *node = ptp_qoriq->dev->of_node;
+       struct ptp_qoriq_registers *regs;
        struct timespec64 now;
-       int err = -ENOMEM;
-       u32 tmr_ctrl;
        unsigned long flags;
-       void __iomem *base;
-
-       qoriq_ptp = kzalloc(sizeof(*qoriq_ptp), GFP_KERNEL);
-       if (!qoriq_ptp)
-               goto no_memory;
-
-       err = -EINVAL;
+       u32 tmr_ctrl;
 
-       qoriq_ptp->dev = &dev->dev;
-       qoriq_ptp->caps = ptp_qoriq_caps;
+       ptp_qoriq->base = base;
+       ptp_qoriq->caps = caps;
 
-       if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel))
-               qoriq_ptp->cksel = DEFAULT_CKSEL;
+       if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel))
+               ptp_qoriq->cksel = DEFAULT_CKSEL;
 
        if (of_property_read_bool(node, "fsl,extts-fifo"))
-               qoriq_ptp->extts_fifo_support = true;
+               ptp_qoriq->extts_fifo_support = true;
        else
-               qoriq_ptp->extts_fifo_support = false;
+               ptp_qoriq->extts_fifo_support = false;
 
        if (of_property_read_u32(node,
-                                "fsl,tclk-period", &qoriq_ptp->tclk_period) ||
+                                "fsl,tclk-period", &ptp_qoriq->tclk_period) ||
            of_property_read_u32(node,
-                                "fsl,tmr-prsc", &qoriq_ptp->tmr_prsc) ||
+                                "fsl,tmr-prsc", &ptp_qoriq->tmr_prsc) ||
            of_property_read_u32(node,
-                                "fsl,tmr-add", &qoriq_ptp->tmr_add) ||
+                                "fsl,tmr-add", &ptp_qoriq->tmr_add) ||
            of_property_read_u32(node,
-                                "fsl,tmr-fiper1", &qoriq_ptp->tmr_fiper1) ||
+                                "fsl,tmr-fiper1", &ptp_qoriq->tmr_fiper1) ||
            of_property_read_u32(node,
-                                "fsl,tmr-fiper2", &qoriq_ptp->tmr_fiper2) ||
+                                "fsl,tmr-fiper2", &ptp_qoriq->tmr_fiper2) ||
            of_property_read_u32(node,
-                                "fsl,max-adj", &qoriq_ptp->caps.max_adj)) {
+                                "fsl,max-adj", &ptp_qoriq->caps.max_adj)) {
                pr_warn("device tree node missing required elements, try automatic configuration\n");
 
-               if (qoriq_ptp_auto_config(qoriq_ptp, node))
-                       goto no_config;
+               if (ptp_qoriq_auto_config(ptp_qoriq, node))
+                       return -ENODEV;
        }
 
-       err = -ENODEV;
+       if (of_property_read_bool(node, "little-endian")) {
+               ptp_qoriq->read = qoriq_read_le;
+               ptp_qoriq->write = qoriq_write_le;
+       } else {
+               ptp_qoriq->read = qoriq_read_be;
+               ptp_qoriq->write = qoriq_write_be;
+       }
+
+       /* The eTSEC uses a different memory map than DPAA/ENETC */
+       if (of_device_is_compatible(node, "fsl,etsec-ptp")) {
+               ptp_qoriq->regs.ctrl_regs = base + ETSEC_CTRL_REGS_OFFSET;
+               ptp_qoriq->regs.alarm_regs = base + ETSEC_ALARM_REGS_OFFSET;
+               ptp_qoriq->regs.fiper_regs = base + ETSEC_FIPER_REGS_OFFSET;
+               ptp_qoriq->regs.etts_regs = base + ETSEC_ETTS_REGS_OFFSET;
+       } else {
+               ptp_qoriq->regs.ctrl_regs = base + CTRL_REGS_OFFSET;
+               ptp_qoriq->regs.alarm_regs = base + ALARM_REGS_OFFSET;
+               ptp_qoriq->regs.fiper_regs = base + FIPER_REGS_OFFSET;
+               ptp_qoriq->regs.etts_regs = base + ETTS_REGS_OFFSET;
+       }
+
+       ktime_get_real_ts64(&now);
+       ptp_qoriq_settime(&ptp_qoriq->caps, &now);
+
+       tmr_ctrl =
+         (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT |
+         (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT;
+
+       spin_lock_init(&ptp_qoriq->lock);
+       spin_lock_irqsave(&ptp_qoriq->lock, flags);
+
+       regs = &ptp_qoriq->regs;
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl, tmr_ctrl);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_add, ptp_qoriq->tmr_add);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_prsc, ptp_qoriq->tmr_prsc);
+       ptp_qoriq->write(&regs->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1);
+       ptp_qoriq->write(&regs->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2);
+       set_alarm(ptp_qoriq);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl,
+                        tmr_ctrl|FIPERST|RTPE|TE|FRD);
+
+       spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
+
+       ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, ptp_qoriq->dev);
+       if (IS_ERR(ptp_qoriq->clock))
+               return PTR_ERR(ptp_qoriq->clock);
+
+       ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock);
+       ptp_qoriq_create_debugfs(ptp_qoriq);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ptp_qoriq_init);
+
+void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq)
+{
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
+
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_temask, 0);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl,   0);
+
+       ptp_qoriq_remove_debugfs(ptp_qoriq);
+       ptp_clock_unregister(ptp_qoriq->clock);
+       iounmap(ptp_qoriq->base);
+       free_irq(ptp_qoriq->irq, ptp_qoriq);
+}
+EXPORT_SYMBOL_GPL(ptp_qoriq_free);
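/* Editor's sketch of the reuse this refactor enables: with the ops,
 * ISR and init/free exported, another driver (the ENETC PTP driver the
 * Kconfig hunk earlier now lists as a user) can embed struct ptp_qoriq
 * and hand in its own capabilities. All names below are illustrative,
 * and note that ptp_qoriq_init() takes the caps struct by value.
 */
static const struct ptp_clock_info my_caps = {
	.owner		= THIS_MODULE,
	.name		= "my QorIQ PTP clock",
	.adjfine	= ptp_qoriq_adjfine,
	.adjtime	= ptp_qoriq_adjtime,
	.gettime64	= ptp_qoriq_gettime,
	.settime64	= ptp_qoriq_settime,
	.enable		= ptp_qoriq_enable,
};

static int my_probe(struct platform_device *pdev)
{
	struct ptp_qoriq *ptp;
	struct resource *res;
	void __iomem *base;
	int err;

	ptp = kzalloc(sizeof(*ptp), GFP_KERNEL);
	if (!ptp)
		return -ENOMEM;
	ptp->dev = &pdev->dev;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = res ? ioremap(res->start, resource_size(res)) : NULL;
	if (!base) {
		kfree(ptp);
		return -ENOMEM;
	}

	err = ptp_qoriq_init(ptp, base, my_caps);
	if (err) {
		iounmap(base);		/* ptp_qoriq_free() unmaps on success */
		kfree(ptp);
		return err;
	}
	platform_set_drvdata(pdev, ptp);
	return 0;
}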
 
-       qoriq_ptp->irq = platform_get_irq(dev, 0);
+static int ptp_qoriq_probe(struct platform_device *dev)
+{
+       struct ptp_qoriq *ptp_qoriq;
+       int err = -ENOMEM;
+       void __iomem *base;
+
+       ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL);
+       if (!ptp_qoriq)
+               goto no_memory;
+
+       ptp_qoriq->dev = &dev->dev;
 
-       if (qoriq_ptp->irq < 0) {
+       err = -ENODEV;
+
+       ptp_qoriq->irq = platform_get_irq(dev, 0);
+       if (ptp_qoriq->irq < 0) {
                pr_err("irq not in device tree\n");
                goto no_node;
        }
-       if (request_irq(qoriq_ptp->irq, isr, IRQF_SHARED, DRIVER, qoriq_ptp)) {
+       if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED,
+                       DRIVER, ptp_qoriq)) {
                pr_err("request_irq failed\n");
                goto no_node;
        }
 
-       qoriq_ptp->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0);
-       if (!qoriq_ptp->rsrc) {
+       ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0);
+       if (!ptp_qoriq->rsrc) {
                pr_err("no resource\n");
                goto no_resource;
        }
-       if (request_resource(&iomem_resource, qoriq_ptp->rsrc)) {
+       if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) {
                pr_err("resource busy\n");
                goto no_resource;
        }
 
-       spin_lock_init(&qoriq_ptp->lock);
-
-       base = ioremap(qoriq_ptp->rsrc->start,
-                      resource_size(qoriq_ptp->rsrc));
+       base = ioremap(ptp_qoriq->rsrc->start,
+                      resource_size(ptp_qoriq->rsrc));
        if (!base) {
                pr_err("ioremap ptp registers failed\n");
                goto no_ioremap;
        }
 
-       qoriq_ptp->base = base;
-
-       if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) {
-               qoriq_ptp->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET;
-               qoriq_ptp->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET;
-               qoriq_ptp->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET;
-               qoriq_ptp->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET;
-       } else {
-               qoriq_ptp->regs.ctrl_regs = base + CTRL_REGS_OFFSET;
-               qoriq_ptp->regs.alarm_regs = base + ALARM_REGS_OFFSET;
-               qoriq_ptp->regs.fiper_regs = base + FIPER_REGS_OFFSET;
-               qoriq_ptp->regs.etts_regs = base + ETTS_REGS_OFFSET;
-       }
-
-       ktime_get_real_ts64(&now);
-       ptp_qoriq_settime(&qoriq_ptp->caps, &now);
-
-       tmr_ctrl =
-         (qoriq_ptp->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT |
-         (qoriq_ptp->cksel & CKSEL_MASK) << CKSEL_SHIFT;
-
-       spin_lock_irqsave(&qoriq_ptp->lock, flags);
-
-       regs = &qoriq_ptp->regs;
-       qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl);
-       qoriq_write(&regs->ctrl_regs->tmr_add,    qoriq_ptp->tmr_add);
-       qoriq_write(&regs->ctrl_regs->tmr_prsc,   qoriq_ptp->tmr_prsc);
-       qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-       qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
-       set_alarm(qoriq_ptp);
-       qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
-
-       spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
-
-       qoriq_ptp->clock = ptp_clock_register(&qoriq_ptp->caps, &dev->dev);
-       if (IS_ERR(qoriq_ptp->clock)) {
-               err = PTR_ERR(qoriq_ptp->clock);
+       err = ptp_qoriq_init(ptp_qoriq, base, ptp_qoriq_caps);
+       if (err)
                goto no_clock;
-       }
-       qoriq_ptp->phc_index = ptp_clock_index(qoriq_ptp->clock);
-
-       ptp_qoriq_create_debugfs(qoriq_ptp);
-       platform_set_drvdata(dev, qoriq_ptp);
 
+       platform_set_drvdata(dev, ptp_qoriq);
        return 0;
 
 no_clock:
-       iounmap(qoriq_ptp->base);
+       iounmap(ptp_qoriq->base);
 no_ioremap:
-       release_resource(qoriq_ptp->rsrc);
+       release_resource(ptp_qoriq->rsrc);
 no_resource:
-       free_irq(qoriq_ptp->irq, qoriq_ptp);
-no_config:
+       free_irq(ptp_qoriq->irq, ptp_qoriq);
 no_node:
-       kfree(qoriq_ptp);
+       kfree(ptp_qoriq);
 no_memory:
        return err;
 }
 
-static int qoriq_ptp_remove(struct platform_device *dev)
+static int ptp_qoriq_remove(struct platform_device *dev)
 {
-       struct qoriq_ptp *qoriq_ptp = platform_get_drvdata(dev);
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
-
-       qoriq_write(&regs->ctrl_regs->tmr_temask, 0);
-       qoriq_write(&regs->ctrl_regs->tmr_ctrl,   0);
-
-       ptp_qoriq_remove_debugfs(qoriq_ptp);
-       ptp_clock_unregister(qoriq_ptp->clock);
-       iounmap(qoriq_ptp->base);
-       release_resource(qoriq_ptp->rsrc);
-       free_irq(qoriq_ptp->irq, qoriq_ptp);
-       kfree(qoriq_ptp);
+       struct ptp_qoriq *ptp_qoriq = platform_get_drvdata(dev);
 
+       ptp_qoriq_free(ptp_qoriq);
+       release_resource(ptp_qoriq->rsrc);
+       kfree(ptp_qoriq);
        return 0;
 }
 
@@ -616,16 +641,16 @@ static const struct of_device_id match_table[] = {
 };
 MODULE_DEVICE_TABLE(of, match_table);
 
-static struct platform_driver qoriq_ptp_driver = {
+static struct platform_driver ptp_qoriq_driver = {
        .driver = {
                .name           = "ptp_qoriq",
                .of_match_table = match_table,
        },
-       .probe       = qoriq_ptp_probe,
-       .remove      = qoriq_ptp_remove,
+       .probe       = ptp_qoriq_probe,
+       .remove      = ptp_qoriq_remove,
 };
 
-module_platform_driver(qoriq_ptp_driver);
+module_platform_driver(ptp_qoriq_driver);
 
 MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
 MODULE_DESCRIPTION("PTP clock for Freescale QorIQ 1588 timer");
index 9705950..e8dddce 100644 (file)
@@ -7,11 +7,11 @@
 
 static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val)
 {
-       struct qoriq_ptp *qoriq_ptp = data;
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq *ptp_qoriq = data;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        u32 ctrl;
 
-       ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+       ctrl = ptp_qoriq->read(&regs->ctrl_regs->tmr_ctrl);
        *val = ctrl & PP1L ? 1 : 0;
 
        return 0;
@@ -19,17 +19,17 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val)
 
 static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val)
 {
-       struct qoriq_ptp *qoriq_ptp = data;
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq *ptp_qoriq = data;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        u32 ctrl;
 
-       ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+       ctrl = ptp_qoriq->read(&regs->ctrl_regs->tmr_ctrl);
        if (val == 0)
                ctrl &= ~PP1L;
        else
                ctrl |= PP1L;
 
-       qoriq_write(&regs->ctrl_regs->tmr_ctrl, ctrl);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl, ctrl);
        return 0;
 }
 
@@ -38,11 +38,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper1_fops, ptp_qoriq_fiper1_lpbk_get,
 
 static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val)
 {
-       struct qoriq_ptp *qoriq_ptp = data;
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq *ptp_qoriq = data;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        u32 ctrl;
 
-       ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+       ctrl = ptp_qoriq->read(&regs->ctrl_regs->tmr_ctrl);
        *val = ctrl & PP2L ? 1 : 0;
 
        return 0;
@@ -50,52 +50,52 @@ static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val)
 
 static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val)
 {
-       struct qoriq_ptp *qoriq_ptp = data;
-       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+       struct ptp_qoriq *ptp_qoriq = data;
+       struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
        u32 ctrl;
 
-       ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+       ctrl = ptp_qoriq->read(&regs->ctrl_regs->tmr_ctrl);
        if (val == 0)
                ctrl &= ~PP2L;
        else
                ctrl |= PP2L;
 
-       qoriq_write(&regs->ctrl_regs->tmr_ctrl, ctrl);
+       ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl, ctrl);
        return 0;
 }
 
 DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper2_fops, ptp_qoriq_fiper2_lpbk_get,
                         ptp_qoriq_fiper2_lpbk_set, "%llu\n");
 
-void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp)
+void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq)
 {
        struct dentry *root;
 
-       root = debugfs_create_dir(dev_name(qoriq_ptp->dev), NULL);
+       root = debugfs_create_dir(dev_name(ptp_qoriq->dev), NULL);
        if (IS_ERR(root))
                return;
        if (!root)
                goto err_root;
 
-       qoriq_ptp->debugfs_root = root;
+       ptp_qoriq->debugfs_root = root;
 
        if (!debugfs_create_file_unsafe("fiper1-loopback", 0600, root,
-                                       qoriq_ptp, &ptp_qoriq_fiper1_fops))
+                                       ptp_qoriq, &ptp_qoriq_fiper1_fops))
                goto err_node;
        if (!debugfs_create_file_unsafe("fiper2-loopback", 0600, root,
-                                       qoriq_ptp, &ptp_qoriq_fiper2_fops))
+                                       ptp_qoriq, &ptp_qoriq_fiper2_fops))
                goto err_node;
        return;
 
 err_node:
        debugfs_remove_recursive(root);
-       qoriq_ptp->debugfs_root = NULL;
+       ptp_qoriq->debugfs_root = NULL;
 err_root:
-       dev_err(qoriq_ptp->dev, "failed to initialize debugfs\n");
+       dev_err(ptp_qoriq->dev, "failed to initialize debugfs\n");
 }
 
-void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp)
+void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq)
 {
-       debugfs_remove_recursive(qoriq_ptp->debugfs_root);
-       qoriq_ptp->debugfs_root = NULL;
+       debugfs_remove_recursive(ptp_qoriq->debugfs_root);
+       ptp_qoriq->debugfs_root = NULL;
 }
index 4e7b55a..6e294b4 100644 (file)
@@ -4469,6 +4469,14 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp)
                usrparm.psf_data &= 0x7fffffffULL;
                usrparm.rssd_result &= 0x7fffffffULL;
        }
+       /* at least 2 bytes are accessed and should be allocated */
+       if (usrparm.psf_data_len < 2) {
+               DBF_DEV_EVENT(DBF_WARNING, device,
+                             "Symmetrix ioctl invalid data length %d",
+                             usrparm.psf_data_len);
+               rc = -EINVAL;
+               goto out;
+       }
        /* alloc I/O data area */
        psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA);
        rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA);
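Editorial note: the guard added here is the canonical treatment of a user-controlled ioctl length: reject anything below the minimum the handler will dereference before it reaches the allocator, since kzalloc(0) returns ZERO_SIZE_PTR and the later two-byte access through it would fault. Reduced to its shape:

	if (usrparm.psf_data_len < 2)	/* handler always touches 2 bytes */
		return -EINVAL;

	psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA);
	if (!psf_data)
		return -ENOMEM;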
index 48ea000..5a69974 100644 (file)
@@ -248,7 +248,8 @@ static inline int ap_test_config(unsigned int *field, unsigned int nr)
 static inline int ap_test_config_card_id(unsigned int id)
 {
        if (!ap_configuration)  /* QCI not supported */
-               return 1;
+               /* only ids 0...3F may be probed */
+               return id < 0x40 ? 1 : 0;
        return ap_test_config(ap_configuration->apm, id);
 }
 
index f2d6bbe..bc55ec3 100644 (file)
@@ -9,7 +9,7 @@ obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o
 obj-$(CONFIG_SMSGIUCV) += smsgiucv.o
 obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o
 obj-$(CONFIG_LCS) += lcs.o
-qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o
+qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o
 obj-$(CONFIG_QETH) += qeth.o
 qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o
 obj-$(CONFIG_QETH_L2) += qeth_l2.o
index d65650e..c0c46be 100644 (file)
 #include <linux/in6.h>
 #include <linux/bitops.h>
 #include <linux/seq_file.h>
-#include <linux/ethtool.h>
 #include <linux/hashtable.h>
 #include <linux/ip.h>
 #include <linux/refcount.h>
+#include <linux/workqueue.h>
 
 #include <net/ipv6.h>
 #include <net/if_inet6.h>
@@ -34,6 +34,8 @@
 #include <asm/ccwgroup.h>
 #include <asm/sysinfo.h>
 
+#include <uapi/linux/if_link.h>
+
 #include "qeth_core_mpc.h"
 
 /**
@@ -112,59 +114,6 @@ static inline u32 qeth_get_device_id(struct ccw_device *cdev)
 #define CCW_DEVID(cdev)                (qeth_get_device_id(cdev))
 #define CARD_DEVID(card)       (CCW_DEVID(CARD_RDEV(card)))
 
-/**
- * card stuff
- */
-struct qeth_perf_stats {
-       unsigned int bufs_rec;
-       unsigned int bufs_sent;
-       unsigned int buf_elements_sent;
-
-       unsigned int skbs_sent_pack;
-       unsigned int bufs_sent_pack;
-
-       unsigned int sc_dp_p;
-       unsigned int sc_p_dp;
-       /* qdio_cq_handler: number of times called, time spent in */
-       __u64 cq_start_time;
-       unsigned int cq_cnt;
-       unsigned int cq_time;
-       /* qdio_input_handler: number of times called, time spent in */
-       __u64 inbound_start_time;
-       unsigned int inbound_cnt;
-       unsigned int inbound_time;
-       /* qeth_send_packet: number of times called, time spent in */
-       __u64 outbound_start_time;
-       unsigned int outbound_cnt;
-       unsigned int outbound_time;
-       /* qdio_output_handler: number of times called, time spent in */
-       __u64 outbound_handler_start_time;
-       unsigned int outbound_handler_cnt;
-       unsigned int outbound_handler_time;
-       /* number of calls to and time spent in do_QDIO for inbound queue */
-       __u64 inbound_do_qdio_start_time;
-       unsigned int inbound_do_qdio_cnt;
-       unsigned int inbound_do_qdio_time;
-       /* number of calls to and time spent in do_QDIO for outbound queues */
-       __u64 outbound_do_qdio_start_time;
-       unsigned int outbound_do_qdio_cnt;
-       unsigned int outbound_do_qdio_time;
-       unsigned int large_send_bytes;
-       unsigned int large_send_cnt;
-       unsigned int sg_skbs_sent;
-       /* initial values when measuring starts */
-       unsigned long initial_rx_packets;
-       unsigned long initial_tx_packets;
-       /* inbound scatter gather data */
-       unsigned int sg_skbs_rx;
-       unsigned int sg_frags_rx;
-       unsigned int sg_alloc_page_rx;
-       unsigned int tx_csum;
-       unsigned int tx_lin;
-       unsigned int tx_linfail;
-       unsigned int rx_csum;
-};
-
 /* Routing stuff */
 struct qeth_routing_info {
        enum qeth_routing_types type;
@@ -494,10 +443,54 @@ enum qeth_out_q_states {
        QETH_OUT_Q_LOCKED_FLUSH,
 };
 
+#define QETH_CARD_STAT_ADD(_c, _stat, _val)    ((_c)->stats._stat += (_val))
+#define QETH_CARD_STAT_INC(_c, _stat)          QETH_CARD_STAT_ADD(_c, _stat, 1)
+
+#define QETH_TXQ_STAT_ADD(_q, _stat, _val)     ((_q)->stats._stat += (_val))
+#define QETH_TXQ_STAT_INC(_q, _stat)           QETH_TXQ_STAT_ADD(_q, _stat, 1)
+
+struct qeth_card_stats {
+       u64 rx_bufs;
+       u64 rx_skb_csum;
+       u64 rx_sg_skbs;
+       u64 rx_sg_frags;
+       u64 rx_sg_alloc_page;
+
+       /* rtnl_link_stats64 */
+       u64 rx_packets;
+       u64 rx_bytes;
+       u64 rx_errors;
+       u64 rx_dropped;
+       u64 rx_multicast;
+       u64 tx_errors;
+};
+
+struct qeth_out_q_stats {
+       u64 bufs;
+       u64 bufs_pack;
+       u64 buf_elements;
+       u64 skbs_pack;
+       u64 skbs_sg;
+       u64 skbs_csum;
+       u64 skbs_tso;
+       u64 skbs_linearized;
+       u64 skbs_linearized_fail;
+       u64 tso_bytes;
+       u64 packing_mode_switch;
+
+       /* rtnl_link_stats64 */
+       u64 tx_packets;
+       u64 tx_bytes;
+       u64 tx_errors;
+       u64 tx_dropped;
+       u64 tx_carrier_errors;
+};
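/* Editor's sketch of how these counters are meant to be used. The call
 * sites and the ndo_get_stats64 aggregation live outside this hunk, so
 * the roll-up loop below is an assumption about shape, not a quote:
 */
static void qeth_stats_sketch(struct qeth_card *card,
			      struct qeth_qdio_out_q *queue,
			      struct sk_buff *skb,
			      struct rtnl_link_stats64 *stats)
{
	unsigned int i;

	QETH_CARD_STAT_INC(card, rx_bufs);
	QETH_TXQ_STAT_ADD(queue, tx_bytes, skb->len);

	/* roll the per-queue TX counters up for ndo_get_stats64 */
	for (i = 0; i < card->qdio.no_out_queues; i++) {
		stats->tx_packets += card->qdio.out_qs[i]->stats.tx_packets;
		stats->tx_bytes += card->qdio.out_qs[i]->stats.tx_bytes;
	}
}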
+
 struct qeth_qdio_out_q {
        struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q];
        struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q];
        struct qdio_outbuf_state *bufstates; /* convenience pointer */
+       struct qeth_out_q_stats stats;
        int queue_no;
        struct qeth_card *card;
        atomic_t state;
@@ -527,7 +520,7 @@ struct qeth_qdio_info {
 
        /* output */
        int no_out_queues;
-       struct qeth_qdio_out_q **out_qs;
+       struct qeth_qdio_out_q *out_qs[QETH_MAX_QUEUES];
        struct qdio_outbuf_state *out_bufstates;
 
        /* priority queueing */
@@ -594,8 +587,8 @@ struct qeth_channel;
 struct qeth_cmd_buffer {
        enum qeth_cmd_buffer_state state;
        struct qeth_channel *channel;
+       struct qeth_reply *reply;
        unsigned char *data;
-       int rc;
        void (*callback)(struct qeth_card *card, struct qeth_channel *channel,
                         struct qeth_cmd_buffer *iob);
 };
@@ -701,7 +694,6 @@ struct qeth_card_options {
        struct qeth_vnicc_info vnicc; /* VNICC options */
        int fake_broadcast;
        enum qeth_discipline_id layer;
-       int performance_stats;
        int rx_sg_cb;
        enum qeth_ipa_isolation_modes isolation;
        enum qeth_ipa_isolation_modes prev_isolation;
@@ -777,13 +769,13 @@ struct qeth_card {
        struct qeth_channel data;
 
        struct net_device *dev;
-       struct net_device_stats stats;
-
+       struct qeth_card_stats stats;
        struct qeth_card_info info;
        struct qeth_token token;
        struct qeth_seqno seqno;
        struct qeth_card_options options;
 
+       struct workqueue_struct *event_wq;
        wait_queue_head_t wait_q;
        spinlock_t mclock;
        unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
@@ -800,7 +792,6 @@ struct qeth_card {
        struct list_head cmd_waiter_list;
        /* QDIO buffer handling */
        struct qeth_qdio_info qdio;
-       struct qeth_perf_stats perf_stats;
        int read_or_write_problem;
        struct qeth_osn_info osn_info;
        struct qeth_discipline *discipline;
@@ -857,11 +848,6 @@ static inline int qeth_get_elements_for_range(addr_t start, addr_t end)
        return PFN_UP(end) - PFN_DOWN(start);
 }
 
-static inline int qeth_get_micros(void)
-{
-       return (int) (get_tod_clock() >> 12);
-}
-
 static inline int qeth_get_ip_version(struct sk_buff *skb)
 {
        struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
@@ -886,8 +872,7 @@ static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
        if ((card->dev->features & NETIF_F_RXCSUM) &&
            (flags & QETH_HDR_EXT_CSUM_TRANSP_REQ)) {
                skb->ip_summed = CHECKSUM_UNNECESSARY;
-               if (card->options.performance_stats)
-                       card->perf_stats.rx_csum++;
+               QETH_CARD_STAT_INC(card, rx_skb_csum);
        } else {
                skb->ip_summed = CHECKSUM_NONE;
        }
@@ -949,12 +934,13 @@ static inline struct qeth_qdio_out_q *qeth_get_tx_queue(struct qeth_card *card,
 
 extern struct qeth_discipline qeth_l2_discipline;
 extern struct qeth_discipline qeth_l3_discipline;
+extern const struct ethtool_ops qeth_ethtool_ops;
+extern const struct ethtool_ops qeth_osn_ethtool_ops;
 extern const struct attribute_group *qeth_generic_attr_groups[];
 extern const struct attribute_group *qeth_osn_attr_groups[];
 extern const struct attribute_group qeth_device_attr_group;
 extern const struct attribute_group qeth_device_blkt_group;
 extern const struct device_type qeth_generic_devtype;
-extern struct workqueue_struct *qeth_wq;
 
 int qeth_card_hw_is_reachable(struct qeth_card *);
 const char *qeth_get_cardname_short(struct qeth_card *);
@@ -993,33 +979,25 @@ void qeth_clear_working_pool_list(struct qeth_card *);
 void qeth_clear_cmd_buffers(struct qeth_channel *);
 void qeth_clear_qdio_buffers(struct qeth_card *);
 void qeth_setadp_promisc_mode(struct qeth_card *);
-struct net_device_stats *qeth_get_stats(struct net_device *);
 int qeth_setadpparms_change_macaddr(struct qeth_card *);
 void qeth_tx_timeout(struct net_device *);
 void qeth_prepare_control_data(struct qeth_card *, int,
                                struct qeth_cmd_buffer *);
 void qeth_release_buffer(struct qeth_channel *, struct qeth_cmd_buffer *);
-void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob);
+void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
+                         u16 cmd_length);
 struct qeth_cmd_buffer *qeth_wait_for_buffer(struct qeth_channel *);
 int qeth_query_switch_attributes(struct qeth_card *card,
                                  struct qeth_switch_info *sw_info);
-int qeth_send_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *,
-       int (*reply_cb)(struct qeth_card *, struct qeth_reply*, unsigned long),
-       void *reply_param);
+int qeth_query_card_info(struct qeth_card *card,
+                        struct carrier_info *carrier_info);
 unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset);
 int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
                        struct sk_buff *skb, struct qeth_hdr *hdr,
                        unsigned int offset, unsigned int hd_len,
                        int elements_needed);
 int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-int qeth_core_get_sset_count(struct net_device *, int);
-void qeth_core_get_ethtool_stats(struct net_device *,
-                               struct ethtool_stats *, u64 *);
-void qeth_core_get_strings(struct net_device *, u32, u8 *);
-void qeth_core_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...);
-int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
-                                        struct ethtool_link_ksettings *cmd);
 int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback);
 int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
@@ -1036,14 +1014,16 @@ netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
 netdev_features_t qeth_features_check(struct sk_buff *skb,
                                      struct net_device *dev,
                                      netdev_features_t features);
+void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats);
 int qeth_open(struct net_device *dev);
 int qeth_stop(struct net_device *dev);
 
 int qeth_vm_request_mac(struct qeth_card *card);
 int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
              struct qeth_qdio_out_q *queue, int ipv, int cast_type,
-             void (*fill_header)(struct qeth_card *card, struct qeth_hdr *hdr,
-                                 struct sk_buff *skb, int ipv, int cast_type,
+             void (*fill_header)(struct qeth_qdio_out_q *queue,
+                                 struct qeth_hdr *hdr, struct sk_buff *skb,
+                                 int ipv, int cast_type,
                                  unsigned int data_len));
 
 /* exports for OSN */
index dcc06e4..4708df3 100644 (file)
@@ -74,8 +74,7 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
 static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf);
 static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int);
 
-struct workqueue_struct *qeth_wq;
-EXPORT_SYMBOL_GPL(qeth_wq);
+static struct workqueue_struct *qeth_wq;
 
 int qeth_card_hw_is_reachable(struct qeth_card *card)
 {
@@ -540,6 +539,7 @@ static int __qeth_issue_next_read(struct qeth_card *card)
                QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n",
                                 rc, CARD_DEVID(card));
                atomic_set(&channel->irq_pending, 0);
+               qeth_release_buffer(channel, iob);
                card->read_or_write_problem = 1;
                qeth_schedule_recovery(card);
                wake_up(&card->wait_q);
@@ -566,6 +566,7 @@ static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card)
        if (reply) {
                refcount_set(&reply->refcnt, 1);
                atomic_set(&reply->received, 0);
+               init_waitqueue_head(&reply->wait_q);
        }
        return reply;
 }
@@ -581,6 +582,26 @@ static void qeth_put_reply(struct qeth_reply *reply)
                kfree(reply);
 }
 
+static void qeth_enqueue_reply(struct qeth_card *card, struct qeth_reply *reply)
+{
+       spin_lock_irq(&card->lock);
+       list_add_tail(&reply->list, &card->cmd_waiter_list);
+       spin_unlock_irq(&card->lock);
+}
+
+static void qeth_dequeue_reply(struct qeth_card *card, struct qeth_reply *reply)
+{
+       spin_lock_irq(&card->lock);
+       list_del(&reply->list);
+       spin_unlock_irq(&card->lock);
+}
+
+static void qeth_notify_reply(struct qeth_reply *reply)
+{
+       atomic_inc(&reply->received);
+       wake_up(&reply->wait_q);
+}
+
 static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc,
                struct qeth_card *card)
 {
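
For illustration: a minimal user-space sketch of the waiter-list pattern that
the new qeth_enqueue_reply()/qeth_notify_reply() helpers implement, with a
pthread mutex and condvar standing in for the kernel spinlock and waitqueue.
All names here are hypothetical; this sketch is not part of the patch.

    #include <pthread.h>
    #include <stdatomic.h>

    struct reply {
            struct reply *next;
            atomic_int received;
            int rc;
    };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t wait_q = PTHREAD_COND_INITIALIZER;
    static struct reply *cmd_waiter_list;

    static void enqueue_reply(struct reply *reply)
    {
            pthread_mutex_lock(&lock);
            reply->next = cmd_waiter_list;      /* add to the waiter list */
            cmd_waiter_list = reply;
            pthread_mutex_unlock(&lock);
    }

    static void notify_reply(struct reply *reply, int rc)
    {
            reply->rc = rc;
            atomic_store(&reply->received, 1);  /* mark completion ... */
            pthread_cond_broadcast(&wait_q);    /* ... and wake the sender */
    }
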
@@ -657,19 +678,15 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
 
 void qeth_clear_ipacmd_list(struct qeth_card *card)
 {
-       struct qeth_reply *reply, *r;
+       struct qeth_reply *reply;
        unsigned long flags;
 
        QETH_CARD_TEXT(card, 4, "clipalst");
 
        spin_lock_irqsave(&card->lock, flags);
-       list_for_each_entry_safe(reply, r, &card->cmd_waiter_list, list) {
-               qeth_get_reply(reply);
+       list_for_each_entry(reply, &card->cmd_waiter_list, list) {
                reply->rc = -EIO;
-               atomic_inc(&reply->received);
-               list_del_init(&reply->list);
-               wake_up(&reply->wait_q);
-               qeth_put_reply(reply);
+               qeth_notify_reply(reply);
        }
        spin_unlock_irqrestore(&card->lock, flags);
 }
@@ -726,7 +743,10 @@ void qeth_release_buffer(struct qeth_channel *channel,
        spin_lock_irqsave(&channel->iob_lock, flags);
        iob->state = BUF_STATE_FREE;
        iob->callback = qeth_send_control_data_cb;
-       iob->rc = 0;
+       if (iob->reply) {
+               qeth_put_reply(iob->reply);
+               iob->reply = NULL;
+       }
        spin_unlock_irqrestore(&channel->iob_lock, flags);
        wake_up(&channel->wait_q);
 }
@@ -739,6 +759,17 @@ static void qeth_release_buffer_cb(struct qeth_card *card,
        qeth_release_buffer(channel, iob);
 }
 
+static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, int rc)
+{
+       struct qeth_reply *reply = iob->reply;
+
+       if (reply) {
+               reply->rc = rc;
+               qeth_notify_reply(reply);
+       }
+       qeth_release_buffer(iob->channel, iob);
+}
+
 static struct qeth_cmd_buffer *qeth_get_buffer(struct qeth_channel *channel)
 {
        struct qeth_cmd_buffer *buffer = NULL;
@@ -774,9 +805,9 @@ static void qeth_send_control_data_cb(struct qeth_card *card,
                                      struct qeth_cmd_buffer *iob)
 {
        struct qeth_ipa_cmd *cmd = NULL;
-       struct qeth_reply *reply, *r;
+       struct qeth_reply *reply = NULL;
+       struct qeth_reply *r;
        unsigned long flags;
-       int keep_reply;
        int rc = 0;
 
        QETH_CARD_TEXT(card, 4, "sndctlcb");
@@ -808,44 +839,40 @@ static void qeth_send_control_data_cb(struct qeth_card *card,
                        goto out;
        }
 
+       /* match against pending cmd requests */
        spin_lock_irqsave(&card->lock, flags);
-       list_for_each_entry_safe(reply, r, &card->cmd_waiter_list, list) {
-               if ((reply->seqno == QETH_IDX_COMMAND_SEQNO) ||
-                   ((cmd) && (reply->seqno == cmd->hdr.seqno))) {
+       list_for_each_entry(r, &card->cmd_waiter_list, list) {
+               if ((r->seqno == QETH_IDX_COMMAND_SEQNO) ||
+                   (cmd && (r->seqno == cmd->hdr.seqno))) {
+                       reply = r;
+                       /* take the object outside the lock */
                        qeth_get_reply(reply);
-                       list_del_init(&reply->list);
-                       spin_unlock_irqrestore(&card->lock, flags);
-                       keep_reply = 0;
-                       if (reply->callback != NULL) {
-                               if (cmd) {
-                                       reply->offset = (__u16)((char *)cmd -
-                                                       (char *)iob->data);
-                                       keep_reply = reply->callback(card,
-                                                       reply,
-                                                       (unsigned long)cmd);
-                               } else
-                                       keep_reply = reply->callback(card,
-                                                       reply,
-                                                       (unsigned long)iob);
-                       }
-                       if (cmd)
-                               reply->rc = (u16) cmd->hdr.return_code;
-                       else if (iob->rc)
-                               reply->rc = iob->rc;
-                       if (keep_reply) {
-                               spin_lock_irqsave(&card->lock, flags);
-                               list_add_tail(&reply->list,
-                                             &card->cmd_waiter_list);
-                               spin_unlock_irqrestore(&card->lock, flags);
-                       } else {
-                               atomic_inc(&reply->received);
-                               wake_up(&reply->wait_q);
-                       }
-                       qeth_put_reply(reply);
-                       goto out;
+                       break;
                }
        }
        spin_unlock_irqrestore(&card->lock, flags);
+
+       if (!reply)
+               goto out;
+
+       if (!reply->callback) {
+               rc = 0;
+       } else {
+               if (cmd) {
+                       reply->offset = (u16)((char *)cmd - (char *)iob->data);
+                       rc = reply->callback(card, reply, (unsigned long)cmd);
+               } else {
+                       rc = reply->callback(card, reply, (unsigned long)iob);
+               }
+       }
+
+       if (rc <= 0) {
+               reply->rc = rc;
+               qeth_notify_reply(reply);
+       }
+
+       qeth_put_reply(reply);
+
 out:
        memcpy(&card->seqno.pdu_hdr_ack,
                QETH_PDU_HEADER_SEQ_NO(iob->data),
@@ -976,9 +1003,8 @@ static int qeth_get_problem(struct qeth_card *card, struct ccw_device *cdev,
        return 0;
 }
 
-static long qeth_check_irb_error(struct qeth_card *card,
-                                struct ccw_device *cdev, unsigned long intparm,
-                                struct irb *irb)
+static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev,
+                               unsigned long intparm, struct irb *irb)
 {
        if (!IS_ERR(irb))
                return 0;
@@ -989,7 +1015,7 @@ static long qeth_check_irb_error(struct qeth_card *card,
                                 CCW_DEVID(cdev));
                QETH_CARD_TEXT(card, 2, "ckirberr");
                QETH_CARD_TEXT_(card, 2, "  rc%d", -EIO);
-               break;
+               return -EIO;
        case -ETIMEDOUT:
                dev_warn(&cdev->dev, "A hardware operation timed out"
                        " on the device\n");
@@ -1001,14 +1027,14 @@ static long qeth_check_irb_error(struct qeth_card *card,
                                wake_up(&card->wait_q);
                        }
                }
-               break;
+               return -ETIMEDOUT;
        default:
                QETH_DBF_MESSAGE(2, "unknown error %ld on channel %x\n",
                                 PTR_ERR(irb), CCW_DEVID(cdev));
                QETH_CARD_TEXT(card, 2, "ckirberr");
                QETH_CARD_TEXT(card, 2, "  rc???");
+               return PTR_ERR(irb);
        }
-       return PTR_ERR(irb);
 }
 
 static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
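
The old helper returned a long from a shared PTR_ERR() exit; returning an int
errno per case keeps the specific error for the caller (which qeth_irq() now
uses to cancel the pending command). A self-contained model of the reshaped
switch, names hypothetical and not part of the patch:

    #include <errno.h>

    static int check_irb_error(long err)
    {
            switch (err) {
            case -EIO:
                    return -EIO;            /* I/O terminated */
            case -ETIMEDOUT:
                    return -ETIMEDOUT;      /* device timed out */
            default:
                    return (int)err;        /* unknown errno, pass through */
            }
    }
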
@@ -1043,10 +1069,11 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
        if (qeth_intparm_is_iob(intparm))
                iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm);
 
-       if (qeth_check_irb_error(card, cdev, intparm, irb)) {
+       rc = qeth_check_irb_error(card, cdev, intparm, irb);
+       if (rc) {
                /* IO was terminated, free its resources. */
                if (iob)
-                       qeth_release_buffer(iob->channel, iob);
+                       qeth_cancel_cmd(iob, rc);
                atomic_set(&channel->irq_pending, 0);
                wake_up(&card->wait_q);
                return;
@@ -1101,6 +1128,8 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
                rc = qeth_get_problem(card, cdev, irb);
                if (rc) {
                        card->read_or_write_problem = 1;
+                       if (iob)
+                               qeth_cancel_cmd(iob, rc);
                        qeth_clear_ipacmd_list(card);
                        qeth_schedule_recovery(card);
                        goto out;
@@ -1262,7 +1291,6 @@ static int qeth_setup_channel(struct qeth_channel *channel, bool alloc_buffers)
                channel->iob[cnt].state = BUF_STATE_FREE;
                channel->iob[cnt].channel = channel;
                channel->iob[cnt].callback = qeth_send_control_data_cb;
-               channel->iob[cnt].rc = 0;
        }
        if (cnt < QETH_CMD_BUFFER_NO) {
                qeth_clean_channel(channel);
@@ -1439,6 +1467,10 @@ static struct qeth_card *qeth_alloc_card(struct ccwgroup_device *gdev)
        CARD_RDEV(card) = gdev->cdev[0];
        CARD_WDEV(card) = gdev->cdev[1];
        CARD_DDEV(card) = gdev->cdev[2];
+
+       card->event_wq = alloc_ordered_workqueue("%s", 0, dev_name(&gdev->dev));
+       if (!card->event_wq)
+               goto out_wq;
        if (qeth_setup_channel(&card->read, true))
                goto out_ip;
        if (qeth_setup_channel(&card->write, true))
@@ -1454,6 +1486,8 @@ out_data:
 out_channel:
        qeth_clean_channel(&card->read);
 out_ip:
+       destroy_workqueue(card->event_wq);
+out_wq:
        dev_set_drvdata(&gdev->dev, NULL);
        kfree(card);
 out:
@@ -1782,6 +1816,7 @@ static int qeth_idx_activate_get_answer(struct qeth_card *card,
                QETH_DBF_MESSAGE(2, "Error2 in activating channel rc=%d\n", rc);
                QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc);
                atomic_set(&channel->irq_pending, 0);
+               qeth_release_buffer(channel, iob);
                wake_up(&card->wait_q);
                return rc;
        }
@@ -1851,6 +1886,7 @@ static int qeth_idx_activate_channel(struct qeth_card *card,
                        rc);
                QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc);
                atomic_set(&channel->irq_pending, 0);
+               qeth_release_buffer(channel, iob);
                wake_up(&card->wait_q);
                return rc;
        }
@@ -1981,7 +2017,7 @@ void qeth_prepare_control_data(struct qeth_card *card, int len,
        card->seqno.pdu_hdr++;
        memcpy(QETH_PDU_HEADER_ACK_SEQ_NO(iob->data),
               &card->seqno.pdu_hdr_ack, QETH_SEQ_NO_LENGTH);
-       QETH_DBF_HEX(CTRL, 2, iob->data, QETH_DBF_CTRL_LEN);
+       QETH_DBF_HEX(CTRL, 2, iob->data, min(len, QETH_DBF_CTRL_LEN));
 }
 EXPORT_SYMBOL_GPL(qeth_prepare_control_data);
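
The min() above guards the trace against over-reading: with a fixed-size dump,
a control command shorter than QETH_DBF_CTRL_LEN would put trailing buffer
bytes into the debug feed. A self-contained sketch of a bounded hex dump; the
constant and names are stand-ins, not the driver's own:

    #include <stdio.h>

    #define CTRL_DUMP_LEN 64                /* stand-in for QETH_DBF_CTRL_LEN */

    static void dbf_hex(const unsigned char *data, int len)
    {
            int n = len < CTRL_DUMP_LEN ? len : CTRL_DUMP_LEN;

            for (int i = 0; i < n; i++)     /* never dump past 'len' */
                    printf("%02x", data[i]);
            putchar('\n');
    }
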
 
@@ -1998,24 +2034,22 @@ EXPORT_SYMBOL_GPL(qeth_prepare_control_data);
  *                             for the IPA commands.
  * @reply_param:               private pointer passed to the callback
  *
- * Returns the value of the `return_code' field of the response
- * block returned from the hardware, or other error indication.
- * Value of zero indicates successful execution of the command.
- *
  * Callback function gets called one or more times, with cb_cmd
  * pointing to the response returned by the hardware. Callback
- * function must return non-zero if more reply blocks are expected,
- * and zero if the last or only reply block is received. Callback
- * function can get the value of the reply_param pointer from the
+ * function must return
+ *   > 0 if more reply blocks are expected,
+ *     0 if the last or only reply block is received, and
+ *   < 0 on error.
+ * Callback function can get the value of the reply_param pointer from the
  * field 'param' of the structure qeth_reply.
  */
 
-int qeth_send_control_data(struct qeth_card *card, int len,
-               struct qeth_cmd_buffer *iob,
-               int (*reply_cb)(struct qeth_card *cb_card,
-                               struct qeth_reply *cb_reply,
-                               unsigned long cb_cmd),
-               void *reply_param)
+static int qeth_send_control_data(struct qeth_card *card, int len,
+                                 struct qeth_cmd_buffer *iob,
+                                 int (*reply_cb)(struct qeth_card *cb_card,
+                                                 struct qeth_reply *cb_reply,
+                                                 unsigned long cb_cmd),
+                                 void *reply_param)
 {
        struct qeth_channel *channel = iob->channel;
        int rc;
@@ -2031,12 +2065,15 @@ int qeth_send_control_data(struct qeth_card *card, int len,
        }
        reply = qeth_alloc_reply(card);
        if (!reply) {
+               qeth_release_buffer(channel, iob);
                return -ENOMEM;
        }
        reply->callback = reply_cb;
        reply->param = reply_param;
 
-       init_waitqueue_head(&reply->wait_q);
+       /* pairs with qeth_release_buffer(): */
+       qeth_get_reply(reply);
+       iob->reply = reply;
 
        while (atomic_cmpxchg(&channel->irq_pending, 0, 1)) ;
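
With iob->reply set above, the command buffer now owns a second reference to
the reply object (the first belongs to the sender); qeth_release_buffer()
drops it, so whichever side finishes last frees the object. The lifetime rule
in miniature, as a self-contained sketch with hypothetical names:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct reply {
            atomic_int refcnt;
    };

    static void get_reply(struct reply *reply)
    {
            atomic_fetch_add(&reply->refcnt, 1);
    }

    static void put_reply(struct reply *reply)
    {
            /* atomic_fetch_sub() returns the old value: free on 1 -> 0 */
            if (atomic_fetch_sub(&reply->refcnt, 1) == 1)
                    free(reply);
    }
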
 
@@ -2051,9 +2088,7 @@ int qeth_send_control_data(struct qeth_card *card, int len,
        }
        qeth_prepare_control_data(card, len, iob);
 
-       spin_lock_irq(&card->lock);
-       list_add_tail(&reply->list, &card->cmd_waiter_list);
-       spin_unlock_irq(&card->lock);
+       qeth_enqueue_reply(card, reply);
 
        timeout = jiffies + event_timeout;
 
@@ -2066,10 +2101,8 @@ int qeth_send_control_data(struct qeth_card *card, int len,
                QETH_DBF_MESSAGE(2, "qeth_send_control_data on device %x: ccw_device_start rc = %i\n",
                                 CARD_DEVID(card), rc);
                QETH_CARD_TEXT_(card, 2, " err%d", rc);
-               spin_lock_irq(&card->lock);
-               list_del_init(&reply->list);
+               qeth_dequeue_reply(card, reply);
                qeth_put_reply(reply);
-               spin_unlock_irq(&card->lock);
                qeth_release_buffer(channel, iob);
                atomic_set(&channel->irq_pending, 0);
                wake_up(&card->wait_q);
@@ -2091,21 +2124,16 @@ int qeth_send_control_data(struct qeth_card *card, int len,
                }
        }
 
+       qeth_dequeue_reply(card, reply);
        rc = reply->rc;
        qeth_put_reply(reply);
        return rc;
 
 time_err:
-       reply->rc = -ETIME;
-       spin_lock_irq(&card->lock);
-       list_del_init(&reply->list);
-       spin_unlock_irq(&card->lock);
-       atomic_inc(&reply->received);
-       rc = reply->rc;
+       qeth_dequeue_reply(card, reply);
        qeth_put_reply(reply);
-       return rc;
+       return -ETIME;
 }
-EXPORT_SYMBOL_GPL(qeth_send_control_data);
 
 static int qeth_cm_enable_cb(struct qeth_card *card, struct qeth_reply *reply,
                unsigned long data)
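
Under the callback contract documented above, a multi-part command's callback
might look like the following hypothetical example (not from the patch; the
seq_no/used_total pattern mirrors the SNMP callback further down):

    static int sample_reply_cb(struct qeth_card *card,
                               struct qeth_reply *reply, unsigned long data)
    {
            struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data;

            if (cmd->hdr.return_code)
                    return -EIO;    /* < 0: error, sender is notified */

            if (cmd->data.setadapterparms.hdr.seq_no <
                cmd->data.setadapterparms.hdr.used_total)
                    return 1;       /* > 0: more reply blocks expected */

            return 0;               /*   0: last block, command completes */
    }
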
@@ -2310,9 +2338,8 @@ static int qeth_ulp_setup_cb(struct qeth_card *card, struct qeth_reply *reply,
                QETH_DBF_TEXT(SETUP, 2, "olmlimit");
                dev_err(&card->gdev->dev, "A connection could not be "
                        "established because of an OLM limit\n");
-               iob->rc = -EMLINK;
+               return -EMLINK;
        }
-       QETH_DBF_TEXT_(SETUP, 2, "  rc%d", iob->rc);
        return 0;
 }
 
@@ -2362,11 +2389,12 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
        return 0;
 }
 
-static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q)
+static void qeth_free_output_queue(struct qeth_qdio_out_q *q)
 {
        if (!q)
                return;
 
+       qeth_clear_outq_buffers(q, 1);
        qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
        kfree(q);
 }
@@ -2405,12 +2433,6 @@ static int qeth_alloc_qdio_buffers(struct qeth_card *card)
                goto out_freeinq;
 
        /* outbound */
-       card->qdio.out_qs =
-               kcalloc(card->qdio.no_out_queues,
-                       sizeof(struct qeth_qdio_out_q *),
-                       GFP_KERNEL);
-       if (!card->qdio.out_qs)
-               goto out_freepool;
        for (i = 0; i < card->qdio.no_out_queues; ++i) {
                card->qdio.out_qs[i] = qeth_alloc_qdio_out_buf();
                if (!card->qdio.out_qs[i])
@@ -2441,12 +2463,9 @@ out_freeoutqbufs:
        }
 out_freeoutq:
        while (i > 0) {
-               qeth_free_qdio_out_buf(card->qdio.out_qs[--i]);
-               qeth_clear_outq_buffers(card->qdio.out_qs[i], 1);
+               qeth_free_output_queue(card->qdio.out_qs[--i]);
+               card->qdio.out_qs[i] = NULL;
        }
-       kfree(card->qdio.out_qs);
-       card->qdio.out_qs = NULL;
-out_freepool:
        qeth_free_buffer_pool(card);
 out_freeinq:
        qeth_free_qdio_queue(card->qdio.in_q);
@@ -2475,13 +2494,9 @@ static void qeth_free_qdio_buffers(struct qeth_card *card)
        /* inbound buffer pool */
        qeth_free_buffer_pool(card);
        /* free outbound qdio_qs */
-       if (card->qdio.out_qs) {
-               for (i = 0; i < card->qdio.no_out_queues; ++i) {
-                       qeth_clear_outq_buffers(card->qdio.out_qs[i], 1);
-                       qeth_free_qdio_out_buf(card->qdio.out_qs[i]);
-               }
-               kfree(card->qdio.out_qs);
-               card->qdio.out_qs = NULL;
+       for (i = 0; i < card->qdio.no_out_queues; i++) {
+               qeth_free_output_queue(card->qdio.out_qs[i]);
+               card->qdio.out_qs[i] = NULL;
        }
 }
 
@@ -2688,8 +2703,7 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
                        } else {
                                free_page((unsigned long)entry->elements[i]);
                                entry->elements[i] = page_address(page);
-                               if (card->options.performance_stats)
-                                       card->perf_stats.sg_alloc_page_rx++;
+                               QETH_CARD_STAT_INC(card, rx_sg_alloc_page);
                        }
                }
        }
@@ -2808,14 +2822,20 @@ static void qeth_fill_ipacmd_header(struct qeth_card *card,
        cmd->hdr.prot_version = prot;
 }
 
-void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob)
+void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
+                         u16 cmd_length)
 {
+       u16 total_length = IPA_PDU_HEADER_SIZE + cmd_length;
        u8 prot_type = qeth_mpc_select_prot_type(card);
 
        memcpy(iob->data, IPA_PDU_HEADER, IPA_PDU_HEADER_SIZE);
+       memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &total_length, 2);
        memcpy(QETH_IPA_CMD_PROT_TYPE(iob->data), &prot_type, 1);
+       memcpy(QETH_IPA_PDU_LEN_PDU1(iob->data), &cmd_length, 2);
+       memcpy(QETH_IPA_PDU_LEN_PDU2(iob->data), &cmd_length, 2);
        memcpy(QETH_IPA_CMD_DEST_ADDR(iob->data),
               &card->token.ulp_connection_r, QETH_MPC_TOKEN_LENGTH);
+       memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &cmd_length, 2);
 }
 EXPORT_SYMBOL_GPL(qeth_prepare_ipa_cmd);
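
The fix-up writes the overall PDU length once and the command length into
three per-PDU fields, all via memcpy() in native byte order (big-endian on
s390). A self-contained sketch; the offsets below are made up, the real ones
come from the QETH_IPA_PDU_LEN_* accessors:

    #include <stdint.h>
    #include <string.h>

    #define PDU_HEADER_SIZE 0x40    /* stand-in for IPA_PDU_HEADER_SIZE */

    static void fixup_pdu_lengths(uint8_t *pdu, uint16_t cmd_length)
    {
            uint16_t total_length = PDU_HEADER_SIZE + cmd_length;

            memcpy(pdu + 0x02, &total_length, 2);   /* hypothetical offsets */
            memcpy(pdu + 0x1c, &cmd_length, 2);
            memcpy(pdu + 0x20, &cmd_length, 2);
            memcpy(pdu + 0x3a, &cmd_length, 2);
    }
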
 
@@ -2826,7 +2846,7 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
 
        iob = qeth_get_buffer(&card->write);
        if (iob) {
-               qeth_prepare_ipa_cmd(card, iob);
+               qeth_prepare_ipa_cmd(card, iob, sizeof(struct qeth_ipa_cmd));
                qeth_fill_ipacmd_header(card, __ipa_cmd(iob), ipacmd, prot);
        } else {
                dev_warn(&card->gdev->dev,
@@ -2839,6 +2859,14 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
 }
 EXPORT_SYMBOL_GPL(qeth_get_ipacmd_buffer);
 
+static int qeth_send_ipa_cmd_cb(struct qeth_card *card,
+                               struct qeth_reply *reply, unsigned long data)
+{
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
+
+       return (cmd->hdr.return_code) ? -EIO : 0;
+}
+
 /**
  * qeth_send_ipa_cmd() - send an IPA command
  *
@@ -2850,11 +2878,15 @@ int qeth_send_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
                        unsigned long),
                void *reply_param)
 {
+       u16 length;
        int rc;
 
        QETH_CARD_TEXT(card, 4, "sendipa");
-       rc = qeth_send_control_data(card, IPA_CMD_LENGTH,
-                                               iob, reply_cb, reply_param);
+
+       if (reply_cb == NULL)
+               reply_cb = qeth_send_ipa_cmd_cb;
+       memcpy(&length, QETH_IPA_PDU_LEN_TOTAL(iob->data), 2);
+       rc = qeth_send_control_data(card, length, iob, reply_cb, reply_param);
        if (rc == -ETIME) {
                qeth_clear_ipacmd_list(card);
                qeth_schedule_recovery(card);
@@ -2863,9 +2895,19 @@ int qeth_send_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
 }
 EXPORT_SYMBOL_GPL(qeth_send_ipa_cmd);
 
+static int qeth_send_startlan_cb(struct qeth_card *card,
+                                struct qeth_reply *reply, unsigned long data)
+{
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
+
+       if (cmd->hdr.return_code == IPA_RC_LAN_OFFLINE)
+               return -ENETDOWN;
+
+       return (cmd->hdr.return_code) ? -EIO : 0;
+}
+
 static int qeth_send_startlan(struct qeth_card *card)
 {
-       int rc;
        struct qeth_cmd_buffer *iob;
 
        QETH_DBF_TEXT(SETUP, 2, "strtlan");
@@ -2873,8 +2915,7 @@ static int qeth_send_startlan(struct qeth_card *card)
        iob = qeth_get_ipacmd_buffer(card, IPA_CMD_STARTLAN, 0);
        if (!iob)
                return -ENOMEM;
-       rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
-       return rc;
+       return qeth_send_ipa_cmd(card, iob, qeth_send_startlan_cb, NULL);
 }
 
 static int qeth_setadpparms_inspect_rc(struct qeth_ipa_cmd *cmd)
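
qeth_send_ipa_cmd() above substitutes qeth_send_ipa_cmd_cb when the caller
passes no callback, so every command gets at least the rc-to-errno
translation. The fallback idiom in a self-contained form, names hypothetical:

    #include <errno.h>
    #include <stddef.h>

    typedef int (*reply_cb_t)(unsigned int hw_rc);

    static int default_cb(unsigned int hw_rc)
    {
            return hw_rc ? -EIO : 0;        /* translate hardware rc */
    }

    static int send_cmd(unsigned int hw_rc, reply_cb_t cb)
    {
            if (cb == NULL)
                    cb = default_cb;
            return cb(hw_rc);
    }
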
@@ -2892,7 +2933,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card,
 
        QETH_CARD_TEXT(card, 3, "quyadpcb");
        if (qeth_setadpparms_inspect_rc(cmd))
-               return 0;
+               return -EIO;
 
        if (cmd->data.setadapterparms.data.query_cmds_supp.lan_type & 0x7f) {
                card->info.link_type =
@@ -2947,19 +2988,18 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
        cmd = (struct qeth_ipa_cmd *) data;
 
        switch (cmd->hdr.return_code) {
+       case IPA_RC_SUCCESS:
+               break;
        case IPA_RC_NOTSUPP:
        case IPA_RC_L2_UNSUPPORTED_CMD:
                QETH_DBF_TEXT(SETUP, 2, "ipaunsup");
                card->options.ipa4.supported_funcs |= IPA_SETADAPTERPARMS;
                card->options.ipa6.supported_funcs |= IPA_SETADAPTERPARMS;
-               return 0;
+               return -EOPNOTSUPP;
        default:
-               if (cmd->hdr.return_code) {
-                       QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n",
-                                        CARD_DEVID(card),
-                                        cmd->hdr.return_code);
-                       return 0;
-               }
+               QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n",
+                                CARD_DEVID(card), cmd->hdr.return_code);
+               return -EIO;
        }
 
        if (cmd->hdr.prot_version == QETH_PROT_IPV4) {
@@ -2997,7 +3037,7 @@ static int qeth_query_switch_attributes_cb(struct qeth_card *card,
 
        QETH_CARD_TEXT(card, 2, "qswiatcb");
        if (qeth_setadpparms_inspect_rc(cmd))
-               return 0;
+               return -EIO;
 
        sw_info = (struct qeth_switch_info *)reply->param;
        attrs = &cmd->data.setadapterparms.data.query_switch_attributes;
@@ -3029,15 +3069,15 @@ int qeth_query_switch_attributes(struct qeth_card *card,
 static int qeth_query_setdiagass_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
-       __u16 rc;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
+       u16 rc = cmd->hdr.return_code;
 
-       cmd = (struct qeth_ipa_cmd *)data;
-       rc = cmd->hdr.return_code;
-       if (rc)
+       if (rc) {
                QETH_CARD_TEXT_(card, 2, "diagq:%x", rc);
-       else
-               card->info.diagass_support = cmd->data.diagass.ext;
+               return -EIO;
+       }
+
+       card->info.diagass_support = cmd->data.diagass.ext;
        return 0;
 }
 
@@ -3084,13 +3124,13 @@ static void qeth_get_trap_id(struct qeth_card *card, struct qeth_trap_id *tid)
 static int qeth_hw_trap_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
-       __u16 rc;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
+       u16 rc = cmd->hdr.return_code;
 
-       cmd = (struct qeth_ipa_cmd *)data;
-       rc = cmd->hdr.return_code;
-       if (rc)
+       if (rc) {
                QETH_CARD_TEXT_(card, 2, "trapc:%x", rc);
+               return -EIO;
+       }
        return 0;
 }
 
@@ -3139,7 +3179,7 @@ static int qeth_check_qdio_errors(struct qeth_card *card,
                               buf->element[14].sflags);
                QETH_CARD_TEXT_(card, 2, " qerr=%X", qdio_error);
                if ((buf->element[15].sflags) == 0x12) {
-                       card->stats.rx_dropped++;
+                       QETH_CARD_STAT_INC(card, rx_dropped);
                        return 0;
                } else
                        return 1;
@@ -3203,17 +3243,8 @@ static void qeth_queue_input_buffer(struct qeth_card *card, int index)
                 * 'index') un-requeued -> this buffer is the first buffer that
                 * will be requeued the next time
                 */
-               if (card->options.performance_stats) {
-                       card->perf_stats.inbound_do_qdio_cnt++;
-                       card->perf_stats.inbound_do_qdio_start_time =
-                               qeth_get_micros();
-               }
                rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0,
                             queue->next_buf_to_init, count);
-               if (card->options.performance_stats)
-                       card->perf_stats.inbound_do_qdio_time +=
-                               qeth_get_micros() -
-                               card->perf_stats.inbound_do_qdio_start_time;
                if (rc) {
                        QETH_CARD_TEXT(card, 2, "qinberr");
                }
@@ -3290,8 +3321,7 @@ static void qeth_switch_to_packing_if_needed(struct qeth_qdio_out_q *queue)
                    >= QETH_HIGH_WATERMARK_PACK){
                        /* switch non-PACKING -> PACKING */
                        QETH_CARD_TEXT(queue->card, 6, "np->pack");
-                       if (queue->card->options.performance_stats)
-                               queue->card->perf_stats.sc_dp_p++;
+                       QETH_TXQ_STAT_INC(queue, packing_mode_switch);
                        queue->do_pack = 1;
                }
        }
@@ -3310,8 +3340,7 @@ static int qeth_switch_to_nonpacking_if_needed(struct qeth_qdio_out_q *queue)
                    <= QETH_LOW_WATERMARK_PACK) {
                        /* switch PACKING -> non-PACKING */
                        QETH_CARD_TEXT(queue->card, 6, "pack->np");
-                       if (queue->card->options.performance_stats)
-                               queue->card->perf_stats.sc_p_dp++;
+                       QETH_TXQ_STAT_INC(queue, packing_mode_switch);
                        queue->do_pack = 0;
                        return qeth_prep_flush_pack_buffer(queue);
                }
@@ -3365,25 +3394,16 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
                }
        }
 
+       QETH_TXQ_STAT_ADD(queue, bufs, count);
        netif_trans_update(queue->card->dev);
-       if (queue->card->options.performance_stats) {
-               queue->card->perf_stats.outbound_do_qdio_cnt++;
-               queue->card->perf_stats.outbound_do_qdio_start_time =
-                       qeth_get_micros();
-       }
        qdio_flags = QDIO_FLAG_SYNC_OUTPUT;
        if (atomic_read(&queue->set_pci_flags_count))
                qdio_flags |= QDIO_FLAG_PCI_OUT;
        atomic_add(count, &queue->used_buffers);
-
        rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags,
                     queue->queue_no, index, count);
-       if (queue->card->options.performance_stats)
-               queue->card->perf_stats.outbound_do_qdio_time +=
-                       qeth_get_micros() -
-                       queue->card->perf_stats.outbound_do_qdio_start_time;
        if (rc) {
-               queue->card->stats.tx_errors += count;
+               QETH_TXQ_STAT_ADD(queue, tx_errors, count);
                /* ignore temporary SIGA errors without busy condition */
                if (rc == -ENOBUFS)
                        return;
@@ -3398,8 +3418,6 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
                qeth_schedule_recovery(queue->card);
                return;
        }
-       if (queue->card->options.performance_stats)
-               queue->card->perf_stats.bufs_sent += count;
 }
 
 static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
@@ -3430,10 +3448,8 @@ static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
                        if (!flush_cnt &&
                            !atomic_read(&queue->set_pci_flags_count))
                                flush_cnt += qeth_prep_flush_pack_buffer(queue);
-                       if (queue->card->options.performance_stats &&
-                           q_was_packing)
-                               queue->card->perf_stats.bufs_sent_pack +=
-                                       flush_cnt;
+                       if (q_was_packing)
+                               QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_cnt);
                        if (flush_cnt)
                                qeth_flush_buffers(queue, index, flush_cnt);
                        atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
@@ -3488,7 +3504,7 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
        int rc;
 
        if (!qeth_is_cq(card, queue))
-               goto out;
+               return;
 
        QETH_CARD_TEXT_(card, 5, "qcqhe%d", first_element);
        QETH_CARD_TEXT_(card, 5, "qcqhc%d", count);
@@ -3497,12 +3513,7 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
        if (qdio_err) {
                netif_stop_queue(card->dev);
                qeth_schedule_recovery(card);
-               goto out;
-       }
-
-       if (card->options.performance_stats) {
-               card->perf_stats.cq_cnt++;
-               card->perf_stats.cq_start_time = qeth_get_micros();
+               return;
        }
 
        for (i = first_element; i < first_element + count; ++i) {
@@ -3530,14 +3541,6 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
        }
        card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init
                                   + count) % QDIO_MAX_BUFFERS_PER_Q;
-
-       if (card->options.performance_stats) {
-               int delta_t = qeth_get_micros();
-               delta_t -= card->perf_stats.cq_start_time;
-               card->perf_stats.cq_time += delta_t;
-       }
-out:
-       return;
 }
 
 static void qeth_qdio_input_handler(struct ccw_device *ccwdev,
@@ -3573,11 +3576,7 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
                qeth_schedule_recovery(card);
                return;
        }
-       if (card->options.performance_stats) {
-               card->perf_stats.outbound_handler_cnt++;
-               card->perf_stats.outbound_handler_start_time =
-                       qeth_get_micros();
-       }
+
        for (i = first_element; i < (first_element + count); ++i) {
                int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
                buffer = queue->bufs[bidx];
@@ -3623,9 +3622,6 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
                qeth_check_outbound_queue(queue);
 
        netif_wake_queue(queue->card->dev);
-       if (card->options.performance_stats)
-               card->perf_stats.outbound_handler_time += qeth_get_micros() -
-                       card->perf_stats.outbound_handler_start_time;
 }
 
 /* We cannot use outbound queue 3 for unicast packets on HiperSockets */
@@ -3746,11 +3742,12 @@ EXPORT_SYMBOL_GPL(qeth_count_elements);
  * The number of needed buffer elements is returned in @elements.
  * Error to create the hdr is indicated by returning with < 0.
  */
-static int qeth_add_hw_header(struct qeth_card *card, struct sk_buff *skb,
-                             struct qeth_hdr **hdr, unsigned int hdr_len,
-                             unsigned int proto_len, unsigned int *elements)
+static int qeth_add_hw_header(struct qeth_qdio_out_q *queue,
+                             struct sk_buff *skb, struct qeth_hdr **hdr,
+                             unsigned int hdr_len, unsigned int proto_len,
+                             unsigned int *elements)
 {
-       const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(card);
+       const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(queue->card);
        const unsigned int contiguous = proto_len ? proto_len : 1;
        unsigned int __elements;
        addr_t start, end;
@@ -3789,15 +3786,12 @@ check_layout:
                }
 
                rc = skb_linearize(skb);
-               if (card->options.performance_stats) {
-                       if (rc)
-                               card->perf_stats.tx_linfail++;
-                       else
-                               card->perf_stats.tx_lin++;
-               }
-               if (rc)
+               if (rc) {
+                       QETH_TXQ_STAT_INC(queue, skbs_linearized_fail);
                        return rc;
+               }
 
+               QETH_TXQ_STAT_INC(queue, skbs_linearized);
                /* Linearization changed the layout, re-evaluate: */
                goto check_layout;
        }
@@ -3921,9 +3915,8 @@ static int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
                QETH_CARD_TEXT(queue->card, 6, "fillbfnp");
        } else {
                QETH_CARD_TEXT(queue->card, 6, "fillbfpa");
-               if (queue->card->options.performance_stats)
-                       queue->card->perf_stats.skbs_sent_pack++;
 
+               QETH_TXQ_STAT_INC(queue, skbs_pack);
                /* If the buffer still has free elements, keep using it. */
                if (buf->next_element_to_fill <
                    QETH_MAX_BUFFER_ELEMENTS(queue->card))
@@ -4037,8 +4030,8 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
        }
 out:
        /* at this point the queue is UNLOCKED again */
-       if (queue->card->options.performance_stats && do_pack)
-               queue->card->perf_stats.bufs_sent_pack += flush_count;
+       if (do_pack)
+               QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_count);
 
        return rc;
 }
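
The QETH_TXQ_STAT_INC/_ADD helpers used throughout replace the old
'if (performance_stats)' guards with unconditional per-queue counters. A
plausible shape for them (an assumption; the real definitions live in
qeth_core.h, not in this hunk):

    #define QETH_TXQ_STAT_INC(q, stat)      (++(q)->stats.stat)
    #define QETH_TXQ_STAT_ADD(q, stat, val) ((q)->stats.stat += (val))
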
@@ -4062,8 +4055,9 @@ static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr,
 
 int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
              struct qeth_qdio_out_q *queue, int ipv, int cast_type,
-             void (*fill_header)(struct qeth_card *card, struct qeth_hdr *hdr,
-                                 struct sk_buff *skb, int ipv, int cast_type,
+             void (*fill_header)(struct qeth_qdio_out_q *queue,
+                                 struct qeth_hdr *hdr, struct sk_buff *skb,
+                                 int ipv, int cast_type,
                                  unsigned int data_len))
 {
        unsigned int proto_len, hw_hdr_len;
@@ -4088,7 +4082,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
        if (rc)
                return rc;
 
-       push_len = qeth_add_hw_header(card, skb, &hdr, hw_hdr_len, proto_len,
+       push_len = qeth_add_hw_header(queue, skb, &hdr, hw_hdr_len, proto_len,
                                      &elements);
        if (push_len < 0)
                return push_len;
@@ -4098,7 +4092,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
                data_offset = push_len + proto_len;
        }
        memset(hdr, 0, hw_hdr_len);
-       fill_header(card, hdr, skb, ipv, cast_type, frame_len);
+       fill_header(queue, hdr, skb, ipv, cast_type, frame_len);
        if (is_tso)
                qeth_fill_tso_ext((struct qeth_hdr_tso *) hdr,
                                  frame_len - proto_len, skb, proto_len);
@@ -4115,14 +4109,12 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
        }
 
        if (!rc) {
-               if (card->options.performance_stats) {
-                       card->perf_stats.buf_elements_sent += elements;
-                       if (is_sg)
-                               card->perf_stats.sg_skbs_sent++;
-                       if (is_tso) {
-                               card->perf_stats.large_send_bytes += frame_len;
-                               card->perf_stats.large_send_cnt++;
-                       }
+               QETH_TXQ_STAT_ADD(queue, buf_elements, elements);
+               if (is_sg)
+                       QETH_TXQ_STAT_INC(queue, skbs_sg);
+               if (is_tso) {
+                       QETH_TXQ_STAT_INC(queue, skbs_tso);
+                       QETH_TXQ_STAT_ADD(queue, tso_bytes, frame_len);
                }
        } else {
                if (!push_len)
@@ -4149,7 +4141,7 @@ static int qeth_setadp_promisc_mode_cb(struct qeth_card *card,
                setparms->data.mode = SET_PROMISC_MODE_OFF;
        }
        card->info.promisc_mode = setparms->data.mode;
-       return 0;
+       return (cmd->hdr.return_code) ? -EIO : 0;
 }
 
 void qeth_setadp_promisc_mode(struct qeth_card *card)
@@ -4181,18 +4173,6 @@ void qeth_setadp_promisc_mode(struct qeth_card *card)
 }
 EXPORT_SYMBOL_GPL(qeth_setadp_promisc_mode);
 
-struct net_device_stats *qeth_get_stats(struct net_device *dev)
-{
-       struct qeth_card *card;
-
-       card = dev->ml_priv;
-
-       QETH_CARD_TEXT(card, 5, "getstat");
-
-       return &card->stats;
-}
-EXPORT_SYMBOL_GPL(qeth_get_stats);
-
 static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
@@ -4201,12 +4181,15 @@ static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card,
 
        QETH_CARD_TEXT(card, 4, "chgmaccb");
        if (qeth_setadpparms_inspect_rc(cmd))
-               return 0;
+               return -EIO;
 
        adp_cmd = &cmd->data.setadapterparms;
+       if (!is_valid_ether_addr(adp_cmd->data.change_addr.addr))
+               return -EADDRNOTAVAIL;
+
        if (IS_LAYER2(card) && IS_OSD(card) && !IS_VM_NIC(card) &&
            !(adp_cmd->hdr.flags & QETH_SETADP_FLAGS_VIRTUAL_MAC))
-               return 0;
+               return -EADDRNOTAVAIL;
 
        ether_addr_copy(card->dev->dev_addr, adp_cmd->data.change_addr.addr);
        return 0;
@@ -4245,7 +4228,7 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
 
        QETH_CARD_TEXT(card, 4, "setaccb");
        if (cmd->hdr.return_code)
-               return 0;
+               return -EIO;
        qeth_setadpparms_inspect_rc(cmd);
 
        access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl;
@@ -4319,7 +4302,7 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
                        card->options.isolation = card->options.prev_isolation;
                break;
        }
-       return 0;
+       return (cmd->hdr.return_code) ? -EIO : 0;
 }
 
 static int qeth_setadpparms_set_access_ctrl(struct qeth_card *card,
@@ -4383,7 +4366,7 @@ void qeth_tx_timeout(struct net_device *dev)
 
        card = dev->ml_priv;
        QETH_CARD_TEXT(card, 4, "txtimeo");
-       card->stats.tx_errors++;
+       QETH_CARD_STAT_INC(card, tx_errors);
        qeth_schedule_recovery(card);
 }
 EXPORT_SYMBOL_GPL(qeth_tx_timeout);
@@ -4453,27 +4436,6 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum)
        return rc;
 }
 
-static int qeth_send_ipa_snmp_cmd(struct qeth_card *card,
-               struct qeth_cmd_buffer *iob, int len,
-               int (*reply_cb)(struct qeth_card *, struct qeth_reply *,
-                       unsigned long),
-               void *reply_param)
-{
-       u16 s1, s2;
-
-       QETH_CARD_TEXT(card, 4, "sendsnmp");
-
-       /* adjust PDU length fields in IPA_PDU_HEADER */
-       s1 = (u32) IPA_PDU_HEADER_SIZE + len;
-       s2 = (u32) len;
-       memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &s1, 2);
-       memcpy(QETH_IPA_PDU_LEN_PDU1(iob->data), &s2, 2);
-       memcpy(QETH_IPA_PDU_LEN_PDU2(iob->data), &s2, 2);
-       memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &s2, 2);
-       return qeth_send_control_data(card, IPA_PDU_HEADER_SIZE + len, iob,
-                                     reply_cb, reply_param);
-}
-
 static int qeth_snmp_command_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long sdata)
 {
@@ -4491,13 +4453,13 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
 
        if (cmd->hdr.return_code) {
                QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code);
-               return 0;
+               return -EIO;
        }
        if (cmd->data.setadapterparms.hdr.return_code) {
                cmd->hdr.return_code =
                        cmd->data.setadapterparms.hdr.return_code;
                QETH_CARD_TEXT_(card, 4, "scer2%x", cmd->hdr.return_code);
-               return 0;
+               return -EIO;
        }
        data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data));
        if (cmd->data.setadapterparms.hdr.seq_no == 1) {
@@ -4512,9 +4474,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
 
        /* check if there is enough room in userspace */
        if ((qinfo->udata_len - qinfo->udata_offset) < data_len) {
-               QETH_CARD_TEXT_(card, 4, "scer3%i", -ENOMEM);
-               cmd->hdr.return_code = IPA_RC_ENOMEM;
-               return 0;
+               QETH_CARD_TEXT_(card, 4, "scer3%i", -ENOSPC);
+               return -ENOSPC;
        }
        QETH_CARD_TEXT_(card, 4, "snore%i",
                       cmd->data.setadapterparms.hdr.used_total);
@@ -4579,10 +4540,13 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
                rc = -ENOMEM;
                goto out;
        }
+
+       /* for large requests, fix-up the length fields: */
+       qeth_prepare_ipa_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len);
+
        cmd = __ipa_cmd(iob);
        memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len);
-       rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len,
-                                   qeth_snmp_command_cb, (void *)&qinfo);
+       rc = qeth_send_ipa_cmd(card, iob, qeth_snmp_command_cb, &qinfo);
        if (rc)
                QETH_DBF_MESSAGE(2, "SNMP command failed on device %x: (%#x)\n",
                                 CARD_DEVID(card), rc);
@@ -4606,16 +4570,14 @@ static int qeth_setadpparms_query_oat_cb(struct qeth_card *card,
 
        QETH_CARD_TEXT(card, 3, "qoatcb");
        if (qeth_setadpparms_inspect_rc(cmd))
-               return 0;
+               return -EIO;
 
        priv = (struct qeth_qoat_priv *)reply->param;
        resdatalen = cmd->data.setadapterparms.hdr.cmdlength;
        resdata = (char *)data + 28;
 
-       if (resdatalen > (priv->buffer_len - priv->response_len)) {
-               cmd->hdr.return_code = IPA_RC_FFFF;
-               return 0;
-       }
+       if (resdatalen > (priv->buffer_len - priv->response_len))
+               return -ENOSPC;
 
        memcpy((priv->buffer + priv->response_len), resdata,
                resdatalen);
@@ -4688,9 +4650,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata)
                if (copy_to_user(udata, &oat_data,
                    sizeof(struct qeth_query_oat_data)))
                        rc = -EFAULT;
-       } else
-               if (rc == IPA_RC_FFFF)
-                       rc = -EFAULT;
+       }
 
 out_free:
        vfree(priv.buffer);
@@ -4707,7 +4667,7 @@ static int qeth_query_card_info_cb(struct qeth_card *card,
 
        QETH_CARD_TEXT(card, 2, "qcrdincb");
        if (qeth_setadpparms_inspect_rc(cmd))
-               return 0;
+               return -EIO;
 
        card_info = &cmd->data.setadapterparms.data.card_info;
        carrier_info->card_type = card_info->card_type;
@@ -4716,8 +4676,8 @@ static int qeth_query_card_info_cb(struct qeth_card *card,
        return 0;
 }
 
-static int qeth_query_card_info(struct qeth_card *card,
-                               struct carrier_info *carrier_info)
+int qeth_query_card_info(struct qeth_card *card,
+                        struct carrier_info *carrier_info)
 {
        struct qeth_cmd_buffer *iob;
 
@@ -4994,6 +4954,7 @@ static void qeth_core_free_card(struct qeth_card *card)
        qeth_clean_channel(&card->read);
        qeth_clean_channel(&card->write);
        qeth_clean_channel(&card->data);
+       destroy_workqueue(card->event_wq);
        qeth_free_qdio_buffers(card);
        unregister_service_level(&card->qeth_service_level);
        dev_set_drvdata(&card->gdev->dev, NULL);
@@ -5108,12 +5069,10 @@ retriable:
        rc = qeth_send_startlan(card);
        if (rc) {
                QETH_DBF_TEXT_(SETUP, 2, "6err%d", rc);
-               if (rc == IPA_RC_LAN_OFFLINE) {
-                       dev_warn(&card->gdev->dev,
-                               "The LAN is offline\n");
+               if (rc == -ENETDOWN) {
+                       dev_warn(&card->gdev->dev, "The LAN is offline\n");
                        *carrier_ok = false;
                } else {
-                       rc = -ENODEV;
                        goto out;
                }
        } else {
@@ -5265,7 +5224,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
                                QETH_CARD_TEXT(card, 4, "unexeob");
                                QETH_CARD_HEX(card, 2, buffer, sizeof(void *));
                                dev_kfree_skb_any(skb);
-                               card->stats.rx_errors++;
+                               QETH_CARD_STAT_INC(card, rx_errors);
                                return NULL;
                        }
                        element++;
@@ -5277,16 +5236,17 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
        }
        *__element = element;
        *__offset = offset;
-       if (use_rx_sg && card->options.performance_stats) {
-               card->perf_stats.sg_skbs_rx++;
-               card->perf_stats.sg_frags_rx += skb_shinfo(skb)->nr_frags;
+       if (use_rx_sg) {
+               QETH_CARD_STAT_INC(card, rx_sg_skbs);
+               QETH_CARD_STAT_ADD(card, rx_sg_frags,
+                                  skb_shinfo(skb)->nr_frags);
        }
        return skb;
 no_mem:
        if (net_ratelimit()) {
                QETH_CARD_TEXT(card, 2, "noskbmem");
        }
-       card->stats.rx_dropped++;
+       QETH_CARD_STAT_INC(card, rx_dropped);
        return NULL;
 }
 EXPORT_SYMBOL_GPL(qeth_core_get_next_skb);
@@ -5299,11 +5259,6 @@ int qeth_poll(struct napi_struct *napi, int budget)
        int done;
        int new_budget = budget;
 
-       if (card->options.performance_stats) {
-               card->perf_stats.inbound_cnt++;
-               card->perf_stats.inbound_start_time = qeth_get_micros();
-       }
-
        while (1) {
                if (!card->rx.b_count) {
                        card->rx.qdio_err = 0;
@@ -5332,8 +5287,7 @@ int qeth_poll(struct napi_struct *napi, int budget)
                                done = 1;
 
                        if (done) {
-                               if (card->options.performance_stats)
-                                       card->perf_stats.bufs_rec++;
+                               QETH_CARD_STAT_INC(card, rx_bufs);
                                qeth_put_buffer_pool_entry(card,
                                        buffer->pool_entry);
                                qeth_queue_input_buffer(card, card->rx.b_index);
@@ -5361,9 +5315,6 @@ int qeth_poll(struct napi_struct *napi, int budget)
        if (qdio_start_irq(card->data.ccwdev, 0))
                napi_schedule(&card->napi);
 out:
-       if (card->options.performance_stats)
-               card->perf_stats.inbound_time += qeth_get_micros() -
-                       card->perf_stats.inbound_start_time;
        return work_done;
 }
 EXPORT_SYMBOL_GPL(qeth_poll);
@@ -5383,7 +5334,7 @@ static int qeth_setassparms_get_caps_cb(struct qeth_card *card,
        struct qeth_ipa_caps *caps = reply->param;
 
        if (qeth_setassparms_inspect_rc(cmd))
-               return 0;
+               return -EIO;
 
        caps->supported = cmd->data.setassparms.data.caps.supported;
        caps->enabled = cmd->data.setassparms.data.caps.enabled;
@@ -5393,18 +5344,18 @@ static int qeth_setassparms_get_caps_cb(struct qeth_card *card,
 int qeth_setassparms_cb(struct qeth_card *card,
                        struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
 
        QETH_CARD_TEXT(card, 4, "defadpcb");
 
-       cmd = (struct qeth_ipa_cmd *) data;
-       if (cmd->hdr.return_code == 0) {
-               cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
-               if (cmd->hdr.prot_version == QETH_PROT_IPV4)
-                       card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled;
-               if (cmd->hdr.prot_version == QETH_PROT_IPV6)
-                       card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled;
-       }
+       if (cmd->hdr.return_code)
+               return -EIO;
+
+       cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
+       if (cmd->hdr.prot_version == QETH_PROT_IPV4)
+               card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled;
+       if (cmd->hdr.prot_version == QETH_PROT_IPV6)
+               card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled;
        return 0;
 }
 EXPORT_SYMBOL_GPL(qeth_setassparms_cb);
@@ -5650,7 +5601,10 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card)
        SET_NETDEV_DEV(dev, &card->gdev->dev);
        netif_carrier_off(dev);
 
-       if (!IS_OSN(card)) {
+       if (IS_OSN(card)) {
+               dev->ethtool_ops = &qeth_osn_ethtool_ops;
+       } else {
+               dev->ethtool_ops = &qeth_ethtool_ops;
                dev->priv_flags &= ~IFF_TX_SKB_SHARING;
                dev->hw_features |= NETIF_F_SG;
                dev->vlan_features |= NETIF_F_SG;
@@ -5896,9 +5850,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        if (!card)
                return -ENODEV;
 
-       if (card->info.type == QETH_CARD_TYPE_OSN)
-               return -EPERM;
-
        switch (cmd) {
        case SIOC_QETH_ADP_SET_SNMP_CONTROL:
                rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data);
@@ -5938,452 +5889,91 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 EXPORT_SYMBOL_GPL(qeth_do_ioctl);
 
-static struct {
-       const char str[ETH_GSTRING_LEN];
-} qeth_ethtool_stats_keys[] = {
-/*  0 */{"rx skbs"},
-       {"rx buffers"},
-       {"tx skbs"},
-       {"tx buffers"},
-       {"tx skbs no packing"},
-       {"tx buffers no packing"},
-       {"tx skbs packing"},
-       {"tx buffers packing"},
-       {"tx sg skbs"},
-       {"tx buffer elements"},
-/* 10 */{"rx sg skbs"},
-       {"rx sg frags"},
-       {"rx sg page allocs"},
-       {"tx large kbytes"},
-       {"tx large count"},
-       {"tx pk state ch n->p"},
-       {"tx pk state ch p->n"},
-       {"tx pk watermark low"},
-       {"tx pk watermark high"},
-       {"queue 0 buffer usage"},
-/* 20 */{"queue 1 buffer usage"},
-       {"queue 2 buffer usage"},
-       {"queue 3 buffer usage"},
-       {"rx poll time"},
-       {"rx poll count"},
-       {"rx do_QDIO time"},
-       {"rx do_QDIO count"},
-       {"tx handler time"},
-       {"tx handler count"},
-       {"tx time"},
-/* 30 */{"tx count"},
-       {"tx do_QDIO time"},
-       {"tx do_QDIO count"},
-       {"tx csum"},
-       {"tx lin"},
-       {"tx linfail"},
-       {"cq handler count"},
-       {"cq handler time"},
-       {"rx csum"}
-};
-
-int qeth_core_get_sset_count(struct net_device *dev, int stringset)
-{
-       switch (stringset) {
-       case ETH_SS_STATS:
-               return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN);
-       default:
-               return -EINVAL;
-       }
-}
-EXPORT_SYMBOL_GPL(qeth_core_get_sset_count);
-
-void qeth_core_get_ethtool_stats(struct net_device *dev,
-               struct ethtool_stats *stats, u64 *data)
-{
-       struct qeth_card *card = dev->ml_priv;
-       data[0] = card->stats.rx_packets -
-                               card->perf_stats.initial_rx_packets;
-       data[1] = card->perf_stats.bufs_rec;
-       data[2] = card->stats.tx_packets -
-                               card->perf_stats.initial_tx_packets;
-       data[3] = card->perf_stats.bufs_sent;
-       data[4] = card->stats.tx_packets - card->perf_stats.initial_tx_packets
-                       - card->perf_stats.skbs_sent_pack;
-       data[5] = card->perf_stats.bufs_sent - card->perf_stats.bufs_sent_pack;
-       data[6] = card->perf_stats.skbs_sent_pack;
-       data[7] = card->perf_stats.bufs_sent_pack;
-       data[8] = card->perf_stats.sg_skbs_sent;
-       data[9] = card->perf_stats.buf_elements_sent;
-       data[10] = card->perf_stats.sg_skbs_rx;
-       data[11] = card->perf_stats.sg_frags_rx;
-       data[12] = card->perf_stats.sg_alloc_page_rx;
-       data[13] = (card->perf_stats.large_send_bytes >> 10);
-       data[14] = card->perf_stats.large_send_cnt;
-       data[15] = card->perf_stats.sc_dp_p;
-       data[16] = card->perf_stats.sc_p_dp;
-       data[17] = QETH_LOW_WATERMARK_PACK;
-       data[18] = QETH_HIGH_WATERMARK_PACK;
-       data[19] = atomic_read(&card->qdio.out_qs[0]->used_buffers);
-       data[20] = (card->qdio.no_out_queues > 1) ?
-                       atomic_read(&card->qdio.out_qs[1]->used_buffers) : 0;
-       data[21] = (card->qdio.no_out_queues > 2) ?
-                       atomic_read(&card->qdio.out_qs[2]->used_buffers) : 0;
-       data[22] = (card->qdio.no_out_queues > 3) ?
-                       atomic_read(&card->qdio.out_qs[3]->used_buffers) : 0;
-       data[23] = card->perf_stats.inbound_time;
-       data[24] = card->perf_stats.inbound_cnt;
-       data[25] = card->perf_stats.inbound_do_qdio_time;
-       data[26] = card->perf_stats.inbound_do_qdio_cnt;
-       data[27] = card->perf_stats.outbound_handler_time;
-       data[28] = card->perf_stats.outbound_handler_cnt;
-       data[29] = card->perf_stats.outbound_time;
-       data[30] = card->perf_stats.outbound_cnt;
-       data[31] = card->perf_stats.outbound_do_qdio_time;
-       data[32] = card->perf_stats.outbound_do_qdio_cnt;
-       data[33] = card->perf_stats.tx_csum;
-       data[34] = card->perf_stats.tx_lin;
-       data[35] = card->perf_stats.tx_linfail;
-       data[36] = card->perf_stats.cq_cnt;
-       data[37] = card->perf_stats.cq_time;
-       data[38] = card->perf_stats.rx_csum;
-}
-EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats);
-
-void qeth_core_get_strings(struct net_device *dev, u32 stringset, u8 *data)
-{
-       switch (stringset) {
-       case ETH_SS_STATS:
-               memcpy(data, &qeth_ethtool_stats_keys,
-                       sizeof(qeth_ethtool_stats_keys));
-               break;
-       default:
-               WARN_ON(1);
-               break;
-       }
-}
-EXPORT_SYMBOL_GPL(qeth_core_get_strings);
-
-void qeth_core_get_drvinfo(struct net_device *dev,
-               struct ethtool_drvinfo *info)
-{
-       struct qeth_card *card = dev->ml_priv;
-
-       strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3",
-               sizeof(info->driver));
-       strlcpy(info->version, "1.0", sizeof(info->version));
-       strlcpy(info->fw_version, card->info.mcl_level,
-               sizeof(info->fw_version));
-       snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s",
-                CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card));
-}
-EXPORT_SYMBOL_GPL(qeth_core_get_drvinfo);
-
-/* Helper function to fill 'advertising' and 'supported' which are the same. */
-/* Autoneg and full-duplex are supported and advertised unconditionally.     */
-/* Always advertise and support all speeds up to the specified maximum,     */
-/* and only the one specified port type.                                    */
-static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd,
-                               int maxspeed, int porttype)
-{
-       ethtool_link_ksettings_zero_link_mode(cmd, supported);
-       ethtool_link_ksettings_zero_link_mode(cmd, advertising);
-       ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising);
-
-       ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
-       ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
-
-       switch (porttype) {
-       case PORT_TP:
-               ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
-               break;
-       case PORT_FIBRE:
-               ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
-               break;
-       default:
-               ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
-               WARN_ON_ONCE(1);
-       }
-
-       /* partially falls through, to also select the lower speeds */
-       switch (maxspeed) {
-       case SPEED_25000:
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    25000baseSR_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    25000baseSR_Full);
-               break;
-       case SPEED_10000:
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    10000baseT_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    10000baseT_Full);
-       case SPEED_1000:
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    1000baseT_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    1000baseT_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    1000baseT_Half);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    1000baseT_Half);
-       case SPEED_100:
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    100baseT_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    100baseT_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    100baseT_Half);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    100baseT_Half);
-       case SPEED_10:
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    10baseT_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    10baseT_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    10baseT_Half);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    10baseT_Half);
-               /* end fallthrough */
-               break;
-       default:
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    10baseT_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    10baseT_Full);
-               ethtool_link_ksettings_add_link_mode(cmd, supported,
-                                                    10baseT_Half);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    10baseT_Half);
-               WARN_ON_ONCE(1);
-       }
-}
-
-int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
-               struct ethtool_link_ksettings *cmd)
-{
-       struct qeth_card *card = netdev->ml_priv;
-       enum qeth_link_types link_type;
-       struct carrier_info carrier_info;
-       int rc;
-
-       if ((card->info.type == QETH_CARD_TYPE_IQD) || (card->info.guestlan))
-               link_type = QETH_LINK_TYPE_10GBIT_ETH;
-       else
-               link_type = card->info.link_type;
-
-       cmd->base.duplex = DUPLEX_FULL;
-       cmd->base.autoneg = AUTONEG_ENABLE;
-       cmd->base.phy_address = 0;
-       cmd->base.mdio_support = 0;
-       cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
-       cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
-
-       switch (link_type) {
-       case QETH_LINK_TYPE_FAST_ETH:
-       case QETH_LINK_TYPE_LANE_ETH100:
-               cmd->base.speed = SPEED_100;
-               cmd->base.port = PORT_TP;
-               break;
-       case QETH_LINK_TYPE_GBIT_ETH:
-       case QETH_LINK_TYPE_LANE_ETH1000:
-               cmd->base.speed = SPEED_1000;
-               cmd->base.port = PORT_FIBRE;
-               break;
-       case QETH_LINK_TYPE_10GBIT_ETH:
-               cmd->base.speed = SPEED_10000;
-               cmd->base.port = PORT_FIBRE;
-               break;
-       case QETH_LINK_TYPE_25GBIT_ETH:
-               cmd->base.speed = SPEED_25000;
-               cmd->base.port = PORT_FIBRE;
-               break;
-       default:
-               cmd->base.speed = SPEED_10;
-               cmd->base.port = PORT_TP;
-       }
-       qeth_set_cmd_adv_sup(cmd, cmd->base.speed, cmd->base.port);
-
-       /* Check if we can obtain more accurate information.     */
-       /* If the QUERY_CARD_INFO command is not supported or fails, */
-       /* just return the heuristic values filled in above.         */
-       rc = qeth_query_card_info(card, &carrier_info);
-       if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */
-               return 0;
-       if (rc) /* report error from the hardware operation */
-               return rc;
-       /* on success, fill in the information obtained from the hardware */
-
-       netdev_dbg(netdev,
-       "card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n",
-                       carrier_info.card_type,
-                       carrier_info.port_mode,
-                       carrier_info.port_speed);
-
-       /* Update attributes for which we've obtained more authoritative */
-       /* information; leave the rest the way they were filled in above. */
-       switch (carrier_info.card_type) {
-       case CARD_INFO_TYPE_1G_COPPER_A:
-       case CARD_INFO_TYPE_1G_COPPER_B:
-               cmd->base.port = PORT_TP;
-               qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port);
-               break;
-       case CARD_INFO_TYPE_1G_FIBRE_A:
-       case CARD_INFO_TYPE_1G_FIBRE_B:
-               cmd->base.port = PORT_FIBRE;
-               qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port);
-               break;
-       case CARD_INFO_TYPE_10G_FIBRE_A:
-       case CARD_INFO_TYPE_10G_FIBRE_B:
-               cmd->base.port = PORT_FIBRE;
-               qeth_set_cmd_adv_sup(cmd, SPEED_10000, cmd->base.port);
-               break;
-       }
-
-       switch (carrier_info.port_mode) {
-       case CARD_INFO_PORTM_FULLDUPLEX:
-               cmd->base.duplex = DUPLEX_FULL;
-               break;
-       case CARD_INFO_PORTM_HALFDUPLEX:
-               cmd->base.duplex = DUPLEX_HALF;
-               break;
-       }
-
-       switch (carrier_info.port_speed) {
-       case CARD_INFO_PORTS_10M:
-               cmd->base.speed = SPEED_10;
-               break;
-       case CARD_INFO_PORTS_100M:
-               cmd->base.speed = SPEED_100;
-               break;
-       case CARD_INFO_PORTS_1G:
-               cmd->base.speed = SPEED_1000;
-               break;
-       case CARD_INFO_PORTS_10G:
-               cmd->base.speed = SPEED_10000;
-               break;
-       case CARD_INFO_PORTS_25G:
-               cmd->base.speed = SPEED_25000;
-               break;
-       }
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(qeth_core_ethtool_get_link_ksettings);
-
-/* Callback to handle checksum offload command reply from OSA card.
- * Verify that required features have been enabled on the card.
- * Return error in hdr->return_code as this value is checked by caller.
- *
- * Always returns zero to indicate no further messages from the OSA card.
- */
-static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
-                                       struct qeth_reply *reply,
-                                       unsigned long data)
+static int qeth_start_csum_cb(struct qeth_card *card, struct qeth_reply *reply,
+                             unsigned long data)
 {
        struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
-       struct qeth_checksum_cmd *chksum_cb =
-                               (struct qeth_checksum_cmd *)reply->param;
+       u32 *features = reply->param;
 
-       QETH_CARD_TEXT(card, 4, "chkdoccb");
        if (qeth_setassparms_inspect_rc(cmd))
-               return 0;
+               return -EIO;
 
-       memset(chksum_cb, 0, sizeof(*chksum_cb));
-       if (cmd->data.setassparms.hdr.command_code == IPA_CMD_ASS_START) {
-               chksum_cb->supported =
-                               cmd->data.setassparms.data.chksum.supported;
-               QETH_CARD_TEXT_(card, 3, "strt:%x", chksum_cb->supported);
-       }
-       if (cmd->data.setassparms.hdr.command_code == IPA_CMD_ASS_ENABLE) {
-               chksum_cb->supported =
-                               cmd->data.setassparms.data.chksum.supported;
-               chksum_cb->enabled =
-                               cmd->data.setassparms.data.chksum.enabled;
-               QETH_CARD_TEXT_(card, 3, "supp:%x", chksum_cb->supported);
-               QETH_CARD_TEXT_(card, 3, "enab:%x", chksum_cb->enabled);
-       }
+       *features = cmd->data.setassparms.data.flags_32bit;
        return 0;
 }
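
The reworked callback above funnels its result through reply->param and now
signals a hard failure as a negative errno. A minimal userspace sketch of
that pattern, with every structure and name invented for illustration:

#include <errno.h>
#include <stdio.h>

struct reply {
	void *param;                    /* caller-owned result buffer */
};

/* stand-in for the hardware reply: a status plus a feature bitmask */
struct hw_reply {
	int rc;
	unsigned int flags_32bit;
};

static int start_csum_cb(struct reply *reply, struct hw_reply *data)
{
	unsigned int *features = reply->param;

	if (data->rc)
		return -EIO;            /* becomes the command's result */

	*features = data->flags_32bit;
	return 0;
}

int main(void)
{
	unsigned int features = 0;
	struct reply reply = { .param = &features };
	struct hw_reply hw = { .rc = 0, .flags_32bit = 0x32 };

	printf("rc=%d features=%#x\n", start_csum_cb(&reply, &hw), features);
	return 0;
}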
 
-/* Send command to OSA card and check results. */
-static int qeth_ipa_checksum_run_cmd(struct qeth_card *card,
-                                    enum qeth_ipa_funcs ipa_func,
-                                    __u16 cmd_code, long data,
-                                    struct qeth_checksum_cmd *chksum_cb,
-                                    enum qeth_prot_versions prot)
+static int qeth_set_csum_off(struct qeth_card *card, enum qeth_ipa_funcs cstype,
+                            enum qeth_prot_versions prot)
 {
-       struct qeth_cmd_buffer *iob;
-
-       QETH_CARD_TEXT(card, 4, "chkdocmd");
-       iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code,
-                                      sizeof(__u32), prot);
-       if (!iob)
-               return -ENOMEM;
-
-       __ipa_cmd(iob)->data.setassparms.data.flags_32bit = (__u32) data;
-       return qeth_send_ipa_cmd(card, iob, qeth_ipa_checksum_run_cmd_cb,
-                                chksum_cb);
+       return qeth_send_simple_setassparms_prot(card, cstype,
+                                                IPA_CMD_ASS_STOP, 0, prot);
 }
 
-static int qeth_send_checksum_on(struct qeth_card *card, int cstype,
-                                enum qeth_prot_versions prot)
+static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype,
+                           enum qeth_prot_versions prot)
 {
        u32 required_features = QETH_IPA_CHECKSUM_UDP | QETH_IPA_CHECKSUM_TCP;
-       struct qeth_checksum_cmd chksum_cb;
+       struct qeth_cmd_buffer *iob;
+       struct qeth_ipa_caps caps;
+       u32 features;
        int rc;
 
-       if (prot == QETH_PROT_IPV4)
+       /* some L3 HW requires combined L3+L4 csum offload: */
+       if (IS_LAYER3(card) && prot == QETH_PROT_IPV4 &&
+           cstype == IPA_OUTBOUND_CHECKSUM)
                required_features |= QETH_IPA_CHECKSUM_IP_HDR;
-       rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_START, 0,
-                                      &chksum_cb, prot);
-       if (!rc) {
-               if ((required_features & chksum_cb.supported) !=
-                   required_features)
-                       rc = -EIO;
-               else if (!(QETH_IPA_CHECKSUM_LP2LP & chksum_cb.supported) &&
-                        cstype == IPA_INBOUND_CHECKSUM)
-                       dev_warn(&card->gdev->dev,
-                                "Hardware checksumming is performed only if %s and its peer use different OSA Express 3 ports\n",
-                                QETH_CARD_IFNAME(card));
-       }
-       if (rc) {
-               qeth_send_simple_setassparms_prot(card, cstype,
-                                                 IPA_CMD_ASS_STOP, 0, prot);
-               dev_warn(&card->gdev->dev,
-                        "Starting HW IPv%d checksumming for %s failed, using SW checksumming\n",
-                        prot, QETH_CARD_IFNAME(card));
+
+       iob = qeth_get_setassparms_cmd(card, cstype, IPA_CMD_ASS_START, 0,
+                                      prot);
+       if (!iob)
+               return -ENOMEM;
+
+       rc = qeth_send_ipa_cmd(card, iob, qeth_start_csum_cb, &features);
+       if (rc)
                return rc;
+
+       if ((required_features & features) != required_features) {
+               qeth_set_csum_off(card, cstype, prot);
+               return -EOPNOTSUPP;
        }
-       rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_ENABLE,
-                                      chksum_cb.supported, &chksum_cb,
+
+       iob = qeth_get_setassparms_cmd(card, cstype, IPA_CMD_ASS_ENABLE, 4,
                                       prot);
-       if (!rc) {
-               if ((required_features & chksum_cb.enabled) !=
-                   required_features)
-                       rc = -EIO;
+       if (!iob) {
+               qeth_set_csum_off(card, cstype, prot);
+               return -ENOMEM;
        }
+
+       if (features & QETH_IPA_CHECKSUM_LP2LP)
+               required_features |= QETH_IPA_CHECKSUM_LP2LP;
+       __ipa_cmd(iob)->data.setassparms.data.flags_32bit = required_features;
+       rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_get_caps_cb, &caps);
        if (rc) {
-               qeth_send_simple_setassparms_prot(card, cstype,
-                                                 IPA_CMD_ASS_STOP, 0, prot);
-               dev_warn(&card->gdev->dev,
-                        "Enabling HW IPv%d checksumming for %s failed, using SW checksumming\n",
-                        prot, QETH_CARD_IFNAME(card));
+               qeth_set_csum_off(card, cstype, prot);
                return rc;
        }
 
+       if (!qeth_ipa_caps_supported(&caps, required_features) ||
+           !qeth_ipa_caps_enabled(&caps, required_features)) {
+               qeth_set_csum_off(card, cstype, prot);
+               return -EOPNOTSUPP;
+       }
+
        dev_info(&card->gdev->dev, "HW Checksumming (%sbound IPv%d) enabled\n",
                 cstype == IPA_INBOUND_CHECKSUM ? "in" : "out", prot);
+       if (!qeth_ipa_caps_enabled(&caps, QETH_IPA_CHECKSUM_LP2LP) &&
+           cstype == IPA_OUTBOUND_CHECKSUM)
+               dev_warn(&card->gdev->dev,
+                        "Hardware checksumming is performed only if %s and its peer use different OSA Express 3 ports\n",
+                        QETH_CARD_IFNAME(card));
        return 0;
 }
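
qeth_set_csum_on() now negotiates in two steps: IPA_CMD_ASS_START reports
what the card supports, IPA_CMD_ASS_ENABLE requests the required mask, and
every failure path rolls back with a STOP so the card ends in a defined
state. A compact userspace model of that flow, with stubs standing in for
the real IPA round-trips:

#include <errno.h>
#include <stdio.h>

#define CSUM_TCP 0x01
#define CSUM_UDP 0x02

/* stubs standing in for the START/ENABLE/STOP command round-trips */
static int assist_start(unsigned int *supported)
{
	*supported = CSUM_TCP | CSUM_UDP;       /* pretend the card has both */
	return 0;
}

static int assist_enable(unsigned int want, unsigned int *enabled)
{
	*enabled = want;
	return 0;
}

static void assist_stop(void)
{
}

static int csum_on(void)
{
	unsigned int required = CSUM_TCP | CSUM_UDP;
	unsigned int supported, enabled;
	int rc;

	rc = assist_start(&supported);          /* step 1: query support */
	if (rc)
		return rc;
	if ((required & supported) != required) {
		assist_stop();                  /* roll back the START */
		return -EOPNOTSUPP;
	}

	rc = assist_enable(required, &enabled); /* step 2: switch it on */
	if (rc || (required & enabled) != required) {
		assist_stop();                  /* leave a defined state */
		return rc ? rc : -EOPNOTSUPP;
	}
	return 0;
}

int main(void)
{
	printf("csum_on() = %d\n", csum_on());
	return 0;
}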
 
 static int qeth_set_ipa_csum(struct qeth_card *card, bool on, int cstype,
                             enum qeth_prot_versions prot)
 {
-       int rc = (on) ? qeth_send_checksum_on(card, cstype, prot)
-                     : qeth_send_simple_setassparms_prot(card, cstype,
-                                                         IPA_CMD_ASS_STOP, 0,
-                                                         prot);
-       return rc ? -EIO : 0;
+       return on ? qeth_set_csum_on(card, cstype, prot) :
+                   qeth_set_csum_off(card, cstype, prot);
 }
 
 static int qeth_start_tso_cb(struct qeth_card *card, struct qeth_reply *reply,
@@ -6393,7 +5983,7 @@ static int qeth_start_tso_cb(struct qeth_card *card, struct qeth_reply *reply,
        struct qeth_tso_start_data *tso_data = reply->param;
 
        if (qeth_setassparms_inspect_rc(cmd))
-               return 0;
+               return -EIO;
 
        tso_data->mss = cmd->data.setassparms.data.tso.mss;
        tso_data->supported = cmd->data.setassparms.data.tso.supported;
@@ -6459,10 +6049,7 @@ static int qeth_set_tso_on(struct qeth_card *card,
 static int qeth_set_ipa_tso(struct qeth_card *card, bool on,
                            enum qeth_prot_versions prot)
 {
-       int rc = on ? qeth_set_tso_on(card, prot) :
-                     qeth_set_tso_off(card, prot);
-
-       return rc ? -EIO : 0;
+       return on ? qeth_set_tso_on(card, prot) : qeth_set_tso_off(card, prot);
 }
 
 static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on)
@@ -6611,6 +6198,33 @@ netdev_features_t qeth_features_check(struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(qeth_features_check);
 
+void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+       struct qeth_card *card = dev->ml_priv;
+       struct qeth_qdio_out_q *queue;
+       unsigned int i;
+
+       QETH_CARD_TEXT(card, 5, "getstat");
+
+       stats->rx_packets = card->stats.rx_packets;
+       stats->rx_bytes = card->stats.rx_bytes;
+       stats->rx_errors = card->stats.rx_errors;
+       stats->rx_dropped = card->stats.rx_dropped;
+       stats->multicast = card->stats.rx_multicast;
+       stats->tx_errors = card->stats.tx_errors;
+
+       for (i = 0; i < card->qdio.no_out_queues; i++) {
+               queue = card->qdio.out_qs[i];
+
+               stats->tx_packets += queue->stats.tx_packets;
+               stats->tx_bytes += queue->stats.tx_bytes;
+               stats->tx_errors += queue->stats.tx_errors;
+               stats->tx_dropped += queue->stats.tx_dropped;
+               stats->tx_carrier_errors += queue->stats.tx_carrier_errors;
+       }
+}
+EXPORT_SYMBOL_GPL(qeth_get_stats64);
+
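
qeth_get_stats64() above sums per-queue counters only when user space asks
for them, so the hot TX path never touches a device-global counter. A
self-contained model of that aggregation, with structure and field names
made up for the sketch:

#include <stdio.h>

#define NUM_TXQ 4

struct txq_stats {
	unsigned long long tx_packets;
	unsigned long long tx_bytes;
};

static struct txq_stats txq[NUM_TXQ];

/* summed on demand, like .ndo_get_stats64 */
static void get_stats64(unsigned long long *pkts, unsigned long long *bytes)
{
	*pkts = 0;
	*bytes = 0;
	for (int i = 0; i < NUM_TXQ; i++) {
		*pkts += txq[i].tx_packets;
		*bytes += txq[i].tx_bytes;
	}
}

int main(void)
{
	unsigned long long pkts, bytes;

	txq[0].tx_packets = 3; txq[0].tx_bytes = 1800;  /* fake traffic */
	txq[2].tx_packets = 1; txq[2].tx_bytes = 60;

	get_stats64(&pkts, &bytes);
	printf("tx_packets=%llu tx_bytes=%llu\n", pkts, bytes);
	return 0;
}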
 int qeth_open(struct net_device *dev)
 {
        struct qeth_card *card = dev->ml_priv;
index 16fc51a..e3f4866 100644
@@ -125,24 +125,13 @@ unsigned char DM_ACT[] = {
 
 unsigned char IPA_PDU_HEADER[] = {
        0x00, 0xe0, 0x00, 0x00,  0x77, 0x77, 0x77, 0x77,
-       0x00, 0x00, 0x00, 0x14,  0x00, 0x00,
-               (IPA_PDU_HEADER_SIZE+sizeof(struct qeth_ipa_cmd)) / 256,
-               (IPA_PDU_HEADER_SIZE+sizeof(struct qeth_ipa_cmd)) % 256,
+       0x00, 0x00, 0x00, 0x14,  0x00, 0x00, 0x00, 0x00,
        0x10, 0x00, 0x00, 0x01,  0x00, 0x00, 0x00, 0x00,
        0xc1, 0x03, 0x00, 0x01,  0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00,  0x00, 0x24,
-               sizeof(struct qeth_ipa_cmd) / 256,
-               sizeof(struct qeth_ipa_cmd) % 256,
-       0x00,
-               sizeof(struct qeth_ipa_cmd) / 256,
-               sizeof(struct qeth_ipa_cmd) % 256,
-       0x05,
-       0x77, 0x77, 0x77, 0x77,
+       0x00, 0x00, 0x00, 0x00,  0x00, 0x24, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x05,  0x77, 0x77, 0x77, 0x77,
        0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
-       0x01, 0x00,
-               sizeof(struct qeth_ipa_cmd) / 256,
-               sizeof(struct qeth_ipa_cmd) % 256,
-       0x00, 0x00, 0x00, 0x40,
+       0x01, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x40,
 };
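
With this change the template no longer bakes sizeof(struct qeth_ipa_cmd)
into its length byte pairs; the slots are zeroed here and patched per
command at send time (compare the qeth_prepare_ipa_cmd() call further down,
which now takes an explicit u16 length). A sketch of that kind of
big-endian length patching; the offset matches the first length slot of the
array above, but treat the constants as illustrative:

#include <stdint.h>
#include <stdio.h>

static unsigned char pdu[64];   /* stand-in for the zeroed template */

static void set_be16(unsigned char *at, uint16_t val)
{
	at[0] = val / 256;      /* what the old initializer computed... */
	at[1] = val % 256;      /* ...at build time, now done per send  */
}

int main(void)
{
	uint16_t cmd_len = 0xf8;            /* hypothetical command length */

	set_be16(&pdu[14], 0x14 + cmd_len); /* total-length slot, bytes 14/15 */
	printf("%02x %02x\n", pdu[14], pdu[15]);
	return 0;
}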
 
 struct ipa_rc_msg {
@@ -212,12 +201,10 @@ static const struct ipa_rc_msg qeth_ipa_rc_msg[] = {
        {IPA_RC_LAN_OFFLINE,            "STRTLAN_LAN_DISABLED - LAN offline"},
        {IPA_RC_VEPA_TO_VEB_TRANSITION, "Adj. switch disabled port mode RR"},
        {IPA_RC_INVALID_IP_VERSION2,    "Invalid IP version"},
-       {IPA_RC_ENOMEM,                 "Memory problem"},
+       /* default for qeth_get_ipa_msg(): */
        {IPA_RC_FFFF,                   "Unknown Error"}
 };
 
-
-
 const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc)
 {
        int x;
index c7fb14a..f8c5d4a 100644
@@ -21,8 +21,6 @@
 extern unsigned char IPA_PDU_HEADER[];
 #define QETH_IPA_CMD_DEST_ADDR(buffer) (buffer + 0x2c)
 
-#define IPA_CMD_LENGTH (IPA_PDU_HEADER_SIZE + sizeof(struct qeth_ipa_cmd))
-
 #define QETH_SEQ_NO_LENGTH     4
 #define QETH_MPC_TOKEN_LENGTH  4
 #define QETH_MCL_LENGTH                4
@@ -83,6 +81,7 @@ enum qeth_card_types {
 #define IS_OSD(card)   ((card)->info.type == QETH_CARD_TYPE_OSD)
 #define IS_OSM(card)   ((card)->info.type == QETH_CARD_TYPE_OSM)
 #define IS_OSN(card)   ((card)->info.type == QETH_CARD_TYPE_OSN)
+#define IS_OSX(card)   ((card)->info.type == QETH_CARD_TYPE_OSX)
 #define IS_VM_NIC(card)        ((card)->info.guestlan)
 
 #define QETH_MPC_DIFINFO_LEN_INDICATES_LINK_TYPE 0x18
@@ -231,7 +230,6 @@ enum qeth_ipa_return_codes {
        IPA_RC_LAN_OFFLINE              = 0xe080,
        IPA_RC_VEPA_TO_VEB_TRANSITION   = 0xe090,
        IPA_RC_INVALID_IP_VERSION2      = 0xf001,
-       IPA_RC_ENOMEM                   = 0xfffe,
        IPA_RC_FFFF                     = 0xffff
 };
 /* for VNIC Characteristics */
@@ -419,12 +417,6 @@ enum qeth_ipa_checksum_bits {
        QETH_IPA_CHECKSUM_LP2LP         = 0x0020
 };
 
-/* IPA Assist checksum offload reply layout. */
-struct qeth_checksum_cmd {
-       __u32 supported;
-       __u32 enabled;
-} __packed;
-
 enum qeth_ipa_large_send_caps {
        QETH_IPA_LARGE_SEND_TCP         = 0x00000001,
 };
@@ -440,7 +432,6 @@ struct qeth_ipacmd_setassparms {
        union {
                __u32 flags_32bit;
                struct qeth_ipa_caps caps;
-               struct qeth_checksum_cmd chksum;
                struct qeth_arp_cache_entry arp_entry;
                struct qeth_arp_query_data query_arp;
                struct qeth_tso_start_data tso;
@@ -834,15 +825,10 @@ enum qeth_ipa_arp_return_codes {
 extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc);
 extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd);
 
-#define QETH_SETASS_BASE_LEN (IPA_PDU_HEADER_SIZE + \
-                             sizeof(struct qeth_ipacmd_hdr) + \
-                             sizeof(struct qeth_ipacmd_setassparms_hdr))
 #define QETH_SETADP_BASE_LEN (sizeof(struct qeth_ipacmd_hdr) + \
                              sizeof(struct qeth_ipacmd_setadpparms_hdr))
 #define QETH_SNMP_SETADP_CMDLENGTH 16
 
-#define QETH_ARP_DATA_SIZE 3968
-#define QETH_ARP_CMD_LEN (QETH_ARP_DATA_SIZE + 8)
 /* Helper functions */
 #define IS_IPA_REPLY(cmd) ((cmd->hdr.initiator == IPA_CMD_INITIATOR_HOST) || \
                           (cmd->hdr.initiator == IPA_CMD_INITIATOR_OSA_REPLY))
index 30f6160..8b223cc 100644
@@ -316,7 +316,7 @@ static ssize_t qeth_dev_recover_store(struct device *dev,
        if (!card)
                return -EINVAL;
 
-       if (card->state != CARD_STATE_UP)
+       if (!qeth_card_hw_is_reachable(card))
                return -EPERM;
 
        i = simple_strtoul(buf, &tmp, 16);
@@ -336,35 +336,36 @@ static ssize_t qeth_dev_performance_stats_show(struct device *dev,
        if (!card)
                return -EINVAL;
 
-       return sprintf(buf, "%i\n", card->options.performance_stats ? 1:0);
+       return sprintf(buf, "1\n");
 }
 
 static ssize_t qeth_dev_performance_stats_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t count)
 {
        struct qeth_card *card = dev_get_drvdata(dev);
-       char *tmp;
-       int i, rc = 0;
+       struct qeth_qdio_out_q *queue;
+       unsigned int i;
+       bool reset;
+       int rc;
 
        if (!card)
                return -EINVAL;
 
-       mutex_lock(&card->conf_mutex);
-       i = simple_strtoul(buf, &tmp, 16);
-       if ((i == 0) || (i == 1)) {
-               if (i == card->options.performance_stats)
-                       goto out;
-               card->options.performance_stats = i;
-               if (i == 0)
-                       memset(&card->perf_stats, 0,
-                               sizeof(struct qeth_perf_stats));
-               card->perf_stats.initial_rx_packets = card->stats.rx_packets;
-               card->perf_stats.initial_tx_packets = card->stats.tx_packets;
-       } else
-               rc = -EINVAL;
-out:
-       mutex_unlock(&card->conf_mutex);
-       return rc ? rc : count;
+       rc = kstrtobool(buf, &reset);
+       if (rc)
+               return rc;
+
+       if (reset) {
+               memset(&card->stats, 0, sizeof(card->stats));
+               for (i = 0; i < card->qdio.no_out_queues; i++) {
+                       queue = card->qdio.out_qs[i];
+                       if (!queue)
+                               break;
+                       memset(&queue->stats, 0, sizeof(queue->stats));
+               }
+       }
+
+       return count;
 }
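
The store handler now parses nothing but a boolean "reset" request via
kstrtobool(); there is no enable/disable state left to toggle. A userspace
approximation whose parser only loosely mimics kstrtobool()'s accepted
spellings and is illustrative only:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int parse_bool(const char *s, bool *res)
{
	switch (s[0]) {
	case 'y': case 'Y': case 't': case 'T': case '1':
		*res = true;
		return 0;
	case 'n': case 'N': case 'f': case 'F': case '0':
		*res = false;
		return 0;
	default:
		return -EINVAL;
	}
}

int main(void)
{
	bool reset;

	if (!parse_bool("1\n", &reset) && reset)
		puts("stats would be zeroed");
	return 0;
}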
 
 static DEVICE_ATTR(performance_stats, 0644, qeth_dev_performance_stats_show,
diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c
new file mode 100644
index 0000000..93a53fe
--- /dev/null
@@ -0,0 +1,370 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2018
+ */
+
+#define KMSG_COMPONENT "qeth"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/ethtool.h>
+#include "qeth_core.h"
+
+#define QETH_TXQ_STAT(_name, _stat) { \
+       .name = _name, \
+       .offset = offsetof(struct qeth_out_q_stats, _stat) \
+}
+
+#define QETH_CARD_STAT(_name, _stat) { \
+       .name = _name, \
+       .offset = offsetof(struct qeth_card_stats, _stat) \
+}
+
+struct qeth_stats {
+       char name[ETH_GSTRING_LEN];
+       unsigned int offset;
+};
+
+static const struct qeth_stats txq_stats[] = {
+       QETH_TXQ_STAT("IO buffers", bufs),
+       QETH_TXQ_STAT("IO buffer elements", buf_elements),
+       QETH_TXQ_STAT("packed IO buffers", bufs_pack),
+       QETH_TXQ_STAT("skbs", tx_packets),
+       QETH_TXQ_STAT("packed skbs", skbs_pack),
+       QETH_TXQ_STAT("SG skbs", skbs_sg),
+       QETH_TXQ_STAT("HW csum skbs", skbs_csum),
+       QETH_TXQ_STAT("TSO skbs", skbs_tso),
+       QETH_TXQ_STAT("linearized skbs", skbs_linearized),
+       QETH_TXQ_STAT("linearized+error skbs", skbs_linearized_fail),
+       QETH_TXQ_STAT("TSO bytes", tso_bytes),
+       QETH_TXQ_STAT("Packing mode switches", packing_mode_switch),
+};
+
+static const struct qeth_stats card_stats[] = {
+       QETH_CARD_STAT("rx0 IO buffers", rx_bufs),
+       QETH_CARD_STAT("rx0 HW csum skbs", rx_skb_csum),
+       QETH_CARD_STAT("rx0 SG skbs", rx_sg_skbs),
+       QETH_CARD_STAT("rx0 SG page frags", rx_sg_frags),
+       QETH_CARD_STAT("rx0 SG page allocs", rx_sg_alloc_page),
+};
+
+#define TXQ_STATS_LEN  ARRAY_SIZE(txq_stats)
+#define CARD_STATS_LEN ARRAY_SIZE(card_stats)
+
+static void qeth_add_stat_data(u64 **dst, void *src,
+                              const struct qeth_stats stats[],
+                              unsigned int size)
+{
+       unsigned int i;
+       char *stat;
+
+       for (i = 0; i < size; i++) {
+               stat = (char *)src + stats[i].offset;
+               **dst = *(u64 *)stat;
+               (*dst)++;
+       }
+}
+
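
qeth_add_stat_data() walks a {name, offset} table, so a single array drives
both the string and the data callbacks and a new counter costs exactly one
table line. A standalone model of the offsetof() technique, against an
invented stats structure:

#include <stddef.h>
#include <stdio.h>

struct out_q_stats { unsigned long long bufs, tx_packets; };

struct stat_desc {
	const char *name;
	size_t offset;
};

#define STAT(n, f) { .name = n, .offset = offsetof(struct out_q_stats, f) }

static const struct stat_desc descs[] = {
	STAT("IO buffers", bufs),
	STAT("skbs", tx_packets),
};

int main(void)
{
	struct out_q_stats q = { .bufs = 7, .tx_packets = 42 };

	for (size_t i = 0; i < sizeof(descs) / sizeof(descs[0]); i++) {
		unsigned long long *val =
			(unsigned long long *)((char *)&q + descs[i].offset);
		printf("%s: %llu\n", descs[i].name, *val);
	}
	return 0;
}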
+static void qeth_add_stat_strings(u8 **data, const char *prefix,
+                                 const struct qeth_stats stats[],
+                                 unsigned int size)
+{
+       unsigned int i;
+
+       for (i = 0; i < size; i++) {
+               snprintf(*data, ETH_GSTRING_LEN, "%s%s", prefix, stats[i].name);
+               *data += ETH_GSTRING_LEN;
+       }
+}
+
+static int qeth_get_sset_count(struct net_device *dev, int stringset)
+{
+       struct qeth_card *card = dev->ml_priv;
+
+       switch (stringset) {
+       case ETH_SS_STATS:
+               return CARD_STATS_LEN +
+                      card->qdio.no_out_queues * TXQ_STATS_LEN;
+       default:
+               return -EINVAL;
+       }
+}
+
+static void qeth_get_ethtool_stats(struct net_device *dev,
+                                  struct ethtool_stats *stats, u64 *data)
+{
+       struct qeth_card *card = dev->ml_priv;
+       unsigned int i;
+
+       qeth_add_stat_data(&data, &card->stats, card_stats, CARD_STATS_LEN);
+       for (i = 0; i < card->qdio.no_out_queues; i++)
+               qeth_add_stat_data(&data, &card->qdio.out_qs[i]->stats,
+                                  txq_stats, TXQ_STATS_LEN);
+}
+
+static void qeth_get_ringparam(struct net_device *dev,
+                              struct ethtool_ringparam *param)
+{
+       struct qeth_card *card = dev->ml_priv;
+
+       param->rx_max_pending = QDIO_MAX_BUFFERS_PER_Q;
+       param->rx_mini_max_pending = 0;
+       param->rx_jumbo_max_pending = 0;
+       param->tx_max_pending = QDIO_MAX_BUFFERS_PER_Q;
+
+       param->rx_pending = card->qdio.in_buf_pool.buf_count;
+       param->rx_mini_pending = 0;
+       param->rx_jumbo_pending = 0;
+       param->tx_pending = QDIO_MAX_BUFFERS_PER_Q;
+}
+
+static void qeth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+       struct qeth_card *card = dev->ml_priv;
+       char prefix[ETH_GSTRING_LEN] = "";
+       unsigned int i;
+
+       switch (stringset) {
+       case ETH_SS_STATS:
+               qeth_add_stat_strings(&data, prefix, card_stats,
+                                     CARD_STATS_LEN);
+               for (i = 0; i < card->qdio.no_out_queues; i++) {
+                       snprintf(prefix, ETH_GSTRING_LEN, "tx%u ", i);
+                       qeth_add_stat_strings(&data, prefix, txq_stats,
+                                             TXQ_STATS_LEN);
+               }
+               break;
+       default:
+               WARN_ON(1);
+               break;
+       }
+}
+
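
For each outbound queue, qeth_get_strings() re-emits the TX table with a
"tx<N> " prefix into fixed-width slots (ETH_GSTRING_LEN is 32 in the
kernel, mirrored below). A toy version that prints the resulting name
layout:

#include <stdio.h>

#define GSTRING_LEN 32
#define NUM_TXQ 2

static const char *txq_names[] = { "IO buffers", "skbs" };

int main(void)
{
	char slot[GSTRING_LEN];

	for (unsigned int q = 0; q < NUM_TXQ; q++)
		for (unsigned int i = 0; i < 2; i++) {
			snprintf(slot, sizeof(slot), "tx%u %s", q, txq_names[i]);
			puts(slot);     /* e.g. "tx0 IO buffers" */
		}
	return 0;
}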
+static void qeth_get_drvinfo(struct net_device *dev,
+                            struct ethtool_drvinfo *info)
+{
+       struct qeth_card *card = dev->ml_priv;
+
+       strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3",
+               sizeof(info->driver));
+       strlcpy(info->version, "1.0", sizeof(info->version));
+       strlcpy(info->fw_version, card->info.mcl_level,
+               sizeof(info->fw_version));
+       snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s",
+                CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card));
+}
+
+/* Helper function to fill 'advertising' and 'supported' which are the same. */
+/* Autoneg and full-duplex are supported and advertised unconditionally.     */
+/* Always advertise and support all speeds up to the specified maximum,     */
+/* and only the one specified port type.                                    */
+static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd,
+                               int maxspeed, int porttype)
+{
+       ethtool_link_ksettings_zero_link_mode(cmd, supported);
+       ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+       ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising);
+
+       ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+
+       switch (porttype) {
+       case PORT_TP:
+               ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+               break;
+       case PORT_FIBRE:
+               ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
+               break;
+       default:
+               ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+               WARN_ON_ONCE(1);
+       }
+
+       /* partially falls through, to also select the lower speeds */
+       switch (maxspeed) {
+       case SPEED_25000:
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    25000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    25000baseSR_Full);
+               break;
+       case SPEED_10000:
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    10000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    10000baseT_Full);
+               /* fall through */
+       case SPEED_1000:
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    1000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    1000baseT_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    1000baseT_Half);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    1000baseT_Half);
+               /* fall through */
+       case SPEED_100:
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    100baseT_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    100baseT_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    100baseT_Half);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    100baseT_Half);
+               /* fall through */
+       case SPEED_10:
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    10baseT_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    10baseT_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    10baseT_Half);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    10baseT_Half);
+               break;
+       default:
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    10baseT_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    10baseT_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    10baseT_Half);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    10baseT_Half);
+               WARN_ON_ONCE(1);
+       }
+}
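
The switch above leans on deliberate fall-through so that each maximum
speed also picks up every lower speed's link modes. The same idea reduced
to a runnable bitmask toy, with invented bit values:

#include <stdio.h>

enum { S10 = 1, S100 = 2, S1000 = 4, S10000 = 8 };

static unsigned int adv_mask(int maxspeed)
{
	unsigned int m = 0;

	switch (maxspeed) {
	case 10000:
		m |= S10000;
		/* fall through */
	case 1000:
		m |= S1000;
		/* fall through */
	case 100:
		m |= S100;
		/* fall through */
	case 10:
		m |= S10;
		break;
	}
	return m;
}

int main(void)
{
	printf("%#x\n", adv_mask(1000));        /* S1000|S100|S10 = 0x7 */
	return 0;
}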
+
+static int qeth_get_link_ksettings(struct net_device *netdev,
+                                  struct ethtool_link_ksettings *cmd)
+{
+       struct qeth_card *card = netdev->ml_priv;
+       enum qeth_link_types link_type;
+       struct carrier_info carrier_info;
+       int rc;
+
+       if (IS_IQD(card) || IS_VM_NIC(card))
+               link_type = QETH_LINK_TYPE_10GBIT_ETH;
+       else
+               link_type = card->info.link_type;
+
+       cmd->base.duplex = DUPLEX_FULL;
+       cmd->base.autoneg = AUTONEG_ENABLE;
+       cmd->base.phy_address = 0;
+       cmd->base.mdio_support = 0;
+       cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+       cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+
+       switch (link_type) {
+       case QETH_LINK_TYPE_FAST_ETH:
+       case QETH_LINK_TYPE_LANE_ETH100:
+               cmd->base.speed = SPEED_100;
+               cmd->base.port = PORT_TP;
+               break;
+       case QETH_LINK_TYPE_GBIT_ETH:
+       case QETH_LINK_TYPE_LANE_ETH1000:
+               cmd->base.speed = SPEED_1000;
+               cmd->base.port = PORT_FIBRE;
+               break;
+       case QETH_LINK_TYPE_10GBIT_ETH:
+               cmd->base.speed = SPEED_10000;
+               cmd->base.port = PORT_FIBRE;
+               break;
+       case QETH_LINK_TYPE_25GBIT_ETH:
+               cmd->base.speed = SPEED_25000;
+               cmd->base.port = PORT_FIBRE;
+               break;
+       default:
+               cmd->base.speed = SPEED_10;
+               cmd->base.port = PORT_TP;
+       }
+       qeth_set_cmd_adv_sup(cmd, cmd->base.speed, cmd->base.port);
+
+       /* Check if we can obtain more accurate information.     */
+       /* If the QUERY_CARD_INFO command is not supported or fails, */
+       /* just return the heuristic values filled in above.         */
+       rc = qeth_query_card_info(card, &carrier_info);
+       if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */
+               return 0;
+       if (rc) /* report error from the hardware operation */
+               return rc;
+       /* on success, fill in the information obtained from the hardware */
+
+       netdev_dbg(netdev,
+       "card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n",
+                       carrier_info.card_type,
+                       carrier_info.port_mode,
+                       carrier_info.port_speed);
+
+       /* Update attributes for which we've obtained more authoritative */
+       /* information; leave the rest the way they were filled in above. */
+       switch (carrier_info.card_type) {
+       case CARD_INFO_TYPE_1G_COPPER_A:
+       case CARD_INFO_TYPE_1G_COPPER_B:
+               cmd->base.port = PORT_TP;
+               qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port);
+               break;
+       case CARD_INFO_TYPE_1G_FIBRE_A:
+       case CARD_INFO_TYPE_1G_FIBRE_B:
+               cmd->base.port = PORT_FIBRE;
+               qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port);
+               break;
+       case CARD_INFO_TYPE_10G_FIBRE_A:
+       case CARD_INFO_TYPE_10G_FIBRE_B:
+               cmd->base.port = PORT_FIBRE;
+               qeth_set_cmd_adv_sup(cmd, SPEED_10000, cmd->base.port);
+               break;
+       }
+
+       switch (carrier_info.port_mode) {
+       case CARD_INFO_PORTM_FULLDUPLEX:
+               cmd->base.duplex = DUPLEX_FULL;
+               break;
+       case CARD_INFO_PORTM_HALFDUPLEX:
+               cmd->base.duplex = DUPLEX_HALF;
+               break;
+       }
+
+       switch (carrier_info.port_speed) {
+       case CARD_INFO_PORTS_10M:
+               cmd->base.speed = SPEED_10;
+               break;
+       case CARD_INFO_PORTS_100M:
+               cmd->base.speed = SPEED_100;
+               break;
+       case CARD_INFO_PORTS_1G:
+               cmd->base.speed = SPEED_1000;
+               break;
+       case CARD_INFO_PORTS_10G:
+               cmd->base.speed = SPEED_10000;
+               break;
+       case CARD_INFO_PORTS_25G:
+               cmd->base.speed = SPEED_25000;
+               break;
+       }
+
+       return 0;
+}
+
+const struct ethtool_ops qeth_ethtool_ops = {
+       .get_link = ethtool_op_get_link,
+       .get_ringparam = qeth_get_ringparam,
+       .get_strings = qeth_get_strings,
+       .get_ethtool_stats = qeth_get_ethtool_stats,
+       .get_sset_count = qeth_get_sset_count,
+       .get_drvinfo = qeth_get_drvinfo,
+       .get_link_ksettings = qeth_get_link_ksettings,
+};
+
+const struct ethtool_ops qeth_osn_ethtool_ops = {
+       .get_strings = qeth_get_strings,
+       .get_ethtool_stats = qeth_get_ethtool_stats,
+       .get_sset_count = qeth_get_sset_count,
+       .get_drvinfo = qeth_get_drvinfo,
+};
index 82f50cc..2c97142 100644
@@ -35,7 +35,7 @@ static void qeth_l2_vnicc_init(struct qeth_card *card);
 static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc,
                                          u32 *timeout);
 
-static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode)
+static int qeth_l2_setdelmac_makerc(struct qeth_card *card, u16 retcode)
 {
        int rc;
 
@@ -62,9 +62,6 @@ static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode)
        case IPA_RC_L2_MAC_NOT_FOUND:
                rc = -ENOENT;
                break;
-       case -ENOMEM:
-               rc = -ENOMEM;
-               break;
        default:
                rc = -EIO;
                break;
@@ -72,6 +69,15 @@ static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode)
        return rc;
 }
 
+static int qeth_l2_send_setdelmac_cb(struct qeth_card *card,
+                                    struct qeth_reply *reply,
+                                    unsigned long data)
+{
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
+
+       return qeth_l2_setdelmac_makerc(card, cmd->hdr.return_code);
+}
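
The new callback maps the 16-bit hardware return code straight to a
negative errno through the makerc helper, so callers of
qeth_send_ipa_cmd() see ordinary kernel errors. A sketch of that
translation, with the code values invented for the example:

#include <errno.h>
#include <stdio.h>

#define RC_SUCCESS       0x0000
#define RC_MAC_DUPLICATE 0x2005
#define RC_MAC_NOT_FOUND 0x2010

static int setdelmac_makerc(unsigned short retcode)
{
	switch (retcode) {
	case RC_SUCCESS:
		return 0;
	case RC_MAC_DUPLICATE:
		return -EADDRINUSE;
	case RC_MAC_NOT_FOUND:
		return -ENOENT;
	default:
		return -EIO;
	}
}

int main(void)
{
	printf("%d\n", setdelmac_makerc(RC_MAC_NOT_FOUND));    /* -2 */
	return 0;
}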
+
 static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac,
                           enum qeth_ipa_cmds ipacmd)
 {
@@ -85,8 +91,7 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac,
        cmd = __ipa_cmd(iob);
        cmd->data.setdelmac.mac_length = ETH_ALEN;
        ether_addr_copy(cmd->data.setdelmac.mac, mac);
-       return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob,
-                                          NULL, NULL));
+       return qeth_send_ipa_cmd(card, iob, qeth_l2_send_setdelmac_cb, NULL);
 }
 
 static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac)
@@ -169,9 +174,9 @@ static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
        return RTN_UNICAST;
 }
 
-static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
-                               struct sk_buff *skb, int ipv, int cast_type,
-                               unsigned int data_len)
+static void qeth_l2_fill_header(struct qeth_qdio_out_q *queue,
+                               struct qeth_hdr *hdr, struct sk_buff *skb,
+                               int ipv, int cast_type, unsigned int data_len)
 {
        struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
 
@@ -183,8 +188,7 @@ static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
                hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2;
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv);
-                       if (card->options.performance_stats)
-                               card->perf_stats.tx_csum++;
+                       QETH_TXQ_STAT_INC(queue, skbs_csum);
                }
        }
 
@@ -205,7 +209,7 @@ static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
        }
 }
 
-static int qeth_setdelvlan_makerc(struct qeth_card *card, int retcode)
+static int qeth_l2_setdelvlan_makerc(struct qeth_card *card, u16 retcode)
 {
        if (retcode)
                QETH_CARD_TEXT_(card, 2, "err%04x", retcode);
@@ -221,8 +225,6 @@ static int qeth_setdelvlan_makerc(struct qeth_card *card, int retcode)
                return -ENOENT;
        case IPA_RC_L2_VLAN_ID_NOT_ALLOWED:
                return -EPERM;
-       case -ENOMEM:
-               return -ENOMEM;
        default:
                return -EIO;
        }
@@ -240,9 +242,8 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card,
                                 cmd->data.setdelvlan.vlan_id,
                                 CARD_DEVID(card), cmd->hdr.return_code);
                QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command);
-               QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code);
        }
-       return 0;
+       return qeth_l2_setdelvlan_makerc(card, cmd->hdr.return_code);
 }
 
 static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
@@ -257,8 +258,7 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
                return -ENOMEM;
        cmd = __ipa_cmd(iob);
        cmd->data.setdelvlan.vlan_id = i;
-       return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob,
-                                           qeth_l2_send_setdelvlan_cb, NULL));
+       return qeth_send_ipa_cmd(card, iob, qeth_l2_send_setdelvlan_cb, NULL);
 }
 
 static int qeth_l2_vlan_rx_add_vid(struct net_device *dev,
@@ -319,6 +319,8 @@ static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode)
                qeth_clear_cmd_buffers(&card->read);
                qeth_clear_cmd_buffers(&card->write);
        }
+
+       flush_workqueue(card->event_wq);
 }
 
 static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
@@ -366,8 +368,8 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
                }
                work_done++;
                budget--;
-               card->stats.rx_packets++;
-               card->stats.rx_bytes += len;
+               QETH_CARD_STAT_INC(card, rx_packets);
+               QETH_CARD_STAT_ADD(card, rx_bytes, len);
        }
        return work_done;
 }
@@ -391,7 +393,7 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
 
        if (!IS_OSN(card)) {
                rc = qeth_setadpparms_change_macaddr(card);
-               if (!rc && is_valid_ether_addr(card->dev->dev_addr))
+               if (!rc)
                        goto out;
                QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n",
                                 CARD_DEVID(card), rc);
@@ -423,7 +425,7 @@ static int qeth_l2_validate_addr(struct net_device *dev)
 {
        struct qeth_card *card = dev->ml_priv;
 
-       if (IS_OSN(card) || (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))
+       if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)
                return eth_validate_addr(dev);
 
        QETH_CARD_TEXT(card, 4, "nomacadr");
@@ -439,9 +441,7 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p)
 
        QETH_CARD_TEXT(card, 3, "setmac");
 
-       if (card->info.type == QETH_CARD_TYPE_OSN ||
-           card->info.type == QETH_CARD_TYPE_OSM ||
-           card->info.type == QETH_CARD_TYPE_OSX) {
+       if (IS_OSM(card) || IS_OSX(card)) {
                QETH_CARD_TEXT(card, 3, "setmcTYP");
                return -EOPNOTSUPP;
        }
@@ -535,9 +535,6 @@ static void qeth_l2_set_rx_mode(struct net_device *dev)
        int i;
        int rc;
 
-       if (card->info.type == QETH_CARD_TYPE_OSN)
-               return;
-
        QETH_CARD_TEXT(card, 3, "setmulti");
 
        spin_lock_bh(&card->mclock);
@@ -623,17 +620,13 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
        int tx_bytes = skb->len;
        int rc;
 
+       queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
+
        if (card->state != CARD_STATE_UP) {
-               card->stats.tx_carrier_errors++;
+               QETH_TXQ_STAT_INC(queue, tx_carrier_errors);
                goto tx_drop;
        }
 
-       queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
-
-       if (card->options.performance_stats) {
-               card->perf_stats.outbound_cnt++;
-               card->perf_stats.outbound_start_time = qeth_get_micros();
-       }
        netif_stop_queue(dev);
 
        if (IS_OSN(card))
@@ -643,11 +636,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
                               qeth_l2_fill_header);
 
        if (!rc) {
-               card->stats.tx_packets++;
-               card->stats.tx_bytes += tx_bytes;
-               if (card->options.performance_stats)
-                       card->perf_stats.outbound_time += qeth_get_micros() -
-                               card->perf_stats.outbound_start_time;
+               QETH_TXQ_STAT_INC(queue, tx_packets);
+               QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes);
                netif_wake_queue(dev);
                return NETDEV_TX_OK;
        } else if (rc == -EBUSY) {
@@ -655,8 +645,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
        } /* else fall through */
 
 tx_drop:
-       card->stats.tx_dropped++;
-       card->stats.tx_errors++;
+       QETH_TXQ_STAT_INC(queue, tx_dropped);
+       QETH_TXQ_STAT_INC(queue, tx_errors);
        dev_kfree_skb_any(skb);
        netif_wake_queue(dev);
        return NETDEV_TX_OK;
@@ -695,30 +685,16 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 
        if (cgdev->state == CCWGROUP_ONLINE)
                qeth_l2_set_offline(cgdev);
+
+       cancel_work_sync(&card->close_dev_work);
        if (qeth_netdev_is_registered(card->dev))
                unregister_netdev(card->dev);
 }
 
-static const struct ethtool_ops qeth_l2_ethtool_ops = {
-       .get_link = ethtool_op_get_link,
-       .get_strings = qeth_core_get_strings,
-       .get_ethtool_stats = qeth_core_get_ethtool_stats,
-       .get_sset_count = qeth_core_get_sset_count,
-       .get_drvinfo = qeth_core_get_drvinfo,
-       .get_link_ksettings = qeth_core_ethtool_get_link_ksettings,
-};
-
-static const struct ethtool_ops qeth_l2_osn_ops = {
-       .get_strings = qeth_core_get_strings,
-       .get_ethtool_stats = qeth_core_get_ethtool_stats,
-       .get_sset_count = qeth_core_get_sset_count,
-       .get_drvinfo = qeth_core_get_drvinfo,
-};
-
 static const struct net_device_ops qeth_l2_netdev_ops = {
        .ndo_open               = qeth_open,
        .ndo_stop               = qeth_stop,
-       .ndo_get_stats          = qeth_get_stats,
+       .ndo_get_stats64        = qeth_get_stats64,
        .ndo_start_xmit         = qeth_l2_hard_start_xmit,
        .ndo_features_check     = qeth_features_check,
        .ndo_validate_addr      = qeth_l2_validate_addr,
@@ -732,20 +708,29 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
        .ndo_set_features       = qeth_set_features
 };
 
+static const struct net_device_ops qeth_osn_netdev_ops = {
+       .ndo_open               = qeth_open,
+       .ndo_stop               = qeth_stop,
+       .ndo_get_stats64        = qeth_get_stats64,
+       .ndo_start_xmit         = qeth_l2_hard_start_xmit,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_tx_timeout         = qeth_tx_timeout,
+};
+
 static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok)
 {
        int rc;
 
-       card->dev->priv_flags |= IFF_UNICAST_FLT;
-       card->dev->netdev_ops = &qeth_l2_netdev_ops;
-       if (card->info.type == QETH_CARD_TYPE_OSN) {
-               card->dev->ethtool_ops = &qeth_l2_osn_ops;
+       if (IS_OSN(card)) {
+               card->dev->netdev_ops = &qeth_osn_netdev_ops;
                card->dev->flags |= IFF_NOARP;
-       } else {
-               card->dev->ethtool_ops = &qeth_l2_ethtool_ops;
-               card->dev->needed_headroom = sizeof(struct qeth_hdr);
+               goto add_napi;
        }
 
+       card->dev->needed_headroom = sizeof(struct qeth_hdr);
+       card->dev->netdev_ops = &qeth_l2_netdev_ops;
+       card->dev->priv_flags |= IFF_UNICAST_FLT;
+
        if (IS_OSM(card)) {
                card->dev->features |= NETIF_F_VLAN_CHALLENGED;
        } else {
@@ -786,6 +771,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok)
                                       PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1));
        }
 
+add_napi:
        netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
        rc = register_netdev(card->dev);
        if (!rc && carrier_ok)
@@ -1120,20 +1106,14 @@ static int qeth_osn_send_control_data(struct qeth_card *card, int len,
 }
 
 static int qeth_osn_send_ipa_cmd(struct qeth_card *card,
-                       struct qeth_cmd_buffer *iob, int data_len)
+                                struct qeth_cmd_buffer *iob)
 {
-       u16 s1, s2;
+       u16 length;
 
        QETH_CARD_TEXT(card, 4, "osndipa");
 
-       qeth_prepare_ipa_cmd(card, iob);
-       s1 = (u16)(IPA_PDU_HEADER_SIZE + data_len);
-       s2 = (u16)data_len;
-       memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &s1, 2);
-       memcpy(QETH_IPA_PDU_LEN_PDU1(iob->data), &s2, 2);
-       memcpy(QETH_IPA_PDU_LEN_PDU2(iob->data), &s2, 2);
-       memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &s2, 2);
-       return qeth_osn_send_control_data(card, s1, iob);
+       memcpy(&length, QETH_IPA_PDU_LEN_TOTAL(iob->data), 2);
+       return qeth_osn_send_control_data(card, length, iob);
 }
 
 int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
@@ -1150,8 +1130,9 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
        if (!qeth_card_hw_is_reachable(card))
                return -ENODEV;
        iob = qeth_wait_for_buffer(&card->write);
+       qeth_prepare_ipa_cmd(card, iob, (u16) data_len);
        memcpy(__ipa_cmd(iob), data, data_len);
-       return qeth_osn_send_ipa_cmd(card, iob, data_len);
+       return qeth_osn_send_ipa_cmd(card, iob);
 }
 EXPORT_SYMBOL(qeth_osn_assist);
 
@@ -1330,7 +1311,7 @@ static void qeth_bridge_state_change(struct qeth_card *card,
        data->card = card;
        memcpy(&data->qports, qports,
                        sizeof(struct qeth_sbp_state_change) + extrasize);
-       queue_work(qeth_wq, &data->worker);
+       queue_work(card->event_wq, &data->worker);
 }
 
 struct qeth_bridge_host_data {
@@ -1402,14 +1383,12 @@ static void qeth_bridge_host_event(struct qeth_card *card,
        data->card = card;
        memcpy(&data->hostevs, hostevs,
                        sizeof(struct qeth_ipacmd_addr_change) + extrasize);
-       queue_work(qeth_wq, &data->worker);
+       queue_work(card->event_wq, &data->worker);
 }
 
 /* SETBRIDGEPORT support; sending commands */
 
 struct _qeth_sbp_cbctl {
-       u16 ipa_rc;
-       u16 cmd_rc;
        union {
                u32 supported;
                struct {
@@ -1419,23 +1398,21 @@ struct _qeth_sbp_cbctl {
        } data;
 };
 
-/**
- * qeth_bridgeport_makerc() - derive "traditional" error from hardware codes.
- * @card:                    qeth_card structure pointer, for debug messages.
- * @cbctl:                   state structure with hardware return codes.
- * @setcmd:                  IPA command code
- *
- * Returns negative errno-compatible error indication or 0 on success.
- */
 static int qeth_bridgeport_makerc(struct qeth_card *card,
-       struct _qeth_sbp_cbctl *cbctl, enum qeth_ipa_sbp_cmd setcmd)
+                                 struct qeth_ipa_cmd *cmd)
 {
+       struct qeth_ipacmd_setbridgeport *sbp = &cmd->data.sbp;
+       enum qeth_ipa_sbp_cmd setcmd = sbp->hdr.command_code;
+       u16 ipa_rc = cmd->hdr.return_code;
+       u16 sbp_rc = sbp->hdr.return_code;
        int rc;
-       int is_iqd = (card->info.type == QETH_CARD_TYPE_IQD);
 
-       if ((is_iqd && (cbctl->ipa_rc == IPA_RC_SUCCESS)) ||
-           (!is_iqd && (cbctl->ipa_rc == cbctl->cmd_rc)))
-               switch (cbctl->cmd_rc) {
+       if (ipa_rc == IPA_RC_SUCCESS && sbp_rc == IPA_RC_SUCCESS)
+               return 0;
+
+       if ((IS_IQD(card) && ipa_rc == IPA_RC_SUCCESS) ||
+           (!IS_IQD(card) && ipa_rc == sbp_rc)) {
+               switch (sbp_rc) {
                case IPA_RC_SUCCESS:
                        rc = 0;
                        break;
@@ -1499,8 +1476,8 @@ static int qeth_bridgeport_makerc(struct qeth_card *card,
                default:
                        rc = -EIO;
                }
-       else
-               switch (cbctl->ipa_rc) {
+       } else {
+               switch (ipa_rc) {
                case IPA_RC_NOTSUPP:
                        rc = -EOPNOTSUPP;
                        break;
@@ -1510,10 +1487,11 @@ static int qeth_bridgeport_makerc(struct qeth_card *card,
                default:
                        rc = -EIO;
                }
+       }
 
        if (rc) {
-               QETH_CARD_TEXT_(card, 2, "SBPi%04x", cbctl->ipa_rc);
-               QETH_CARD_TEXT_(card, 2, "SBPc%04x", cbctl->cmd_rc);
+               QETH_CARD_TEXT_(card, 2, "SBPi%04x", ipa_rc);
+               QETH_CARD_TEXT_(card, 2, "SBPc%04x", sbp_rc);
        }
        return rc;
 }
@@ -1545,15 +1523,15 @@ static int qeth_bridgeport_query_support_cb(struct qeth_card *card,
 {
        struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
        struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param;
+       int rc;
+
        QETH_CARD_TEXT(card, 2, "brqsupcb");
-       cbctl->ipa_rc = cmd->hdr.return_code;
-       cbctl->cmd_rc = cmd->data.sbp.hdr.return_code;
-       if ((cbctl->ipa_rc == 0) && (cbctl->cmd_rc == 0)) {
-               cbctl->data.supported =
-                       cmd->data.sbp.data.query_cmds_supp.supported_cmds;
-       } else {
-               cbctl->data.supported = 0;
-       }
+       rc = qeth_bridgeport_makerc(card, cmd);
+       if (rc)
+               return rc;
+
+       cbctl->data.supported =
+               cmd->data.sbp.data.query_cmds_supp.supported_cmds;
        return 0;
 }
 
@@ -1574,12 +1552,11 @@ static void qeth_bridgeport_query_support(struct qeth_card *card)
                                 sizeof(struct qeth_sbp_query_cmds_supp));
        if (!iob)
                return;
+
        if (qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_support_cb,
-                                                       (void *)&cbctl) ||
-           qeth_bridgeport_makerc(card, &cbctl,
-                                       IPA_SBP_QUERY_COMMANDS_SUPPORTED)) {
-               /* non-zero makerc signifies failure, and produce messages */
+                             &cbctl)) {
                card->options.sbp.role = QETH_SBP_ROLE_NONE;
+               card->options.sbp.supported_funcs = 0;
                return;
        }
        card->options.sbp.supported_funcs = cbctl.data.supported;
@@ -1591,16 +1568,16 @@ static int qeth_bridgeport_query_ports_cb(struct qeth_card *card,
        struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
        struct qeth_sbp_query_ports *qports = &cmd->data.sbp.data.query_ports;
        struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param;
+       int rc;
 
        QETH_CARD_TEXT(card, 2, "brqprtcb");
-       cbctl->ipa_rc = cmd->hdr.return_code;
-       cbctl->cmd_rc = cmd->data.sbp.hdr.return_code;
-       if ((cbctl->ipa_rc != 0) || (cbctl->cmd_rc != 0))
-               return 0;
+       rc = qeth_bridgeport_makerc(card, cmd);
+       if (rc)
+               return rc;
+
        if (qports->entry_length != sizeof(struct qeth_sbp_port_entry)) {
-               cbctl->cmd_rc = 0xffff;
                QETH_CARD_TEXT_(card, 2, "SBPs%04x", qports->entry_length);
-               return 0;
+               return -EINVAL;
        }
        /* first entry contains the state of the local port */
        if (qports->num_entries > 0) {
@@ -1625,7 +1602,6 @@ static int qeth_bridgeport_query_ports_cb(struct qeth_card *card,
 int qeth_bridgeport_query_ports(struct qeth_card *card,
        enum qeth_sbp_roles *role, enum qeth_sbp_states *state)
 {
-       int rc = 0;
        struct qeth_cmd_buffer *iob;
        struct _qeth_sbp_cbctl cbctl = {
                .data = {
@@ -1642,22 +1618,18 @@ int qeth_bridgeport_query_ports(struct qeth_card *card,
        iob = qeth_sbp_build_cmd(card, IPA_SBP_QUERY_BRIDGE_PORTS, 0);
        if (!iob)
                return -ENOMEM;
-       rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_ports_cb,
-                               (void *)&cbctl);
-       if (rc < 0)
-               return rc;
-       return qeth_bridgeport_makerc(card, &cbctl, IPA_SBP_QUERY_BRIDGE_PORTS);
+
+       return qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_ports_cb,
+                                &cbctl);
 }
 
 static int qeth_bridgeport_set_cb(struct qeth_card *card,
        struct qeth_reply *reply, unsigned long data)
 {
        struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data;
-       struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param;
+
        QETH_CARD_TEXT(card, 2, "brsetrcb");
-       cbctl->ipa_rc = cmd->hdr.return_code;
-       cbctl->cmd_rc = cmd->data.sbp.hdr.return_code;
-       return 0;
+       return qeth_bridgeport_makerc(card, cmd);
 }
 
 /**
@@ -1669,10 +1641,8 @@ static int qeth_bridgeport_set_cb(struct qeth_card *card,
  */
 int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role)
 {
-       int rc = 0;
        int cmdlength;
        struct qeth_cmd_buffer *iob;
-       struct _qeth_sbp_cbctl cbctl;
        enum qeth_ipa_sbp_cmd setcmd;
 
        QETH_CARD_TEXT(card, 2, "brsetrol");
@@ -1697,11 +1667,8 @@ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role)
        iob = qeth_sbp_build_cmd(card, setcmd, cmdlength);
        if (!iob)
                return -ENOMEM;
-       rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_set_cb,
-                               (void *)&cbctl);
-       if (rc < 0)
-               return rc;
-       return qeth_bridgeport_makerc(card, &cbctl, setcmd);
+
+       return qeth_send_ipa_cmd(card, iob, qeth_bridgeport_set_cb, NULL);
 }
 
 /**
@@ -1805,7 +1772,7 @@ static bool qeth_bridgeport_is_in_use(struct qeth_card *card)
 /* VNIC Characteristics support */
 
 /* handle VNICC IPA command return codes; convert to error codes */
-static int qeth_l2_vnicc_makerc(struct qeth_card *card, int ipa_rc)
+static int qeth_l2_vnicc_makerc(struct qeth_card *card, u16 ipa_rc)
 {
        int rc;
 
@@ -1863,7 +1830,7 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card *card,
 
        QETH_CARD_TEXT(card, 2, "vniccrcb");
        if (cmd->hdr.return_code)
-               return 0;
+               return qeth_l2_vnicc_makerc(card, cmd->hdr.return_code);
        /* return results to caller */
        card->options.vnicc.sup_chars = rep->hdr.sup;
        card->options.vnicc.cur_chars = rep->hdr.cur;
@@ -1884,7 +1851,6 @@ static int qeth_l2_vnicc_request(struct qeth_card *card,
        struct qeth_ipacmd_vnicc *req;
        struct qeth_cmd_buffer *iob;
        struct qeth_ipa_cmd *cmd;
-       int rc;
 
        QETH_CARD_TEXT(card, 2, "vniccreq");
 
@@ -1927,10 +1893,7 @@ static int qeth_l2_vnicc_request(struct qeth_card *card,
        }
 
        /* send request */
-       rc = qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb,
-                              (void *) cbctl);
-
-       return qeth_l2_vnicc_makerc(card, rc);
+       return qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, cbctl);
 }
 
 /* VNICC query VNIC characteristics request */
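
The recurring shape in these qeth hunks: hardware return codes are now translated to negative errnos inside the IPA reply callback, and qeth_send_ipa_cmd() simply propagates whatever the callback returns, so the old per-caller makerc step disappears. A condensed sketch of that pattern, using the driver types and IPA_RC_* constants visible above (my_makerc and my_cmd_cb are illustrative names, not symbols from the driver):

	static int my_makerc(u16 hw_rc)
	{
		/* map the card's return code onto an errno the caller understands */
		switch (hw_rc) {
		case IPA_RC_SUCCESS:
			return 0;
		case IPA_RC_NOTSUPP:
			return -EOPNOTSUPP;
		default:
			return -EIO;
		}
	}

	static int my_cmd_cb(struct qeth_card *card, struct qeth_reply *reply,
			     unsigned long data)
	{
		struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data;

		/* a non-zero return here is propagated by qeth_send_ipa_cmd() */
		return my_makerc(cmd->hdr.return_code);
	}
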
index 59535ec..07c3149 100644
@@ -253,8 +253,7 @@ static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
                } else
                        rc = qeth_l3_register_addr_entry(card, addr);
 
-               if (!rc || (rc == IPA_RC_DUPLICATE_IP_ADDRESS) ||
-                               (rc == IPA_RC_LAN_OFFLINE)) {
+               if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) {
                        addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
                        if (addr->ref_counter < 1) {
                                qeth_l3_deregister_addr_entry(card, addr);
@@ -338,10 +337,28 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
 
 }
 
+static int qeth_l3_setdelip_cb(struct qeth_card *card, struct qeth_reply *reply,
+                              unsigned long data)
+{
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
+
+       switch (cmd->hdr.return_code) {
+       case IPA_RC_SUCCESS:
+               return 0;
+       case IPA_RC_DUPLICATE_IP_ADDRESS:
+               return -EADDRINUSE;
+       case IPA_RC_MC_ADDR_NOT_FOUND:
+               return -ENOENT;
+       case IPA_RC_LAN_OFFLINE:
+               return -ENETDOWN;
+       default:
+               return -EIO;
+       }
+}
+
 static int qeth_l3_send_setdelmc(struct qeth_card *card,
                        struct qeth_ipaddr *addr, int ipacmd)
 {
-       int rc;
        struct qeth_cmd_buffer *iob;
        struct qeth_ipa_cmd *cmd;
 
@@ -358,9 +375,7 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card,
        else
                memcpy(&cmd->data.setdelipm.ip4, &addr->u.a4.addr, 4);
 
-       rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
-
-       return rc;
+       return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL);
 }
 
 static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len)
@@ -422,7 +437,7 @@ static int qeth_l3_send_setdelip(struct qeth_card *card,
                cmd->data.setdelip4.flags = flags;
        }
 
-       return qeth_send_ipa_cmd(card, iob, NULL, NULL);
+       return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL);
 }
 
 static int qeth_l3_send_setrouting(struct qeth_card *card,
@@ -942,12 +957,13 @@ static int qeth_l3_start_ipassists(struct qeth_card *card)
 static int qeth_l3_iqd_read_initial_mac_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
 
-       cmd = (struct qeth_ipa_cmd *) data;
-       if (cmd->hdr.return_code == 0)
-               ether_addr_copy(card->dev->dev_addr,
-                               cmd->data.create_destroy_addr.unique_id);
+       if (cmd->hdr.return_code)
+               return -EIO;
+
+       ether_addr_copy(card->dev->dev_addr,
+                       cmd->data.create_destroy_addr.unique_id);
        return 0;
 }
 
@@ -975,19 +991,18 @@ static int qeth_l3_iqd_read_initial_mac(struct qeth_card *card)
 static int qeth_l3_get_unique_id_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
 {
-       struct qeth_ipa_cmd *cmd;
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
 
-       cmd = (struct qeth_ipa_cmd *) data;
-       if (cmd->hdr.return_code == 0)
+       if (cmd->hdr.return_code == 0) {
                card->info.unique_id = *((__u16 *)
                                &cmd->data.create_destroy_addr.unique_id[6]);
-       else {
-               card->info.unique_id =  UNIQUE_ID_IF_CREATE_ADDR_FAILED |
-                                       UNIQUE_ID_NOT_BY_CARD;
-               dev_warn(&card->gdev->dev, "The network adapter failed to "
-                       "generate a unique ID\n");
+               return 0;
        }
-       return 0;
+
+       card->info.unique_id = UNIQUE_ID_IF_CREATE_ADDR_FAILED |
+                              UNIQUE_ID_NOT_BY_CARD;
+       dev_warn(&card->gdev->dev, "The network adapter failed to generate a unique ID\n");
+       return -EIO;
 }
 
 static int qeth_l3_get_unique_id(struct qeth_card *card)
@@ -1070,7 +1085,7 @@ qeth_diags_trace_cb(struct qeth_card *card, struct qeth_reply *reply,
                                 cmd->data.diagass.action, CARD_DEVID(card));
        }
 
-       return 0;
+       return rc ? -EIO : 0;
 }
 
 static int
@@ -1300,12 +1315,11 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
                                ip_eth_mc_map(ip_hdr(skb)->daddr, tg_addr);
                        else
                                ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr);
-
-                       card->stats.multicast++;
+                       QETH_CARD_STAT_INC(card, rx_multicast);
                        break;
                case QETH_CAST_BROADCAST:
                        ether_addr_copy(tg_addr, card->dev->broadcast);
-                       card->stats.multicast++;
+                       QETH_CARD_STAT_INC(card, rx_multicast);
                        break;
                default:
                        if (card->options.sniffer)
@@ -1386,8 +1400,8 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
                }
                work_done++;
                budget--;
-               card->stats.rx_packets++;
-               card->stats.rx_bytes += len;
+               QETH_CARD_STAT_INC(card, rx_packets);
+               QETH_CARD_STAT_ADD(card, rx_bytes, len);
        }
        return work_done;
 }
@@ -1428,6 +1442,8 @@ static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
                qeth_clear_cmd_buffers(&card->read);
                qeth_clear_cmd_buffers(&card->write);
        }
+
+       flush_workqueue(card->event_wq);
 }
 
 /*
@@ -1479,14 +1495,14 @@ static void qeth_l3_set_rx_mode(struct net_device *dev)
                        switch (addr->disp_flag) {
                        case QETH_DISP_ADDR_DELETE:
                                rc = qeth_l3_deregister_addr_entry(card, addr);
-                               if (!rc || rc == IPA_RC_MC_ADDR_NOT_FOUND) {
+                               if (!rc || rc == -ENOENT) {
                                        hash_del(&addr->hnode);
                                        kfree(addr);
                                }
                                break;
                        case QETH_DISP_ADDR_ADD:
                                rc = qeth_l3_register_addr_entry(card, addr);
-                               if (rc && rc != IPA_RC_LAN_OFFLINE) {
+                               if (rc && rc != -ENETDOWN) {
                                        hash_del(&addr->hnode);
                                        kfree(addr);
                                        break;
@@ -1507,7 +1523,7 @@ static void qeth_l3_set_rx_mode(struct net_device *dev)
        qeth_l3_handle_promisc_mode(card);
 }
 
-static int qeth_l3_arp_makerc(int rc)
+static int qeth_l3_arp_makerc(u16 rc)
 {
        switch (rc) {
        case IPA_RC_SUCCESS:
@@ -1524,8 +1540,18 @@ static int qeth_l3_arp_makerc(int rc)
        }
 }
 
+static int qeth_l3_arp_cmd_cb(struct qeth_card *card, struct qeth_reply *reply,
+                             unsigned long data)
+{
+       struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
+
+       qeth_setassparms_cb(card, reply, data);
+       return qeth_l3_arp_makerc(cmd->hdr.return_code);
+}
+
 static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries)
 {
+       struct qeth_cmd_buffer *iob;
        int rc;
 
        QETH_CARD_TEXT(card, 3, "arpstnoe");
@@ -1540,13 +1566,19 @@ static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries)
        if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) {
                return -EOPNOTSUPP;
        }
-       rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING,
-                                         IPA_CMD_ASS_ARP_SET_NO_ENTRIES,
-                                         no_entries);
+
+       iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
+                                      IPA_CMD_ASS_ARP_SET_NO_ENTRIES, 4,
+                                      QETH_PROT_IPV4);
+       if (!iob)
+               return -ENOMEM;
+
+       __ipa_cmd(iob)->data.setassparms.data.flags_32bit = (u32) no_entries;
+       rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_cmd_cb, NULL);
        if (rc)
                QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on device %x: %#x\n",
                                 CARD_DEVID(card), rc);
-       return qeth_l3_arp_makerc(rc);
+       return rc;
 }
 
 static __u32 get_arp_entry_size(struct qeth_card *card,
@@ -1597,7 +1629,6 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card,
        struct qeth_ipa_cmd *cmd;
        struct qeth_arp_query_data *qdata;
        struct qeth_arp_query_info *qinfo;
-       int i;
        int e;
        int entrybytes_done;
        int stripped_bytes;
@@ -1611,13 +1642,13 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card,
        if (cmd->hdr.return_code) {
                QETH_CARD_TEXT(card, 4, "arpcberr");
                QETH_CARD_TEXT_(card, 4, "%i", cmd->hdr.return_code);
-               return 0;
+               return qeth_l3_arp_makerc(cmd->hdr.return_code);
        }
        if (cmd->data.setassparms.hdr.return_code) {
                cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
                QETH_CARD_TEXT(card, 4, "setaperr");
                QETH_CARD_TEXT_(card, 4, "%i", cmd->hdr.return_code);
-               return 0;
+               return qeth_l3_arp_makerc(cmd->hdr.return_code);
        }
        qdata = &cmd->data.setassparms.data.query_arp;
        QETH_CARD_TEXT_(card, 4, "anoen%i", qdata->no_entries);
@@ -1644,9 +1675,9 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card,
                        break;
 
                if ((qinfo->udata_len - qinfo->udata_offset) < esize) {
-                       QETH_CARD_TEXT_(card, 4, "qaer3%i", -ENOMEM);
-                       cmd->hdr.return_code = IPA_RC_ENOMEM;
-                       goto out_error;
+                       QETH_CARD_TEXT_(card, 4, "qaer3%i", -ENOSPC);
+                       memset(qinfo->udata, 0, 4);
+                       return -ENOSPC;
                }
 
                memcpy(qinfo->udata + qinfo->udata_offset,
@@ -1669,10 +1700,6 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card,
        memcpy(qinfo->udata + QETH_QARP_MASK_OFFSET, &qdata->reply_bits, 2);
        QETH_CARD_TEXT_(card, 4, "rc%i", 0);
        return 0;
-out_error:
-       i = 0;
-       memcpy(qinfo->udata, &i, 4);
-       return 0;
 }
 
 static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
@@ -1694,13 +1721,11 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
                return -ENOMEM;
        cmd = __ipa_cmd(iob);
        cmd->data.setassparms.data.query_arp.request_bits = 0x000F;
-       rc = qeth_send_control_data(card,
-                                   QETH_SETASS_BASE_LEN + QETH_ARP_CMD_LEN,
-                                   iob, qeth_l3_arp_query_cb, qinfo);
+       rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_query_cb, qinfo);
        if (rc)
                QETH_DBF_MESSAGE(2, "Error while querying ARP cache on device %x: %#x\n",
                                 CARD_DEVID(card), rc);
-       return qeth_l3_arp_makerc(rc);
+       return rc;
 }
 
 static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata)
@@ -1782,16 +1807,16 @@ static int qeth_l3_arp_modify_entry(struct qeth_card *card,
        cmd_entry = &__ipa_cmd(iob)->data.setassparms.data.arp_entry;
        ether_addr_copy(cmd_entry->macaddr, entry->macaddr);
        memcpy(cmd_entry->ipaddr, entry->ipaddr, 4);
-       rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_cb, NULL);
+       rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_cmd_cb, NULL);
        if (rc)
                QETH_DBF_MESSAGE(2, "Could not modify (cmd: %#x) ARP entry on device %x: %#x\n",
                                 arp_cmd, CARD_DEVID(card), rc);
-
-       return qeth_l3_arp_makerc(rc);
+       return rc;
 }
 
 static int qeth_l3_arp_flush_cache(struct qeth_card *card)
 {
+       struct qeth_cmd_buffer *iob;
        int rc;
 
        QETH_CARD_TEXT(card, 3, "arpflush");
@@ -1806,12 +1831,18 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card)
        if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) {
                return -EOPNOTSUPP;
        }
-       rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING,
-                                         IPA_CMD_ASS_ARP_FLUSH_CACHE, 0);
+
+       iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
+                                      IPA_CMD_ASS_ARP_FLUSH_CACHE, 0,
+                                      QETH_PROT_IPV4);
+       if (!iob)
+               return -ENOMEM;
+
+       rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_cmd_cb, NULL);
        if (rc)
                QETH_DBF_MESSAGE(2, "Could not flush ARP cache on device %x: %#x\n",
                                 CARD_DEVID(card), rc);
-       return qeth_l3_arp_makerc(rc);
+       return rc;
 }
 
 static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
@@ -1913,12 +1944,13 @@ static u8 qeth_l3_cast_type_to_flag(int cast_type)
        return QETH_CAST_UNICAST;
 }
 
-static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
-                               struct sk_buff *skb, int ipv, int cast_type,
-                               unsigned int data_len)
+static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue,
+                               struct qeth_hdr *hdr, struct sk_buff *skb,
+                               int ipv, int cast_type, unsigned int data_len)
 {
        struct qeth_hdr_layer3 *l3_hdr = &hdr->hdr.l3;
        struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
+       struct qeth_card *card = queue->card;
 
        hdr->hdr.l3.length = data_len;
 
@@ -1940,8 +1972,7 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
                        /* some HW requires combined L3+L4 csum offload: */
                        if (ipv == 4)
                                hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_CSUM_HDR_REQ;
-                       if (card->options.performance_stats)
-                               card->perf_stats.tx_csum++;
+                       QETH_TXQ_STAT_INC(queue, skbs_csum);
                }
        }
 
@@ -2042,6 +2073,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
        int tx_bytes = skb->len;
        int rc;
 
+       queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
+
        if (IS_IQD(card)) {
                if (card->options.sniffer)
                        goto tx_drop;
@@ -2052,19 +2085,13 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
        }
 
        if (card->state != CARD_STATE_UP) {
-               card->stats.tx_carrier_errors++;
+               QETH_TXQ_STAT_INC(queue, tx_carrier_errors);
                goto tx_drop;
        }
 
        if (cast_type == RTN_BROADCAST && !card->info.broadcast_capable)
                goto tx_drop;
 
-       queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
-
-       if (card->options.performance_stats) {
-               card->perf_stats.outbound_cnt++;
-               card->perf_stats.outbound_start_time = qeth_get_micros();
-       }
        netif_stop_queue(dev);
 
        if (ipv == 4 || IS_IQD(card))
@@ -2074,11 +2101,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
                               qeth_l3_fill_header);
 
        if (!rc) {
-               card->stats.tx_packets++;
-               card->stats.tx_bytes += tx_bytes;
-               if (card->options.performance_stats)
-                       card->perf_stats.outbound_time += qeth_get_micros() -
-                               card->perf_stats.outbound_start_time;
+               QETH_TXQ_STAT_INC(queue, tx_packets);
+               QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes);
                netif_wake_queue(dev);
                return NETDEV_TX_OK;
        } else if (rc == -EBUSY) {
@@ -2086,22 +2110,13 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
        } /* else fall through */
 
 tx_drop:
-       card->stats.tx_dropped++;
-       card->stats.tx_errors++;
+       QETH_TXQ_STAT_INC(queue, tx_dropped);
+       QETH_TXQ_STAT_INC(queue, tx_errors);
        dev_kfree_skb_any(skb);
        netif_wake_queue(dev);
        return NETDEV_TX_OK;
 }
 
-static const struct ethtool_ops qeth_l3_ethtool_ops = {
-       .get_link = ethtool_op_get_link,
-       .get_strings = qeth_core_get_strings,
-       .get_ethtool_stats = qeth_core_get_ethtool_stats,
-       .get_sset_count = qeth_core_get_sset_count,
-       .get_drvinfo = qeth_core_get_drvinfo,
-       .get_link_ksettings = qeth_core_ethtool_get_link_ksettings,
-};
-
 /*
  * we need NOARP for IPv4 but we want neighbor solicitation for IPv6. Setting
  * NOARP on the netdevice is no option because it also turns off neighbor
@@ -2138,7 +2153,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb,
 static const struct net_device_ops qeth_l3_netdev_ops = {
        .ndo_open               = qeth_open,
        .ndo_stop               = qeth_stop,
-       .ndo_get_stats          = qeth_get_stats,
+       .ndo_get_stats64        = qeth_get_stats64,
        .ndo_start_xmit         = qeth_l3_hard_start_xmit,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = qeth_l3_set_rx_mode,
@@ -2153,7 +2168,7 @@ static const struct net_device_ops qeth_l3_netdev_ops = {
 static const struct net_device_ops qeth_l3_osa_netdev_ops = {
        .ndo_open               = qeth_open,
        .ndo_stop               = qeth_stop,
-       .ndo_get_stats          = qeth_get_stats,
+       .ndo_get_stats64        = qeth_get_stats64,
        .ndo_start_xmit         = qeth_l3_hard_start_xmit,
        .ndo_features_check     = qeth_l3_osa_features_check,
        .ndo_validate_addr      = eth_validate_addr,
@@ -2223,7 +2238,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok)
                return -ENODEV;
 
        card->dev->needed_headroom = headroom;
-       card->dev->ethtool_ops = &qeth_l3_ethtool_ops;
        card->dev->features |=  NETIF_F_HW_VLAN_CTAG_TX |
                                NETIF_F_HW_VLAN_CTAG_RX |
                                NETIF_F_HW_VLAN_CTAG_FILTER;
@@ -2278,6 +2292,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
        if (cgdev->state == CCWGROUP_ONLINE)
                qeth_l3_set_offline(cgdev);
 
+       cancel_work_sync(&card->close_dev_work);
        if (qeth_netdev_is_registered(card->dev))
                unregister_netdev(card->dev);
        qeth_l3_clear_ip_htable(card, 0);
index 9cf30d1..e390f8c 100644
@@ -403,7 +403,6 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device)
                goto failed;
 
        /* report size limit per scatter-gather segment */
-       adapter->dma_parms.max_segment_size = ZFCP_QDIO_SBALE_LEN;
        adapter->ccw_device->dev.dma_parms = &adapter->dma_parms;
 
        adapter->stat_read_buf_num = FSF_STATUS_READS_RECOM;
index 00acc71..f4f6a07 100644
@@ -428,6 +428,8 @@ static struct scsi_host_template zfcp_scsi_host_template = {
        .max_sectors             = (((QDIO_MAX_ELEMENTS_PER_BUFFER - 1)
                                     * ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2) * 8,
                                   /* GCD, adjusted later */
+       /* report size limit per scatter-gather segment */
+       .max_segment_size        = ZFCP_QDIO_SBALE_LEN,
        .dma_boundary            = ZFCP_QDIO_SBALE_LEN - 1,
        .shost_attrs             = zfcp_sysfs_shost_attrs,
        .sdev_attrs              = zfcp_sysfs_sdev_attrs,
index 128d658..16957d7 100644
@@ -295,7 +295,7 @@ NCR_700_detect(struct scsi_host_template *tpnt,
        if(tpnt->sdev_attrs == NULL)
                tpnt->sdev_attrs = NCR_700_dev_attrs;
 
-       memory = dma_alloc_attrs(hostdata->dev, TOTAL_MEM_SIZE, &pScript,
+       memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript,
                                 GFP_KERNEL, DMA_ATTR_NON_CONSISTENT);
        if(memory == NULL) {
                printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n");
index f83f79b..07efcb9 100644
@@ -280,7 +280,7 @@ static ssize_t asd_show_dev_rev(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%s\n",
                        asd_dev_rev[asd_ha->revision_id]);
 }
-static DEVICE_ATTR(revision, S_IRUGO, asd_show_dev_rev, NULL);
+static DEVICE_ATTR(aic_revision, S_IRUGO, asd_show_dev_rev, NULL);
 
 static ssize_t asd_show_dev_bios_build(struct device *dev,
                                       struct device_attribute *attr,char *buf)
@@ -477,7 +477,7 @@ static int asd_create_dev_attrs(struct asd_ha_struct *asd_ha)
 {
        int err;
 
-       err = device_create_file(&asd_ha->pcidev->dev, &dev_attr_revision);
+       err = device_create_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision);
        if (err)
                return err;
 
@@ -499,13 +499,13 @@ err_update_bios:
 err_biosb:
        device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build);
 err_rev:
-       device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision);
+       device_remove_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision);
        return err;
 }
 
 static void asd_remove_dev_attrs(struct asd_ha_struct *asd_ha)
 {
-       device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision);
+       device_remove_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision);
        device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build);
        device_remove_file(&asd_ha->pcidev->dev, &dev_attr_pcba_sn);
        device_remove_file(&asd_ha->pcidev->dev, &dev_attr_update_bios);
index 350257c..bc9f2a2 100644
@@ -240,6 +240,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba)
                return NULL;
        }
 
+       cmgr->hba = hba;
        cmgr->free_list = kcalloc(arr_sz, sizeof(*cmgr->free_list),
                                  GFP_KERNEL);
        if (!cmgr->free_list) {
@@ -256,7 +257,6 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba)
                goto mem_err;
        }
 
-       cmgr->hba = hba;
        cmgr->cmds = (struct bnx2fc_cmd **)(cmgr + 1);
 
        for (i = 0; i < arr_sz; i++)  {
@@ -295,7 +295,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba)
 
        /* Allocate pool of io_bdts - one for each bnx2fc_cmd */
        mem_size = num_ios * sizeof(struct io_bdt *);
-       cmgr->io_bdt_pool = kmalloc(mem_size, GFP_KERNEL);
+       cmgr->io_bdt_pool = kzalloc(mem_size, GFP_KERNEL);
        if (!cmgr->io_bdt_pool) {
                printk(KERN_ERR PFX "failed to alloc io_bdt_pool\n");
                goto mem_err;
index bfa13e3..c8bad2c 100644
@@ -3687,6 +3687,7 @@ static int cxlflash_probe(struct pci_dev *pdev,
        host->max_cmd_len = CXLFLASH_MAX_CDB_LEN;
 
        cfg = shost_priv(host);
+       cfg->state = STATE_PROBING;
        cfg->host = host;
        rc = alloc_mem(cfg);
        if (rc) {
@@ -3775,6 +3776,7 @@ out:
        return rc;
 
 out_remove:
+       cfg->state = STATE_PROBED;
        cxlflash_remove(pdev);
        goto out;
 }
index be83590..ff943f4 100644
@@ -1726,14 +1726,14 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
            fc_frame_payload_op(fp) != ELS_LS_ACC) {
                FC_LPORT_DBG(lport, "FLOGI not accepted or bad response\n");
                fc_lport_error(lport, fp);
-               goto err;
+               goto out;
        }
 
        flp = fc_frame_payload_get(fp, sizeof(*flp));
        if (!flp) {
                FC_LPORT_DBG(lport, "FLOGI bad response\n");
                fc_lport_error(lport, fp);
-               goto err;
+               goto out;
        }
 
        mfs = ntohs(flp->fl_csp.sp_bb_data) &
@@ -1743,7 +1743,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
                FC_LPORT_DBG(lport, "FLOGI bad mfs:%hu response, "
                             "lport->mfs:%hu\n", mfs, lport->mfs);
                fc_lport_error(lport, fp);
-               goto err;
+               goto out;
        }
 
        if (mfs <= lport->mfs) {
index 9192a1d..dfba492 100644
@@ -184,7 +184,6 @@ void fc_rport_destroy(struct kref *kref)
        struct fc_rport_priv *rdata;
 
        rdata = container_of(kref, struct fc_rport_priv, kref);
-       WARN_ON(!list_empty(&rdata->peers));
        kfree_rcu(rdata, rcu);
 }
 EXPORT_SYMBOL(fc_rport_destroy);
index 661512b..e27f4df 100644
@@ -62,7 +62,7 @@
 
 /* make sure inq_product_rev string corresponds to this version */
 #define SDEBUG_VERSION "0188"  /* format to fit INQUIRY revision field */
-static const char *sdebug_version_date = "20180128";
+static const char *sdebug_version_date = "20190125";
 
 #define MY_NAME "scsi_debug"
 
@@ -735,7 +735,7 @@ static inline bool scsi_debug_lbp(void)
                (sdebug_lbpu || sdebug_lbpws || sdebug_lbpws10);
 }
 
-static void *fake_store(unsigned long long lba)
+static void *lba2fake_store(unsigned long long lba)
 {
        lba = do_div(lba, sdebug_store_sectors);
 
@@ -2514,8 +2514,8 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba,
        return ret;
 }
 
-/* If fake_store(lba,num) compares equal to arr(num), then copy top half of
- * arr into fake_store(lba,num) and return true. If comparison fails then
+/* If lba2fake_store(lba,num) compares equal to arr(num), then copy top half of
+ * arr into lba2fake_store(lba,num) and return true. If comparison fails then
  * return false. */
 static bool comp_write_worker(u64 lba, u32 num, const u8 *arr)
 {
@@ -2643,7 +2643,7 @@ static int prot_verify_read(struct scsi_cmnd *SCpnt, sector_t start_sec,
                if (sdt->app_tag == cpu_to_be16(0xffff))
                        continue;
 
-               ret = dif_verify(sdt, fake_store(sector), sector, ei_lba);
+               ret = dif_verify(sdt, lba2fake_store(sector), sector, ei_lba);
                if (ret) {
                        dif_errors++;
                        return ret;
@@ -3261,10 +3261,12 @@ err_out:
 static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
                           u32 ei_lba, bool unmap, bool ndob)
 {
+       int ret;
        unsigned long iflags;
        unsigned long long i;
-       int ret;
-       u64 lba_off;
+       u32 lb_size = sdebug_sector_size;
+       u64 block, lbaa;
+       u8 *fs1p;
 
        ret = check_device_access_params(scp, lba, num);
        if (ret)
@@ -3276,31 +3278,30 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
                unmap_region(lba, num);
                goto out;
        }
-
-       lba_off = lba * sdebug_sector_size;
+       lbaa = lba;
+       block = do_div(lbaa, sdebug_store_sectors);
        /* if ndob then zero 1 logical block, else fetch 1 logical block */
+       fs1p = fake_storep + (block * lb_size);
        if (ndob) {
-               memset(fake_storep + lba_off, 0, sdebug_sector_size);
+               memset(fs1p, 0, lb_size);
                ret = 0;
        } else
-               ret = fetch_to_dev_buffer(scp, fake_storep + lba_off,
-                                         sdebug_sector_size);
+               ret = fetch_to_dev_buffer(scp, fs1p, lb_size);
 
        if (-1 == ret) {
                write_unlock_irqrestore(&atomic_rw, iflags);
                return DID_ERROR << 16;
-       } else if (sdebug_verbose && !ndob && (ret < sdebug_sector_size))
+       } else if (sdebug_verbose && !ndob && (ret < lb_size))
                sdev_printk(KERN_INFO, scp->device,
                            "%s: %s: lb size=%u, IO sent=%d bytes\n",
-                           my_name, "write same",
-                           sdebug_sector_size, ret);
+                           my_name, "write same", lb_size, ret);
 
        /* Copy first sector to remaining blocks */
-       for (i = 1 ; i < num ; i++)
-               memcpy(fake_storep + ((lba + i) * sdebug_sector_size),
-                      fake_storep + lba_off,
-                      sdebug_sector_size);
-
+       for (i = 1 ; i < num ; i++) {
+               lbaa = lba + i;
+               block = do_div(lbaa, sdebug_store_sectors);
+               memmove(fake_storep + (block * lb_size), fs1p, lb_size);
+       }
        if (scsi_debug_lbp())
                map_region(lba, num);
 out:
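
The core of the resp_write_same() fix above is that every logical block address is first folded into the bounded fake store before an offset is computed, so large LBAs wrap around rather than index past the allocation; the wrap also means source and destination of the per-block copy can alias, which is presumably why memmove() replaced memcpy(). A minimal sketch of the mapping, using the kernel's do_div() (quotient written back to the first argument, remainder returned):

	u64 lbaa = lba;				/* copy: do_div() modifies it */
	u64 block = do_div(lbaa, sdebug_store_sectors);
	u8 *fs1p = fake_storep + block * sdebug_sector_size;
	/* block < sdebug_store_sectors, so fs1p stays inside the store */
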
index 83365b2..fff8694 100644
@@ -462,12 +462,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
        sdkp->device->use_10_for_rw = 0;
 
        /*
-        * If something changed, revalidate the disk zone bitmaps once we have
-        * the capacity, that is on the second revalidate execution during disk
-        * scan and always during normal revalidate.
+        * Revalidate the disk zone bitmaps once the block device capacity is
+        * set on the second revalidate execution during disk scan and if
+        * something changed when executing a normal revalidate.
         */
-       if (sdkp->first_scan)
+       if (sdkp->first_scan) {
+               sdkp->zone_blocks = zone_blocks;
+               sdkp->nr_zones = nr_zones;
                return 0;
+       }
+
        if (sdkp->zone_blocks != zone_blocks ||
            sdkp->nr_zones != nr_zones ||
            disk->queue->nr_zones != nr_zones) {
index 52c153c..636f83f 100644
@@ -1143,18 +1143,19 @@ static void qm_mr_process_task(struct work_struct *work);
 static irqreturn_t portal_isr(int irq, void *ptr)
 {
        struct qman_portal *p = ptr;
-
-       u32 clear = QM_DQAVAIL_MASK | p->irq_sources;
        u32 is = qm_in(&p->p, QM_REG_ISR) & p->irq_sources;
+       u32 clear = 0;
 
        if (unlikely(!is))
                return IRQ_NONE;
 
        /* DQRR-handling if it's interrupt-driven */
-       if (is & QM_PIRQ_DQRI)
+       if (is & QM_PIRQ_DQRI) {
                __poll_portal_fast(p, QMAN_POLL_LIMIT);
+               clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI;
+       }
        /* Handling of anything else that's interrupt-driven */
-       clear |= __poll_portal_slow(p, is);
+       clear |= __poll_portal_slow(p, is) & QM_PIRQ_SLOW;
        qm_out(&p->p, QM_REG_ISR, clear);
        return IRQ_HANDLED;
 }
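
The portal_isr() change narrows the acknowledge mask: instead of unconditionally clearing the DQRR-available bits, the handler now acks only the sources it actually serviced, so pending-but-unhandled status is not silently discarded. The general shape of such a handler (all names here hypothetical, not QMan's):

	u32 pending = read_status() & enabled_sources;
	u32 ack = 0;

	if (!pending)
		return IRQ_NONE;
	if (pending & FAST_BITS) {
		service_fast_path();
		ack |= FAST_BITS;	/* ack only what was handled */
	}
	ack |= service_slow_path(pending) & SLOW_BITS;
	write_status_clear(ack);
	return IRQ_HANDLED;
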
index e559f4c..1b3943b 100644
@@ -643,14 +643,7 @@ static void ethsw_teardown_irqs(struct fsl_mc_device *sw_dev)
 static int swdev_port_attr_get(struct net_device *netdev,
                               struct switchdev_attr *attr)
 {
-       struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-
        switch (attr->id) {
-       case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-               attr->u.brport_flags =
-                       (port_priv->ethsw_data->learning ? BR_LEARNING : 0) |
-                       (port_priv->flood ? BR_FLOOD : 0);
-               break;
        case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
                attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD;
                break;
index 2848fa7..d6248ee 100644
@@ -170,7 +170,7 @@ int cvm_oct_phy_setup_device(struct net_device *dev)
                return -ENODEV;
 
        priv->last_link = 0;
-       phy_start_aneg(phydev);
+       phy_start(phydev);
 
        return 0;
 no_phy:
index c92bbd0..005de00 100644
@@ -265,7 +265,8 @@ static void spk_ttyio_send_xchar(char ch)
                return;
        }
 
-       speakup_tty->ops->send_xchar(speakup_tty, ch);
+       if (speakup_tty->ops->send_xchar)
+               speakup_tty->ops->send_xchar(speakup_tty, ch);
        mutex_unlock(&speakup_tty_mutex);
 }
 
@@ -277,7 +278,8 @@ static void spk_ttyio_tiocmset(unsigned int set, unsigned int clear)
                return;
        }
 
-       speakup_tty->ops->tiocmset(speakup_tty, set, clear);
+       if (speakup_tty->ops->tiocmset)
+               speakup_tty->ops->tiocmset(speakup_tty, set, clear);
        mutex_unlock(&speakup_tty_mutex);
 }
 
index 72016d0..8e7fffb 100644
@@ -852,6 +852,12 @@ static ssize_t pi_prot_type_store(struct config_item *item,
        return count;
 }
 
+/* always zero, but attr needs to remain RW to avoid userspace breakage */
+static ssize_t pi_prot_format_show(struct config_item *item, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "0\n");
+}
+
 static ssize_t pi_prot_format_store(struct config_item *item,
                const char *page, size_t count)
 {
@@ -1132,7 +1138,7 @@ CONFIGFS_ATTR(, emulate_3pc);
 CONFIGFS_ATTR(, emulate_pr);
 CONFIGFS_ATTR(, pi_prot_type);
 CONFIGFS_ATTR_RO(, hw_pi_prot_type);
-CONFIGFS_ATTR_WO(, pi_prot_format);
+CONFIGFS_ATTR(, pi_prot_format);
 CONFIGFS_ATTR(, pi_prot_verify);
 CONFIGFS_ATTR(, enforce_pr_isids);
 CONFIGFS_ATTR(, is_nonrot);
index dfd2324..6fff161 100644
@@ -774,7 +774,7 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
 
                cdev = __cpufreq_cooling_register(np, policy, capacitance);
                if (IS_ERR(cdev)) {
-                       pr_err("cpu_cooling: cpu%d is not running as cooling device: %ld\n",
+                       pr_err("cpu_cooling: cpu%d failed to register as cooling device: %ld\n",
                               policy->cpu, PTR_ERR(cdev));
                        cdev = NULL;
                }
index 4bfdb4a..2df059c 100644
@@ -867,14 +867,14 @@ __init *thermal_of_build_thermal_zone(struct device_node *np)
 
        ret = of_property_read_u32(np, "polling-delay-passive", &prop);
        if (ret < 0) {
-               pr_err("missing polling-delay-passive property\n");
+               pr_err("%pOFn: missing polling-delay-passive property\n", np);
                goto free_tz;
        }
        tz->passive_delay = prop;
 
        ret = of_property_read_u32(np, "polling-delay", &prop);
        if (ret < 0) {
-               pr_err("missing polling-delay property\n");
+               pr_err("%pOFn: missing polling-delay property\n", np);
                goto free_tz;
        }
        tz->polling_delay = prop;
index e2c4076..c1fdbc0 100644
@@ -357,6 +357,9 @@ static int mtk8250_probe_of(struct platform_device *pdev, struct uart_port *p,
        if (dmacnt == 2) {
                data->dma = devm_kzalloc(&pdev->dev, sizeof(*data->dma),
                                         GFP_KERNEL);
+               if (!data->dma)
+                       return -ENOMEM;
+
                data->dma->fn = mtk8250_dma_filter;
                data->dma->rx_size = MTK_UART_RX_SIZE;
                data->dma->rxconf.src_maxburst = MTK_UART_RX_TRIGGER;
index f80a300..48bd694 100644
@@ -3420,6 +3420,11 @@ static int
 serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board)
 {
        int num_iomem, num_port, first_port = -1, i;
+       int rc;
+
+       rc = serial_pci_is_class_communication(dev);
+       if (rc)
+               return rc;
 
        /*
         * Should we try to make guesses for multiport serial devices later?
@@ -3647,10 +3652,6 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
 
        board = &pci_boards[ent->driver_data];
 
-       rc = serial_pci_is_class_communication(dev);
-       if (rc)
-               return rc;
-
        rc = serial_pci_is_blacklisted(dev);
        if (rc)
                return rc;
index e1a551a..ce81523 100644
 #include <linux/serial_core.h>
 #include <asm/sbi.h>
 
-static void sbi_console_write(struct console *con,
-                             const char *s, unsigned int n)
+static void sbi_putc(struct uart_port *port, int c)
 {
-       int i;
+       sbi_console_putchar(c);
+}
 
-       for (i = 0; i < n; ++i)
-               sbi_console_putchar(s[i]);
+static void sbi_console_write(struct console *con,
+                             const char *s, unsigned n)
+{
+       struct earlycon_device *dev = con->data;
+       uart_console_write(&dev->port, s, n, sbi_putc);
 }
 
 static int __init early_sbi_setup(struct earlycon_device *device,
index 5c01bb6..556f50a 100644
@@ -130,6 +130,9 @@ static void uart_start(struct tty_struct *tty)
        struct uart_port *port;
        unsigned long flags;
 
+       if (!state)
+               return;
+
        port = uart_port_lock(state, flags);
        __uart_start(tty);
        uart_port_unlock(port, flags);
@@ -727,6 +730,9 @@ static void uart_unthrottle(struct tty_struct *tty)
        upstat_t mask = UPSTAT_SYNC_FIFO;
        struct uart_port *port;
 
+       if (!state)
+               return;
+
        port = uart_port_ref(state);
        if (!port)
                return;
index 8df0fd8..64bbeb7 100644
@@ -1921,7 +1921,7 @@ out_nomem:
 
 static void sci_free_irq(struct sci_port *port)
 {
-       int i;
+       int i, j;
 
        /*
         * Intentionally in reverse order so we iterate over the muxed
@@ -1937,6 +1937,13 @@ static void sci_free_irq(struct sci_port *port)
                if (unlikely(irq < 0))
                        continue;
 
+               /* Check if already freed (irq was muxed) */
+               for (j = 0; j < i; j++)
+                       if (port->irqs[j] == irq)
+                               j = i + 1;
+               if (j > i)
+                       continue;
+
                free_irq(port->irqs[i], port);
                kfree(port->irqstr[i]);
 
index cb7fcd7..c1e9ea6 100644
@@ -78,7 +78,7 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
        for (i = 0; i < exynos->num_clks; i++) {
                ret = clk_prepare_enable(exynos->clks[i]);
                if (ret) {
-                       while (--i > 0)
+                       while (i-- > 0)
                                clk_disable_unprepare(exynos->clks[i]);
                        return ret;
                }
@@ -223,7 +223,7 @@ static int dwc3_exynos_resume(struct device *dev)
        for (i = 0; i < exynos->num_clks; i++) {
                ret = clk_prepare_enable(exynos->clks[i]);
                if (ret) {
-                       while (--i > 0)
+                       while (i-- > 0)
                                clk_disable_unprepare(exynos->clks[i]);
                        return ret;
                }
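
Both dwc3-exynos hunks fix the same off-by-one in the error-unwind loop: the pre-decrement form never reaches index 0, so the first clock stays enabled on a mid-loop failure, while the post-decrement form walks i-1 down to 0 and correctly skips the entry that just failed to enable. Side by side:

	/* buggy: --i pre-decrements, loop body never runs for i == 0 */
	while (--i > 0)
		clk_disable_unprepare(clks[i]);	/* clks[0] stays enabled */

	/* fixed: i-- post-decrements, body runs for i-1, ..., 0 */
	while (i-- > 0)
		clk_disable_unprepare(clks[i]);
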
index bed2ff4..6c9b76b 100644
@@ -1119,7 +1119,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep,
        unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc);
        unsigned int rem = length % maxp;
 
-       if (rem && usb_endpoint_dir_out(dep->endpoint.desc)) {
+       if ((!length || rem) && usb_endpoint_dir_out(dep->endpoint.desc)) {
                struct dwc3     *dwc = dep->dwc;
                struct dwc3_trb *trb;
 
index 660878a..b77f312 100644
@@ -2083,7 +2083,7 @@ static irqreturn_t net2272_irq(int irq, void *_dev)
 #if defined(PLX_PCI_RDK2)
        /* see if PCI int for us by checking irqstat */
        intcsr = readl(dev->rdk2.fpga_base_addr + RDK2_IRQSTAT);
-       if (!intcsr & (1 << NET2272_PCI_IRQ)) {
+       if (!(intcsr & (1 << NET2272_PCI_IRQ))) {
                spin_unlock(&dev->lock);
                return IRQ_NONE;
        }
index eae8b1b..ffe462a 100644
@@ -452,13 +452,10 @@ void musb_g_tx(struct musb *musb, u8 epnum)
        }
 
        if (request) {
-               u8      is_dma = 0;
-               bool    short_packet = false;
 
                trace_musb_req_tx(req);
 
                if (dma && (csr & MUSB_TXCSR_DMAENAB)) {
-                       is_dma = 1;
                        csr |= MUSB_TXCSR_P_WZC_BITS;
                        csr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_P_UNDERRUN |
                                 MUSB_TXCSR_TXPKTRDY | MUSB_TXCSR_AUTOSET);
@@ -476,16 +473,8 @@ void musb_g_tx(struct musb *musb, u8 epnum)
                 */
                if ((request->zero && request->length)
                        && (request->length % musb_ep->packet_sz == 0)
-                       && (request->actual == request->length))
-                               short_packet = true;
+                       && (request->actual == request->length)) {
 
-               if ((musb_dma_inventra(musb) || musb_dma_ux500(musb)) &&
-                       (is_dma && (!dma->desired_mode ||
-                               (request->actual &
-                                       (musb_ep->packet_sz - 1)))))
-                               short_packet = true;
-
-               if (short_packet) {
                        /*
                         * On DMA completion, FIFO may not be
                         * available yet...
index a688f7f..5fc6825 100644
@@ -346,12 +346,10 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data)
                                channel->status = MUSB_DMA_STATUS_FREE;
 
                                /* completed */
-                               if ((devctl & MUSB_DEVCTL_HM)
-                                       && (musb_channel->transmit)
-                                       && ((channel->desired_mode == 0)
-                                           || (channel->actual_len &
-                                           (musb_channel->max_packet_sz - 1)))
-                                   ) {
+                               if (musb_channel->transmit &&
+                                       (!channel->desired_mode ||
+                                       (channel->actual_len %
+                                           musb_channel->max_packet_sz))) {
                                        u8  epnum  = musb_channel->epnum;
                                        int offset = musb->io.ep_offset(epnum,
                                                                    MUSB_TXCSR);
@@ -363,11 +361,14 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data)
                                         */
                                        musb_ep_select(mbase, epnum);
                                        txcsr = musb_readw(mbase, offset);
-                                       txcsr &= ~(MUSB_TXCSR_DMAENAB
+                                       if (channel->desired_mode == 1) {
+                                               txcsr &= ~(MUSB_TXCSR_DMAENAB
                                                        | MUSB_TXCSR_AUTOSET);
-                                       musb_writew(mbase, offset, txcsr);
-                                       /* Send out the packet */
-                                       txcsr &= ~MUSB_TXCSR_DMAMODE;
+                                               musb_writew(mbase, offset, txcsr);
+                                               /* Send out the packet */
+                                               txcsr &= ~MUSB_TXCSR_DMAMODE;
+                                               txcsr |= MUSB_TXCSR_DMAENAB;
+                                       }
                                        txcsr |=  MUSB_TXCSR_TXPKTRDY;
                                        musb_writew(mbase, offset, txcsr);
                                }
index d7312ee..91ea308 100644
@@ -21,7 +21,7 @@ config AB8500_USB
 
 config FSL_USB2_OTG
        bool "Freescale USB OTG Transceiver Driver"
-       depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM
+       depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM=y && PM
        depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y'
        select USB_PHY
        help
index 27bdb72..f5f0568 100644
@@ -61,9 +61,6 @@ static int am335x_phy_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       ret = usb_add_phy_dev(&am_phy->usb_phy_gen.phy);
-       if (ret)
-               return ret;
        am_phy->usb_phy_gen.phy.init = am335x_init;
        am_phy->usb_phy_gen.phy.shutdown = am335x_shutdown;
 
@@ -82,7 +79,7 @@ static int am335x_phy_probe(struct platform_device *pdev)
        device_set_wakeup_enable(dev, false);
        phy_ctrl_power(am_phy->phy_ctrl, am_phy->id, am_phy->dr_mode, false);
 
-       return 0;
+       return usb_add_phy_dev(&am_phy->usb_phy_gen.phy);
 }
 
 static int am335x_phy_remove(struct platform_device *pdev)
index 4bc29b5..f1c39a3 100644
@@ -2297,7 +2297,8 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port)
                                              pdo_pps_apdo_max_voltage(snk));
                port->pps_data.max_curr = min_pps_apdo_current(src, snk);
                port->pps_data.out_volt = min(port->pps_data.max_volt,
-                                             port->pps_data.out_volt);
+                                             max(port->pps_data.min_volt,
+                                                 port->pps_data.out_volt));
                port->pps_data.op_curr = min(port->pps_data.max_curr,
                                             port->pps_data.op_curr);
        }
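
The tcpm hunk turns a one-sided cap into a proper clamp: the requested PPS output voltage is now bounded below by min_volt as well as above by max_volt. The nested min(max(...)) is the standard idiom; the kernel's clamp() macro expresses the same thing:

	/* equivalent, assuming min_volt <= max_volt */
	port->pps_data.out_volt = clamp(port->pps_data.out_volt,
					port->pps_data.min_volt,
					port->pps_data.max_volt);
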
index cd7e755..a0b07c3 100644
@@ -152,7 +152,12 @@ struct vring_virtqueue {
                /* Available for packed ring */
                struct {
                        /* Actual memory layout for this queue. */
-                       struct vring_packed vring;
+                       struct {
+                               unsigned int num;
+                               struct vring_packed_desc *desc;
+                               struct vring_packed_desc_event *driver;
+                               struct vring_packed_desc_event *device;
+                       } vring;
 
                        /* Driver ring wrap counter. */
                        bool avail_wrap_counter;
@@ -1609,6 +1614,9 @@ static struct virtqueue *vring_create_virtqueue_packed(
                !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
+       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
+               vq->weak_barriers = false;
+
        vq->packed.ring_dma_addr = ring_dma_addr;
        vq->packed.driver_event_dma_addr = driver_event_dma_addr;
        vq->packed.device_event_dma_addr = device_event_dma_addr;
@@ -2079,6 +2087,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
                !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
+       if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
+               vq->weak_barriers = false;
+
        vq->split.queue_dma_addr = 0;
        vq->split.queue_size_in_bytes = 0;
 
@@ -2213,6 +2224,8 @@ void vring_transport_features(struct virtio_device *vdev)
                        break;
                case VIRTIO_F_RING_PACKED:
                        break;
+               case VIRTIO_F_ORDER_PLATFORM:
+                       break;
                default:
                        /* We don't understand this bit. */
                        __virtio_clear_bit(vdev, i);
index b906ff7..aaaaf4d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1436,6 +1436,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
        if (unlikely(!req->ki_filp))
                return -EBADF;
        req->ki_complete = aio_complete_rw;
+       req->private = NULL;
        req->ki_pos = iocb->aio_offset;
        req->ki_flags = iocb_flags(req->ki_filp);
        if (iocb->aio_flags & IOCB_FLAG_RESFD)
index d441244..28d9c2b 100644
@@ -596,7 +596,6 @@ int autofs_expire_run(struct super_block *sb,
        pkt.len = dentry->d_name.len;
        memcpy(pkt.name, dentry->d_name.name, pkt.len);
        pkt.name[pkt.len] = '\0';
-       dput(dentry);
 
        if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
                ret = -EFAULT;
@@ -609,6 +608,8 @@ int autofs_expire_run(struct super_block *sb,
        complete_all(&ino->expire_complete);
        spin_unlock(&sbi->fs_lock);
 
+       dput(dentry);
+
        return ret;
 }
 
index 0e8ea2d..078992e 100644
@@ -266,8 +266,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
        }
        root_inode = autofs_get_inode(s, S_IFDIR | 0755);
        root = d_make_root(root_inode);
-       if (!root)
+       if (!root) {
+               ret = -ENOMEM;
                goto fail_ino;
+       }
        pipe = NULL;
 
        root->d_fsdata = ino;
index d0078cb..7cde3f4 100644
@@ -42,14 +42,10 @@ static int load_script(struct linux_binprm *bprm)
        fput(bprm->file);
        bprm->file = NULL;
 
-       for (cp = bprm->buf+2;; cp++) {
-               if (cp >= bprm->buf + BINPRM_BUF_SIZE)
-                       return -ENOEXEC;
-               if (!*cp || (*cp == '\n'))
-                       break;
-       }
+       bprm->buf[BINPRM_BUF_SIZE - 1] = '\0';
+       if ((cp = strchr(bprm->buf, '\n')) == NULL)
+               cp = bprm->buf+BINPRM_BUF_SIZE-1;
        *cp = '\0';
-
        while (cp > bprm->buf) {
                cp--;
                if ((*cp == ' ') || (*cp == '\t'))
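
The load_script() rewrite replaces the open-coded scan with a bounded string operation: NUL-terminate the last byte of the buffer, then cut the line at the first newline, falling back to the buffer end when none is found (so a shebang line longer than BINPRM_BUF_SIZE is now silently truncated instead of rejected with -ENOEXEC). The same logic, unrolled:

	buf[BINPRM_BUF_SIZE - 1] = '\0';	/* make strchr() safe */
	cp = strchr(buf, '\n');
	if (!cp)
		cp = buf + BINPRM_BUF_SIZE - 1;	/* no newline: truncate */
	*cp = '\0';				/* interpreter line ends here */
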
index f64aad6..5a6c39b 100644
@@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static struct extent_buffer *alloc_tree_block_no_bg_flush(
+                                         struct btrfs_trans_handle *trans,
+                                         struct btrfs_root *root,
+                                         u64 parent_start,
+                                         const struct btrfs_disk_key *disk_key,
+                                         int level,
+                                         u64 hint,
+                                         u64 empty_size)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct extent_buffer *ret;
+
+       /*
+        * If we are COWing a node/leaf from the extent, chunk, device or free
+        * space trees, make sure that we do not finish block group creation of
+        * pending block groups. We do this to avoid a deadlock.
+        * COWing can result in allocation of a new chunk, and flushing pending
+        * block groups (btrfs_create_pending_block_groups()) can be triggered
+        * when finishing allocation of a new chunk. Creation of a pending block
+        * group modifies the extent, chunk, device and free space trees,
+        * therefore we could deadlock with ourselves since we are holding a
+        * lock on an extent buffer that btrfs_create_pending_block_groups() may
+        * try to COW later.
+        * For similar reasons, we also need to delay flushing pending block
+        * groups when splitting a leaf or node, from one of those trees, since
+        * we are holding a write lock on it and its parent or when inserting a
+        * new root node for one of those trees.
+        */
+       if (root == fs_info->extent_root ||
+           root == fs_info->chunk_root ||
+           root == fs_info->dev_root ||
+           root == fs_info->free_space_root)
+               trans->can_flush_pending_bgs = false;
+
+       ret = btrfs_alloc_tree_block(trans, root, parent_start,
+                                    root->root_key.objectid, disk_key, level,
+                                    hint, empty_size);
+       trans->can_flush_pending_bgs = true;
+
+       return ret;
+}
+
 /*
  * does the dirty work in cow of a single block.  The parent block (if
  * supplied) is updated to point to the new cow copy.  The new buffer is marked
@@ -1015,28 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
                parent_start = parent->start;
 
-       /*
-        * If we are COWing a node/leaf from the extent, chunk, device or free
-        * space trees, make sure that we do not finish block group creation of
-        * pending block groups. We do this to avoid a deadlock.
-        * COWing can result in allocation of a new chunk, and flushing pending
-        * block groups (btrfs_create_pending_block_groups()) can be triggered
-        * when finishing allocation of a new chunk. Creation of a pending block
-        * group modifies the extent, chunk, device and free space trees,
-        * therefore we could deadlock with ourselves since we are holding a
-        * lock on an extent buffer that btrfs_create_pending_block_groups() may
-        * try to COW later.
-        */
-       if (root == fs_info->extent_root ||
-           root == fs_info->chunk_root ||
-           root == fs_info->dev_root ||
-           root == fs_info->free_space_root)
-               trans->can_flush_pending_bgs = false;
-
-       cow = btrfs_alloc_tree_block(trans, root, parent_start,
-                       root->root_key.objectid, &disk_key, level,
-                       search_start, empty_size);
-       trans->can_flush_pending_bgs = true;
+       cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
+                                          level, search_start, empty_size);
        if (IS_ERR(cow))
                return PTR_ERR(cow);
 
@@ -3345,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        else
                btrfs_node_key(lower, &lower_key, 0);
 
-       c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-                                  &lower_key, level, root->node->start, 0);
+       c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
+                                        root->node->start, 0);
        if (IS_ERR(c))
                return PTR_ERR(c);
 
@@ -3475,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        mid = (c_nritems + 1) / 2;
        btrfs_node_key(c, &disk_key, mid);
 
-       split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-                       &disk_key, level, c->start, 0);
+       split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
+                                            c->start, 0);
        if (IS_ERR(split))
                return PTR_ERR(split);
 
@@ -4260,8 +4282,8 @@ again:
        else
                btrfs_item_key(l, &disk_key, mid);
 
-       right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-                       &disk_key, 0, l->start, 0);
+       right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
+                                            l->start, 0);
        if (IS_ERR(right))
                return PTR_ERR(right);
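
alloc_tree_block_no_bg_flush() centralizes a guard pattern: temporarily forbid
a nested code path from flushing deferred work that would take locks the
caller already holds, then restore the flag once the critical allocation is
done. A generic sketch of the pattern (types and names are illustrative, not
the btrfs ones):

    #include <stdbool.h>

    struct trans {
            bool can_flush;    /* may nested code flush deferred work? */
    };

    static int allocate(struct trans *t)
    {
            /* may internally flush deferred work only if t->can_flush */
            return 0;
    }

    /* Mirror of the wrapper's shape: clear, do the work, restore. */
    static int allocate_no_flush(struct trans *t)
    {
            int ret;

            t->can_flush = false;
            ret = allocate(t);
            t->can_flush = true;
            return ret;
    }
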
 
index c5586ff..0a3f122 100644
@@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                                flags | SB_RDONLY, device_name, data);
                        if (IS_ERR(mnt_root)) {
                                root = ERR_CAST(mnt_root);
+                               kfree(subvol_name);
                                goto out;
                        }
 
@@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                        if (error < 0) {
                                root = ERR_PTR(error);
                                mntput(mnt_root);
+                               kfree(subvol_name);
                                goto out;
                        }
                }
        }
        if (IS_ERR(mnt_root)) {
                root = ERR_CAST(mnt_root);
+               kfree(subvol_name);
                goto out;
        }
 
index 127fa15..4ec2b66 100644
@@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
        btrfs_trans_release_chunk_metadata(trans);
 
-       if (lock && should_end_transaction(trans) &&
-           READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
-               spin_lock(&info->trans_lock);
-               if (cur_trans->state == TRANS_STATE_RUNNING)
-                       cur_trans->state = TRANS_STATE_BLOCKED;
-               spin_unlock(&info->trans_lock);
-       }
-
        if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
                if (throttle)
                        return btrfs_commit_transaction(trans);
@@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
 }
 
+/*
+ * Release reserved delayed ref space of all pending block groups of the
+ * transaction and remove them from the list
+ */
+static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
+{
+       struct btrfs_fs_info *fs_info = trans->fs_info;
+       struct btrfs_block_group_cache *block_group, *tmp;
+
+       list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
+               btrfs_delayed_refs_rsv_release(fs_info, 1);
+               list_del_init(&block_group->bg_list);
+       }
+}
+
 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
        /*
@@ -2270,6 +2277,7 @@ scrub_continue:
        btrfs_scrub_continue(fs_info);
 cleanup_transaction:
        btrfs_trans_release_metadata(trans);
+       btrfs_cleanup_pending_block_groups(trans);
        btrfs_trans_release_chunk_metadata(trans);
        trans->block_rsv = NULL;
        btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
index 3e4f8f8..1556192 100644
@@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
                else
                        fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
 
-               fs_devices->fsid_change = fsid_change_in_progress;
-
                if (IS_ERR(fs_devices))
                        return ERR_CAST(fs_devices);
 
+               fs_devices->fsid_change = fsid_change_in_progress;
+
                mutex_lock(&fs_devices->device_list_mutex);
                list_add(&fs_devices->fs_list, &fs_uuids);
 
index 52d024b..48318fb 100644
@@ -200,6 +200,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
        struct buffer_head *head;
        struct page *page;
        int all_mapped = 1;
+       static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
 
        index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
        page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
@@ -227,15 +228,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
         * file io on the block device and getblk.  It gets dealt with
         * elsewhere, don't buffer_error if we had some unmapped buffers
         */
-       if (all_mapped) {
-               printk("__find_get_block_slow() failed. "
-                       "block=%llu, b_blocknr=%llu\n",
-                       (unsigned long long)block,
-                       (unsigned long long)bh->b_blocknr);
-               printk("b_state=0x%08lx, b_size=%zu\n",
-                       bh->b_state, bh->b_size);
-               printk("device %pg blocksize: %d\n", bdev,
-                       1 << bd_inode->i_blkbits);
+       ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
+       if (all_mapped && __ratelimit(&last_warned)) {
+               printk("__find_get_block_slow() failed. block=%llu, "
+                      "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
+                      "device %pg blocksize: %d\n",
+                      (unsigned long long)block,
+                      (unsigned long long)bh->b_blocknr,
+                      bh->b_state, bh->b_size, bdev,
+                      1 << bd_inode->i_blkbits);
        }
 out_unlock:
        spin_unlock(&bd_mapping->private_lock);
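
The __find_get_block_slow() warning is now throttled with the standard
ratelimit helpers instead of being printed on every failed lookup. A minimal
sketch of the same idiom for a kernel context (message and limits are
illustrative):

    #include <linux/kernel.h>
    #include <linux/ratelimit.h>

    static void report_mismatch(unsigned long long block)
    {
            /* at most one message per second */
            static DEFINE_RATELIMIT_STATE(rs, HZ, 1);

            /* RATELIMIT_MSG_ON_RELEASE keeps __ratelimit() from printing
             * its own "callbacks suppressed" line here. */
            ratelimit_set_flags(&rs, RATELIMIT_MSG_ON_RELEASE);
            if (__ratelimit(&rs))
                    printk(KERN_WARNING "lookup failed, block=%llu\n", block);
    }
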
index d1f9c2f..7652551 100644
@@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.16"
+#define CIFS_VERSION   "2.17"
 #endif                         /* _CIFSFS_H */
index 2c7689f..659ce1b 100644
@@ -2696,6 +2696,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 
                        rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
                        if (rc) {
+                               kvfree(wdata->pages);
                                kfree(wdata);
                                add_credits_and_wake_if(server, credits, 0);
                                break;
@@ -2707,6 +2708,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                        if (rc) {
                                for (i = 0; i < nr_pages; i++)
                                        put_page(wdata->pages[i]);
+                               kvfree(wdata->pages);
                                kfree(wdata);
                                add_credits_and_wake_if(server, credits, 0);
                                break;
@@ -3386,8 +3388,12 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
                        }
 
                        rc = cifs_read_allocate_pages(rdata, npages);
-                       if (rc)
-                               goto error;
+                       if (rc) {
+                               kvfree(rdata->pages);
+                               kfree(rdata);
+                               add_credits_and_wake_if(server, credits, 0);
+                               break;
+                       }
 
                        rdata->tailsz = PAGE_SIZE;
                }
@@ -3407,7 +3413,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
                if (!rdata->cfile->invalidHandle ||
                    !(rc = cifs_reopen_file(rdata->cfile, true)))
                        rc = server->ops->async_readv(rdata);
-error:
                if (rc) {
                        add_credits_and_wake_if(server, rdata->credits, 0);
                        kref_put(&rdata->refcount,
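
The cifs hunks above plug leaks where the page array was freed on some early
error exits but not others. The usual kernel defense against this class of
bug is a single unwind ladder so every failure path frees exactly what has
been set up so far. A sketch with illustrative types (not the cifs ones):

    #include <linux/mm.h>
    #include <linux/slab.h>

    struct xfer_ctx {
            struct page **pages;
            unsigned int nr_pages;
    };

    static struct xfer_ctx *xfer_ctx_alloc(unsigned int nr_pages)
    {
            struct xfer_ctx *ctx;

            ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
            if (!ctx)
                    return NULL;

            ctx->pages = kvcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
            if (!ctx->pages)
                    goto free_ctx;    /* later failures unwind through here */

            ctx->nr_pages = nr_pages;
            return ctx;

    free_ctx:
            kfree(ctx);
            return NULL;
    }

    static void xfer_ctx_free(struct xfer_ctx *ctx)
    {
            kvfree(ctx->pages);       /* pairs with kvcalloc() */
            kfree(ctx);
    }
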
index 153238f..6f96e22 100644
@@ -866,7 +866,9 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
                                      FILE_READ_EA,
                                      FILE_FULL_EA_INFORMATION,
                                      SMB2_O_INFO_FILE,
-                                     SMB2_MAX_EA_BUF,
+                                     CIFSMaxBufSize -
+                                     MAX_SMB2_CREATE_RESPONSE_SIZE -
+                                     MAX_SMB2_CLOSE_RESPONSE_SIZE,
                                      &rsp_iov, &buftype, cifs_sb);
        if (rc) {
                /*
index 2ff209e..77b3aaa 100644
@@ -3241,8 +3241,17 @@ smb2_readv_callback(struct mid_q_entry *mid)
                rdata->mr = NULL;
        }
 #endif
-       if (rdata->result)
+       if (rdata->result && rdata->result != -ENODATA) {
                cifs_stats_fail_inc(tcon, SMB2_READ_HE);
+               trace_smb3_read_err(0 /* xid */,
+                                   rdata->cfile->fid.persistent_fid,
+                                   tcon->tid, tcon->ses->Suid, rdata->offset,
+                                   rdata->bytes, rdata->result);
+       } else
+               trace_smb3_read_done(0 /* xid */,
+                                    rdata->cfile->fid.persistent_fid,
+                                    tcon->tid, tcon->ses->Suid,
+                                    rdata->offset, rdata->got_bytes);
 
        queue_work(cifsiod_wq, &rdata->work);
        DeleteMidQEntry(mid);
@@ -3317,13 +3326,11 @@ smb2_async_readv(struct cifs_readdata *rdata)
        if (rc) {
                kref_put(&rdata->refcount, cifs_readdata_release);
                cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
-               trace_smb3_read_err(rc, 0 /* xid */, io_parms.persistent_fid,
-                                  io_parms.tcon->tid, io_parms.tcon->ses->Suid,
-                                  io_parms.offset, io_parms.length);
-       } else
-               trace_smb3_read_done(0 /* xid */, io_parms.persistent_fid,
-                                  io_parms.tcon->tid, io_parms.tcon->ses->Suid,
-                                  io_parms.offset, io_parms.length);
+               trace_smb3_read_err(0 /* xid */, io_parms.persistent_fid,
+                                   io_parms.tcon->tid,
+                                   io_parms.tcon->ses->Suid,
+                                   io_parms.offset, io_parms.length, rc);
+       }
 
        cifs_small_buf_release(buf);
        return rc;
@@ -3367,10 +3374,11 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
                if (rc != -ENODATA) {
                        cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
                        cifs_dbg(VFS, "Send error in read = %d\n", rc);
+                       trace_smb3_read_err(xid, req->PersistentFileId,
+                                           io_parms->tcon->tid, ses->Suid,
+                                           io_parms->offset, io_parms->length,
+                                           rc);
                }
-               trace_smb3_read_err(rc, xid, req->PersistentFileId,
-                                   io_parms->tcon->tid, ses->Suid,
-                                   io_parms->offset, io_parms->length);
                free_rsp_buf(resp_buftype, rsp_iov.iov_base);
                return rc == -ENODATA ? 0 : rc;
        } else
@@ -3459,8 +3467,17 @@ smb2_writev_callback(struct mid_q_entry *mid)
                wdata->mr = NULL;
        }
 #endif
-       if (wdata->result)
+       if (wdata->result) {
                cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
+               trace_smb3_write_err(0 /* no xid */,
+                                    wdata->cfile->fid.persistent_fid,
+                                    tcon->tid, tcon->ses->Suid, wdata->offset,
+                                    wdata->bytes, wdata->result);
+       } else
+               trace_smb3_write_done(0 /* no xid */,
+                                     wdata->cfile->fid.persistent_fid,
+                                     tcon->tid, tcon->ses->Suid,
+                                     wdata->offset, wdata->bytes);
 
        queue_work(cifsiod_wq, &wdata->work);
        DeleteMidQEntry(mid);
@@ -3602,10 +3619,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
                                     wdata->bytes, rc);
                kref_put(&wdata->refcount, release);
                cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
-       } else
-               trace_smb3_write_done(0 /* no xid */, req->PersistentFileId,
-                                    tcon->tid, tcon->ses->Suid, wdata->offset,
-                                    wdata->bytes);
+       }
 
 async_writev_out:
        cifs_small_buf_release(req);
@@ -3831,8 +3845,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
                    rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) {
                        srch_inf->endOfSearch = true;
                        rc = 0;
-               }
-               cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
+               } else
+                       cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
                goto qdir_exit;
        }
 
@@ -4427,8 +4441,8 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
        rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
        cifs_small_buf_release(req);
 
-       please_key_low = (__u64 *)req->LeaseKey;
-       please_key_high = (__u64 *)(req->LeaseKey+8);
+       please_key_low = (__u64 *)lease_key;
+       please_key_high = (__u64 *)(lease_key+8);
        if (rc) {
                cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
                trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid,
index 7a2d0a2..538e229 100644
@@ -84,8 +84,9 @@
 
 #define NUMBER_OF_SMB2_COMMANDS        0x0013
 
-/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */
-#define MAX_SMB2_HDR_SIZE 0x00b0
+/* 52 transform hdr + 64 hdr + 88 create rsp */
+#define SMB2_TRANSFORM_HEADER_SIZE 52
+#define MAX_SMB2_HDR_SIZE 204
 
 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
 #define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
@@ -648,6 +649,13 @@ struct smb2_create_req {
        __u8   Buffer[0];
 } __packed;
 
+/*
+ * Maximum size of a SMB2_CREATE response is 64 (smb2 header) +
+ * 88 (fixed part of create response) + 520 (path) + 150 (contexts) +
+ * 2 bytes of padding.
+ */
+#define MAX_SMB2_CREATE_RESPONSE_SIZE 824
+
 struct smb2_create_rsp {
        struct smb2_sync_hdr sync_hdr;
        __le16 StructureSize;   /* Must be 89 */
@@ -996,6 +1004,11 @@ struct smb2_close_req {
        __u64  VolatileFileId; /* opaque endianness */
 } __packed;
 
+/*
+ * Maximum size of a SMB2_CLOSE response is 64 (smb2 header) + 60 (data)
+ */
+#define MAX_SMB2_CLOSE_RESPONSE_SIZE 124
+
 struct smb2_close_rsp {
        struct smb2_sync_hdr sync_hdr;
        __le16 StructureSize; /* 60 */
@@ -1398,8 +1411,6 @@ struct smb2_file_link_info { /* encoding of request for level 11 */
        char   FileName[0];     /* Name to be assigned to new link */
 } __packed; /* level 11 Set */
 
-#define SMB2_MAX_EA_BUF 65536
-
 struct smb2_file_full_ea_info { /* encoding of response for level 15 */
        __le32 next_entry_offset;
        __u8   flags;
index 2593153..aac41ad 100644
@@ -119,6 +119,7 @@ struct dentry_stat_t dentry_stat = {
 
 static DEFINE_PER_CPU(long, nr_dentry);
 static DEFINE_PER_CPU(long, nr_dentry_unused);
+static DEFINE_PER_CPU(long, nr_dentry_negative);
 
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 
@@ -152,11 +153,22 @@ static long get_nr_dentry_unused(void)
        return sum < 0 ? 0 : sum;
 }
 
+static long get_nr_dentry_negative(void)
+{
+       int i;
+       long sum = 0;
+
+       for_each_possible_cpu(i)
+               sum += per_cpu(nr_dentry_negative, i);
+       return sum < 0 ? 0 : sum;
+}
+
 int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
                   size_t *lenp, loff_t *ppos)
 {
        dentry_stat.nr_dentry = get_nr_dentry();
        dentry_stat.nr_unused = get_nr_dentry_unused();
+       dentry_stat.nr_negative = get_nr_dentry_negative();
        return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 }
 #endif
@@ -317,6 +329,8 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
        flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
        WRITE_ONCE(dentry->d_flags, flags);
        dentry->d_inode = NULL;
+       if (dentry->d_flags & DCACHE_LRU_LIST)
+               this_cpu_inc(nr_dentry_negative);
 }
 
 static void dentry_free(struct dentry *dentry)
@@ -371,6 +385,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
  * The per-cpu "nr_dentry_unused" counters are updated with
  * the DCACHE_LRU_LIST bit.
  *
+ * The per-cpu "nr_dentry_negative" counters are only updated
+ * when deleted from or added to the per-superblock LRU list, not
+ * from/to the shrink list. That is to avoid an unneeded dec/inc
+ * pair when moving from LRU to shrink list in select_collect().
+ *
  * These helper functions make sure we always follow the
  * rules. d_lock must be held by the caller.
  */
@@ -380,6 +399,8 @@ static void d_lru_add(struct dentry *dentry)
        D_FLAG_VERIFY(dentry, 0);
        dentry->d_flags |= DCACHE_LRU_LIST;
        this_cpu_inc(nr_dentry_unused);
+       if (d_is_negative(dentry))
+               this_cpu_inc(nr_dentry_negative);
        WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
 }
 
@@ -388,6 +409,8 @@ static void d_lru_del(struct dentry *dentry)
        D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
        dentry->d_flags &= ~DCACHE_LRU_LIST;
        this_cpu_dec(nr_dentry_unused);
+       if (d_is_negative(dentry))
+               this_cpu_dec(nr_dentry_negative);
        WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
 }
 
@@ -418,6 +441,8 @@ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
        D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
        dentry->d_flags &= ~DCACHE_LRU_LIST;
        this_cpu_dec(nr_dentry_unused);
+       if (d_is_negative(dentry))
+               this_cpu_dec(nr_dentry_negative);
        list_lru_isolate(lru, &dentry->d_lru);
 }
 
@@ -426,6 +451,8 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
 {
        D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
        dentry->d_flags |= DCACHE_SHRINK_LIST;
+       if (d_is_negative(dentry))
+               this_cpu_dec(nr_dentry_negative);
        list_lru_isolate_move(lru, &dentry->d_lru, list);
 }
 
@@ -1188,15 +1215,11 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
  */
 void shrink_dcache_sb(struct super_block *sb)
 {
-       long freed;
-
        do {
                LIST_HEAD(dispose);
 
-               freed = list_lru_walk(&sb->s_dentry_lru,
+               list_lru_walk(&sb->s_dentry_lru,
                        dentry_lru_isolate_shrink, &dispose, 1024);
-
-               this_cpu_sub(nr_dentry_unused, freed);
                shrink_dentry_list(&dispose);
        } while (list_lru_count(&sb->s_dentry_lru) > 0);
 }
@@ -1820,6 +1843,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
        WARN_ON(d_in_lookup(dentry));
 
        spin_lock(&dentry->d_lock);
+       /*
+        * Decrement negative dentry count if it was in the LRU list.
+        */
+       if (dentry->d_flags & DCACHE_LRU_LIST)
+               this_cpu_dec(nr_dentry_negative);
        hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
        raw_write_seqcount_begin(&dentry->d_seq);
        __d_set_inode_and_type(dentry, inode, add_flags);
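
nr_dentry_negative follows the same recipe as the existing nr_dentry and
nr_dentry_unused counters: lock-free per-CPU updates on the hot paths and a
full sum only when the statistic is read. A condensed sketch of the idiom for
a kernel context (the counter name is illustrative):

    #include <linux/cpumask.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(long, nr_widgets);

    static inline void widget_added(void)
    {
            this_cpu_inc(nr_widgets);   /* no shared cacheline bouncing */
    }

    static inline void widget_removed(void)
    {
            this_cpu_dec(nr_widgets);
    }

    /* Slow path: inc and dec for one object may land on different CPUs,
     * so individual per-cpu values, and transiently even the sum, can be
     * negative; clamp before reporting. */
    static long get_nr_widgets(void)
    {
            long sum = 0;
            int i;

            for_each_possible_cpu(i)
                    sum += per_cpu(nr_widgets, i);
            return sum < 0 ? 0 : sum;
    }
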
index 13b0135..29c68c5 100644
@@ -324,7 +324,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
        inode_unlock(d_inode(dentry->d_parent));
        dput(dentry);
        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-       return NULL;
+       return ERR_PTR(-ENOMEM);
 }
 
 static struct dentry *end_creating(struct dentry *dentry)
@@ -347,7 +347,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
        dentry = start_creating(name, parent);
 
        if (IS_ERR(dentry))
-               return NULL;
+               return dentry;
 
        inode = debugfs_get_inode(dentry->d_sb);
        if (unlikely(!inode))
@@ -386,7 +386,8 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
  * This function will return a pointer to a dentry if it succeeds.  This
  * pointer must be passed to the debugfs_remove() function when the file is
  * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.)  If an error occurs, %NULL will be returned.
+ * you are responsible here.)  If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
  *
  * If debugfs is not enabled in the kernel, the value -%ENODEV will be
  * returned.
@@ -464,7 +465,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe);
  * This function will return a pointer to a dentry if it succeeds.  This
  * pointer must be passed to the debugfs_remove() function when the file is
  * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.)  If an error occurs, %NULL will be returned.
+ * you are responsible here.)  If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
  *
  * If debugfs is not enabled in the kernel, the value -%ENODEV will be
  * returned.
@@ -495,7 +497,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_size);
  * This function will return a pointer to a dentry if it succeeds.  This
  * pointer must be passed to the debugfs_remove() function when the file is
  * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.)  If an error occurs, %NULL will be returned.
+ * you are responsible here.)  If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
  *
  * If debugfs is not enabled in the kernel, the value -%ENODEV will be
  * returned.
@@ -506,7 +509,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
        struct inode *inode;
 
        if (IS_ERR(dentry))
-               return NULL;
+               return dentry;
 
        inode = debugfs_get_inode(dentry->d_sb);
        if (unlikely(!inode))
@@ -545,7 +548,7 @@ struct dentry *debugfs_create_automount(const char *name,
        struct inode *inode;
 
        if (IS_ERR(dentry))
-               return NULL;
+               return dentry;
 
        inode = debugfs_get_inode(dentry->d_sb);
        if (unlikely(!inode))
@@ -581,8 +584,8 @@ EXPORT_SYMBOL(debugfs_create_automount);
  * This function will return a pointer to a dentry if it succeeds.  This
  * pointer must be passed to the debugfs_remove() function when the symbolic
  * link is to be removed (no automatic cleanup happens if your module is
- * unloaded, you are responsible here.)  If an error occurs, %NULL will be
- * returned.
+ * unloaded, you are responsible here.)  If an error occurs, %ERR_PTR(-ERROR)
+ * will be returned.
  *
  * If debugfs is not enabled in the kernel, the value -%ENODEV will be
  * returned.
@@ -594,12 +597,12 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
        struct inode *inode;
        char *link = kstrdup(target, GFP_KERNEL);
        if (!link)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        dentry = start_creating(name, parent);
        if (IS_ERR(dentry)) {
                kfree(link);
-               return NULL;
+               return dentry;
        }
 
        inode = debugfs_get_inode(dentry->d_sb);
@@ -787,6 +790,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
        struct dentry *dentry = NULL, *trap;
        struct name_snapshot old_name;
 
+       if (IS_ERR(old_dir))
+               return old_dir;
+       if (IS_ERR(new_dir))
+               return new_dir;
+       if (IS_ERR_OR_NULL(old_dentry))
+               return old_dentry;
+
        trap = lock_rename(new_dir, old_dir);
        /* Source or destination directories don't exist? */
        if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir))
@@ -820,7 +830,9 @@ exit:
        if (dentry && !IS_ERR(dentry))
                dput(dentry);
        unlock_rename(new_dir, old_dir);
-       return NULL;
+       if (IS_ERR(dentry))
+               return dentry;
+       return ERR_PTR(-EINVAL);
 }
 EXPORT_SYMBOL_GPL(debugfs_rename);
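
With this series the debugfs creation helpers report failure as
ERR_PTR(-ERROR) instead of NULL, so callers should switch from NULL checks to
IS_ERR(). A caller-side sketch, assuming a module that keeps its directory
dentry around (all names are illustrative):

    #include <linux/debugfs.h>
    #include <linux/err.h>
    #include <linux/module.h>

    static struct dentry *mydrv_dir;

    static int __init mydrv_init(void)
    {
            mydrv_dir = debugfs_create_dir("mydrv", NULL);
            if (IS_ERR(mydrv_dir))          /* was: if (!mydrv_dir) */
                    return PTR_ERR(mydrv_dir);
            return 0;
    }

    static void __exit mydrv_exit(void)
    {
            /* debugfs_remove_recursive() tolerates error pointers */
            debugfs_remove_recursive(mydrv_dir);
    }

    module_init(mydrv_init);
    module_exit(mydrv_exit);
    MODULE_LICENSE("GPL");
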
 
index 8237701..d31b6c7 100644
@@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
        spin_lock(&sb->s_inode_list_lock);
        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                spin_lock(&inode->i_lock);
+               /*
+                * We must skip inodes in unusual state. We may also skip
+                * inodes without pages but we deliberately won't in case
+                * we need to reschedule to avoid softlockups.
+                */
                if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
-                   (inode->i_mapping->nrpages == 0)) {
+                   (inode->i_mapping->nrpages == 0 && !need_resched())) {
                        spin_unlock(&inode->i_lock);
                        continue;
                }
@@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
                spin_unlock(&inode->i_lock);
                spin_unlock(&sb->s_inode_list_lock);
 
+               cond_resched();
                invalidate_mapping_pages(inode->i_mapping, 0, -1);
                iput(toput_inode);
                toput_inode = inode;
index 712f009..5508baa 100644
@@ -116,16 +116,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                goto out;
        }
 
-       ret = file_write_and_wait_range(file, start, end);
-       if (ret)
-               return ret;
-
        if (!journal) {
-               struct writeback_control wbc = {
-                       .sync_mode = WB_SYNC_ALL
-               };
-
-               ret = ext4_write_inode(inode, &wbc);
+               ret = __generic_file_fsync(file, start, end, datasync);
                if (!ret)
                        ret = ext4_sync_parent(inode);
                if (test_opt(inode->i_sb, BARRIER))
@@ -133,6 +125,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                goto out;
        }
 
+       ret = file_write_and_wait_range(file, start, end);
+       if (ret)
+               return ret;
        /*
         * data=writeback,ordered:
         *  The caller's filemap_fdatawrite()/wait will sync the data.
index a5e516a..809c0f2 100644
@@ -1742,7 +1742,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
        req->in.h.nodeid = outarg->nodeid;
        req->in.numargs = 2;
        req->in.argpages = 1;
-       req->page_descs[0].offset = offset;
        req->end = fuse_retrieve_end;
 
        index = outarg->offset >> PAGE_SHIFT;
@@ -1757,6 +1756,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 
                this_num = min_t(unsigned, num, PAGE_SIZE - offset);
                req->pages[req->num_pages] = page;
+               req->page_descs[req->num_pages].offset = offset;
                req->page_descs[req->num_pages].length = this_num;
                req->num_pages++;
 
@@ -2077,8 +2077,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 
        ret = fuse_dev_do_write(fud, &cs, len);
 
+       pipe_lock(pipe);
        for (idx = 0; idx < nbuf; idx++)
                pipe_buf_release(pipe, &bufs[idx]);
+       pipe_unlock(pipe);
 
 out:
        kvfree(bufs);
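
The fuse_dev_splice_write() fix brackets pipe_buf_release() with
pipe_lock()/pipe_unlock(): buffers belonging to a pipe may only be torn down
under the lock that serializes access to them. The same rule in a
self-contained user-space form (a deliberately simplified model):

    #include <pthread.h>
    #include <stdlib.h>

    struct buf { void *data; };

    static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Concurrent users may still reach these buffers; releasing them
     * outside pool_lock would race, just as releasing pipe buffers
     * without pipe_lock() did. */
    static void release_bufs(struct buf *bufs, int n)
    {
            int i;

            pthread_mutex_lock(&pool_lock);
            for (i = 0; i < n; i++)
                    free(bufs[i].data);
            pthread_mutex_unlock(&pool_lock);
    }
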
index ffaffe1..a59c16b 100644
@@ -1782,7 +1782,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
                spin_unlock(&fc->lock);
 
                dec_wb_stat(&bdi->wb, WB_WRITEBACK);
-               dec_node_page_state(page, NR_WRITEBACK_TEMP);
+               dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
                wb_writeout_inc(&bdi->wb);
                fuse_writepage_free(fc, new_req);
                fuse_request_free(new_req);
index 76baaa6..c2d4099 100644
@@ -628,6 +628,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
        get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
        fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
        fc->user_ns = get_user_ns(user_ns);
+       fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -1162,7 +1163,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        fc->user_id = d.user_id;
        fc->group_id = d.group_id;
        fc->max_read = max_t(unsigned, 4096, d.max_read);
-       fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
 
        /* Used by get_root_inode() */
        sb->s_fs_info = fc;
index f15b4c5..78510ab 100644
@@ -28,7 +28,6 @@
 #include "util.h"
 #include "trans.h"
 #include "dir.h"
-#include "lops.h"
 
 struct workqueue_struct *gfs2_freeze_wq;
 
index 5bfaf38..b8830fd 100644
@@ -733,7 +733,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
        lh->lh_crc = cpu_to_be32(crc);
 
        gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
-       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
+       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags);
        log_flush_wait(sdp);
 }
 
@@ -810,7 +810,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 
        gfs2_ordered_write(sdp);
        lops_before_commit(sdp, tr);
-       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
+       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
 
        if (sdp->sd_log_head != sdp->sd_log_flush_head) {
                log_flush_wait(sdp);
index 94dcab6..2295042 100644
@@ -17,9 +17,7 @@
 #include <linux/bio.h>
 #include <linux/fs.h>
 #include <linux/list_sort.h>
-#include <linux/blkdev.h>
 
-#include "bmap.h"
 #include "dir.h"
 #include "gfs2.h"
 #include "incore.h"
@@ -195,6 +193,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
 /**
  * gfs2_end_log_write - end of i/o to the log
  * @bio: The bio
  *
  * Each bio_vec contains either data from the pagecache or data
  * relating to the log itself. Here we iterate over the bio_vec
@@ -231,19 +230,20 @@ static void gfs2_end_log_write(struct bio *bio)
 /**
  * gfs2_log_submit_bio - Submit any pending log bio
  * @biop: Address of the bio pointer
- * @opf: REQ_OP | op_flags
+ * @op: REQ_OP
+ * @op_flags: req_flag_bits
  *
  * Submit any pending part-built or full bio to the block device. If
  * there is no pending bio, then this is a no-op.
  */
 
-void gfs2_log_submit_bio(struct bio **biop, int opf)
+void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags)
 {
        struct bio *bio = *biop;
        if (bio) {
                struct gfs2_sbd *sdp = bio->bi_private;
                atomic_inc(&sdp->sd_log_in_flight);
-               bio->bi_opf = opf;
+               bio_set_op_attrs(bio, op, op_flags);
                submit_bio(bio);
                *biop = NULL;
        }
@@ -304,7 +304,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
                nblk >>= sdp->sd_fsb2bb_shift;
                if (blkno == nblk && !flush)
                        return bio;
-               gfs2_log_submit_bio(biop, op);
+               gfs2_log_submit_bio(biop, op, 0);
        }
 
        *biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
@@ -375,184 +375,6 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
                       gfs2_log_bmap(sdp));
 }
 
-/**
- * gfs2_end_log_read - end I/O callback for reads from the log
- * @bio: The bio
- *
- * Simply unlock the pages in the bio. The main thread will wait on them and
- * process them in order as necessary.
- */
-
-static void gfs2_end_log_read(struct bio *bio)
-{
-       struct page *page;
-       struct bio_vec *bvec;
-       int i;
-
-       bio_for_each_segment_all(bvec, bio, i) {
-               page = bvec->bv_page;
-               if (bio->bi_status) {
-                       int err = blk_status_to_errno(bio->bi_status);
-
-                       SetPageError(page);
-                       mapping_set_error(page->mapping, err);
-               }
-               unlock_page(page);
-       }
-
-       bio_put(bio);
-}
-
-/**
- * gfs2_jhead_pg_srch - Look for the journal head in a given page.
- * @jd: The journal descriptor
- * @page: The page to look in
- *
- * Returns: 1 if found, 0 otherwise.
- */
-
-static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
-                             struct gfs2_log_header_host *head,
-                             struct page *page)
-{
-       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-       struct gfs2_log_header_host uninitialized_var(lh);
-       void *kaddr = kmap_atomic(page);
-       unsigned int offset;
-       bool ret = false;
-
-       for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
-               if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
-                       if (lh.lh_sequence > head->lh_sequence)
-                               *head = lh;
-                       else {
-                               ret = true;
-                               break;
-                       }
-               }
-       }
-       kunmap_atomic(kaddr);
-       return ret;
-}
-
-/**
- * gfs2_jhead_process_page - Search/cleanup a page
- * @jd: The journal descriptor
- * @index: Index of the page to look into
- * @done: If set, perform only cleanup, else search and set if found.
- *
- * Find the page with 'index' in the journal's mapping. Search the page for
- * the journal head if requested (cleanup == false). Release refs on the
- * page so the page cache can reclaim it (put_page() twice). We grabbed a
- * reference on this page two times, first when we did a find_or_create_page()
- * to obtain the page to add it to the bio and second when we do a
- * find_get_page() here to get the page to wait on while I/O on it is being
- * completed.
- * This function is also used to free up a page we might've grabbed but not
- * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
- * submitted the I/O, but we already found the jhead so we only need to drop
- * our references to the page.
- */
-
-static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
-                                   struct gfs2_log_header_host *head,
-                                   bool *done)
-{
-       struct page *page;
-
-       page = find_get_page(jd->jd_inode->i_mapping, index);
-       wait_on_page_locked(page);
-
-       if (PageError(page))
-               *done = true;
-
-       if (!*done)
-               *done = gfs2_jhead_pg_srch(jd, head, page);
-
-       put_page(page); /* Once for find_get_page */
-       put_page(page); /* Once more for find_or_create_page */
-}
-
-/**
- * gfs2_find_jhead - find the head of a log
- * @jd: The journal descriptor
- * @head: The log descriptor for the head of the log is returned here
- *
- * Do a search of a journal by reading it in large chunks using bios and find
- * the valid log entry with the highest sequence number.  (i.e. the log head)
- *
- * Returns: 0 on success, errno otherwise
- */
-
-int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
-{
-       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-       struct address_space *mapping = jd->jd_inode->i_mapping;
-       struct gfs2_journal_extent *je;
-       u32 block, read_idx = 0, submit_idx = 0, index = 0;
-       int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
-       int blocks_per_page = 1 << shift, sz, ret = 0;
-       struct bio *bio = NULL;
-       struct page *page;
-       bool done = false;
-       errseq_t since;
-
-       memset(head, 0, sizeof(*head));
-       if (list_empty(&jd->extent_list))
-               gfs2_map_journal_extents(sdp, jd);
-
-       since = filemap_sample_wb_err(mapping);
-       list_for_each_entry(je, &jd->extent_list, list) {
-               for (block = 0; block < je->blocks; block += blocks_per_page) {
-                       index = (je->lblock + block) >> shift;
-
-                       page = find_or_create_page(mapping, index, GFP_NOFS);
-                       if (!page) {
-                               ret = -ENOMEM;
-                               done = true;
-                               goto out;
-                       }
-
-                       if (bio) {
-                               sz = bio_add_page(bio, page, PAGE_SIZE, 0);
-                               if (sz == PAGE_SIZE)
-                                       goto page_added;
-                               submit_idx = index;
-                               submit_bio(bio);
-                               bio = NULL;
-                       }
-
-                       bio = gfs2_log_alloc_bio(sdp,
-                                                je->dblock + (index << shift),
-                                                gfs2_end_log_read);
-                       bio->bi_opf = REQ_OP_READ;
-                       sz = bio_add_page(bio, page, PAGE_SIZE, 0);
-                       gfs2_assert_warn(sdp, sz == PAGE_SIZE);
-
-page_added:
-                       if (submit_idx <= read_idx + BIO_MAX_PAGES) {
-                               /* Keep at least one bio in flight */
-                               continue;
-                       }
-
-                       gfs2_jhead_process_page(jd, read_idx++, head, &done);
-                       if (done)
-                               goto out;  /* found */
-               }
-       }
-
-out:
-       if (bio)
-               submit_bio(bio);
-       while (read_idx <= index)
-               gfs2_jhead_process_page(jd, read_idx++, head, &done);
-
-       if (!ret)
-               ret = filemap_check_wb_err(mapping, since);
-
-       return ret;
-}
-
 static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
                                      u32 ld_length, u32 ld_data1)
 {
index 331160f..711c4d8 100644
@@ -30,10 +30,8 @@ extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp);
 extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
                           unsigned size, unsigned offset, u64 blkno);
 extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
-extern void gfs2_log_submit_bio(struct bio **biop, int opf);
+extern void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags);
 extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
-extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
-                          struct gfs2_log_header_host *head);
 
 static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
 {
index 1179763..b041cb8 100644
@@ -41,7 +41,6 @@
 #include "dir.h"
 #include "meta_io.h"
 #include "trace_gfs2.h"
-#include "lops.h"
 
 #define DO 0
 #define UNDO 1
index 7389e44..2dac430 100644
@@ -182,6 +182,129 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 }
 
 /**
+ * find_good_lh - find a good log header
+ * @jd: the journal
+ * @blk: the segment to start searching from
+ * @lh: the log header to fill in
+ * @forward: if true search forward in the log, else search backward
+ *
+ * Call get_log_header() to get a log header for a segment, but if the
+ * segment is bad, either scan forward or backward until we find a good one.
+ *
+ * Returns: errno
+ */
+
+static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
+                       struct gfs2_log_header_host *head)
+{
+       unsigned int orig_blk = *blk;
+       int error;
+
+       for (;;) {
+               error = get_log_header(jd, *blk, head);
+               if (error <= 0)
+                       return error;
+
+               if (++*blk == jd->jd_blocks)
+                       *blk = 0;
+
+               if (*blk == orig_blk) {
+                       gfs2_consist_inode(GFS2_I(jd->jd_inode));
+                       return -EIO;
+               }
+       }
+}
+
+/**
+ * jhead_scan - make sure we've found the head of the log
+ * @jd: the journal
+ * @head: this is filled in with the log descriptor of the head
+ *
+ * At this point, @head should be either the head of the log or just
+ * before.  Scan forward until we find the head.
+ *
+ * Returns: errno
+ */
+
+static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
+{
+       unsigned int blk = head->lh_blkno;
+       struct gfs2_log_header_host lh;
+       int error;
+
+       for (;;) {
+               if (++blk == jd->jd_blocks)
+                       blk = 0;
+
+               error = get_log_header(jd, blk, &lh);
+               if (error < 0)
+                       return error;
+               if (error == 1)
+                       continue;
+
+               if (lh.lh_sequence == head->lh_sequence) {
+                       gfs2_consist_inode(GFS2_I(jd->jd_inode));
+                       return -EIO;
+               }
+               if (lh.lh_sequence < head->lh_sequence)
+                       break;
+
+               *head = lh;
+       }
+
+       return 0;
+}
+
+/**
+ * gfs2_find_jhead - find the head of a log
+ * @jd: the journal
+ * @head: the log descriptor for the head of the log is returned here
+ *
+ * Do a binary search of a journal and find the valid log entry with the
+ * highest sequence number.  (i.e. the log head)
+ *
+ * Returns: errno
+ */
+
+int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
+{
+       struct gfs2_log_header_host lh_1, lh_m;
+       u32 blk_1, blk_2, blk_m;
+       int error;
+
+       blk_1 = 0;
+       blk_2 = jd->jd_blocks - 1;
+
+       for (;;) {
+               blk_m = (blk_1 + blk_2) / 2;
+
+               error = find_good_lh(jd, &blk_1, &lh_1);
+               if (error)
+                       return error;
+
+               error = find_good_lh(jd, &blk_m, &lh_m);
+               if (error)
+                       return error;
+
+               if (blk_1 == blk_m || blk_m == blk_2)
+                       break;
+
+               if (lh_1.lh_sequence <= lh_m.lh_sequence)
+                       blk_1 = blk_m;
+               else
+                       blk_2 = blk_m;
+       }
+
+       error = jhead_scan(jd, &lh_1);
+       if (error)
+               return error;
+
+       *head = lh_1;
+
+       return error;
+}
+
+/**
  * foreach_descriptor - go through the active part of the log
  * @jd: the journal
  * @start: the first log header in the active region
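
The reinstated gfs2_find_jhead() bisects the circular journal: header
sequence numbers increase up to the head and then wrap to older values, so
comparing the sequence at the low end with the midpoint tells which half
holds the head. A simplified user-space model of the search (the real code
also steps over bad segments via find_good_lh()):

    #include <stdio.h>

    /* seq[] models a circular log: values rise to the head, then wrap.
     * Return the index of the maximum, i.e. the log head. */
    static int find_head(const unsigned int *seq, int nblocks)
    {
            int lo = 0, hi = nblocks - 1, mid;

            for (;;) {
                    mid = (lo + hi) / 2;
                    if (lo == mid || mid == hi)
                            break;
                    if (seq[lo] <= seq[mid])
                            lo = mid;   /* head at or beyond the midpoint */
                    else
                            hi = mid;   /* wrap occurred before the midpoint */
            }
            /* final short scan, like jhead_scan() */
            return seq[hi] > seq[lo] ? hi : lo;
    }

    int main(void)
    {
            unsigned int seq[] = { 8, 9, 10, 11, 12, 3, 4, 5, 6, 7 };

            printf("head at block %d\n", find_head(seq, 10));  /* prints 4 */
            return 0;
    }
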
index 99575ab..11d8124 100644
@@ -27,6 +27,8 @@ extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
 extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
 extern void gfs2_revoke_clean(struct gfs2_jdesc *jd);
 
+extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
+                   struct gfs2_log_header_host *head);
 extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
 extern void gfs2_recover_func(struct work_struct *work);
 extern int __get_log_header(struct gfs2_sbd *sdp,
index 831d7cb..17a8d3b 100644
@@ -1780,9 +1780,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
                        goto next_iter;
                }
                if (ret == -E2BIG) {
-                       n += rbm->bii - initial_bii;
                        rbm->bii = 0;
                        rbm->offset = 0;
+                       n += (rbm->bii - initial_bii);
                        goto res_covered_end_of_rgrp;
                }
                return ret;
index d4b11c9..ca71163 100644
@@ -45,7 +45,6 @@
 #include "util.h"
 #include "sys.h"
 #include "xattr.h"
-#include "lops.h"
 
 #define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
 
index 0cd47fe..73432e6 100644
@@ -730,11 +730,8 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
                return LRU_REMOVED;
        }
 
-       /*
-        * Recently referenced inodes and inodes with many attached pages
-        * get one more pass.
-        */
-       if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) {
+       /* recently referenced inodes get one more pass */
+       if (inode->i_state & I_REFERENCED) {
                inode->i_state &= ~I_REFERENCED;
                spin_unlock(&inode->i_lock);
                return LRU_ROTATE;
index a3088fa..897c602 100644
@@ -116,6 +116,12 @@ iomap_page_create(struct inode *inode, struct page *page)
        atomic_set(&iop->read_count, 0);
        atomic_set(&iop->write_count, 0);
        bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+
+       /*
+        * migrate_page_move_mapping() assumes that pages with private data have
+        * their count elevated by 1.
+        */
+       get_page(page);
        set_page_private(page, (unsigned long)iop);
        SetPagePrivate(page);
        return iop;
@@ -132,6 +138,7 @@ iomap_page_release(struct page *page)
        WARN_ON_ONCE(atomic_read(&iop->write_count));
        ClearPagePrivate(page);
        set_page_private(page, 0);
+       put_page(page);
        kfree(iop);
 }
 
@@ -569,8 +576,10 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage,
 
        if (page_has_private(page)) {
                ClearPagePrivate(page);
+               get_page(newpage);
                set_page_private(newpage, page_private(page));
                set_page_private(page, 0);
+               put_page(page);
                SetPagePrivate(newpage);
        }
 
@@ -1804,6 +1813,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        loff_t pos = iocb->ki_pos, start = pos;
        loff_t end = iocb->ki_pos + count - 1, ret = 0;
        unsigned int flags = IOMAP_DIRECT;
+       bool wait_for_completion = is_sync_kiocb(iocb);
        struct blk_plug plug;
        struct iomap_dio *dio;
 
@@ -1823,7 +1833,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        dio->end_io = end_io;
        dio->error = 0;
        dio->flags = 0;
-       dio->wait_for_completion = is_sync_kiocb(iocb);
 
        dio->submit.iter = iter;
        dio->submit.waiter = current;
@@ -1878,7 +1887,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                dio_warn_stale_pagecache(iocb->ki_filp);
        ret = 0;
 
-       if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
+       if (iov_iter_rw(iter) == WRITE && !wait_for_completion &&
            !inode->i_sb->s_dio_done_wq) {
                ret = sb_init_dio_done_wq(inode->i_sb);
                if (ret < 0)
@@ -1894,7 +1903,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                if (ret <= 0) {
                        /* magic error code to fall back to buffered I/O */
                        if (ret == -ENOTBLK) {
-                               dio->wait_for_completion = true;
+                               wait_for_completion = true;
                                ret = 0;
                        }
                        break;
@@ -1916,8 +1925,24 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        if (dio->flags & IOMAP_DIO_WRITE_FUA)
                dio->flags &= ~IOMAP_DIO_NEED_SYNC;
 
+       /*
+        * We are about to drop our additional submission reference, which
+        * might be the last reference to the dio.  There are three
+        * different ways we can progress here:
+        *
+        *  (a) If this is the last reference we will always complete and free
+        *      the dio ourselves.
+        *  (b) If this is not the last reference, and we serve an asynchronous
+        *      iocb, we must never touch the dio after the decrement, the
+        *      I/O completion handler will complete and free it.
+        *  (c) If this is not the last reference, but we serve a synchronous
+        *      iocb, the I/O completion handler will wake us up on the drop
+        *      of the final reference, and we will complete and free it here
+        *      after we got woken by the I/O completion handler.
+        */
+       dio->wait_for_completion = wait_for_completion;
        if (!atomic_dec_and_test(&dio->ref)) {
-               if (!dio->wait_for_completion)
+               if (!wait_for_completion)
                        return -EIOCBQUEUED;
 
                for (;;) {
@@ -1934,9 +1959,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                __set_current_state(TASK_RUNNING);
        }
 
-       ret = iomap_dio_complete(dio);
-
-       return ret;
+       return iomap_dio_complete(dio);
 
 out_free_dio:
        kfree(dio);
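
The iomap_dio_rw() change fixes a reference-count hazard: the moment the
submitter drops its reference, the completion side may free the dio, so any
state needed after the drop must be copied to a local first. A stand-alone
model of the rule using C11 atomics (the struct and the wait helper are
illustrative stand-ins, not the kernel's):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct dio {
            atomic_int ref;
            bool wait_for_completion;
    };

    /* Stand-in: sleep until the I/O side signals it is done. By contract
     * the I/O side wakes the waiter and never frees in that case. */
    static void wait_until_completed(struct dio *dio) { (void)dio; }

    static int submitter_finish(struct dio *dio)
    {
            /* Copy before the decrement: afterwards even reading
             * dio->wait_for_completion can be a use after free. */
            bool wait = dio->wait_for_completion;

            if (atomic_fetch_sub(&dio->ref, 1) != 1) {
                    if (!wait)
                            return -1;  /* async: completion side owns dio */
                    wait_until_completed(dio);
            }
            free(dio);                  /* we are the designated completer */
            return 0;
    }
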
index 22ce3c8..0570391 100644
@@ -1895,6 +1895,11 @@ static int nfs_parse_devname(const char *dev_name,
        size_t len;
        char *end;
 
+       if (unlikely(!dev_name || !*dev_name)) {
+               dfprintk(MOUNT, "NFS: device name not specified\n");
+               return -EINVAL;
+       }
+
        /* Is the host name protected with square brackets? */
        if (*dev_name == '[') {
                end = strchr(++dev_name, ']');
index 5a0bbf9..f12cb31 100644
@@ -621,11 +621,12 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
        nfs_set_page_writeback(page);
        WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
 
-       ret = 0;
+       ret = req->wb_context->error;
        /* If there is a fatal error that covers this write, just exit */
-       if (nfs_error_is_fatal_on_server(req->wb_context->error))
+       if (nfs_error_is_fatal_on_server(ret))
                goto out_launder;
 
+       ret = 0;
        if (!nfs_pageio_add_request(pgio, req)) {
                ret = pgio->pg_error;
                /*
@@ -635,9 +636,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
                        nfs_context_set_write_error(req->wb_context, ret);
                        if (nfs_error_is_fatal_on_server(ret))
                                goto out_launder;
-               }
+               } else
+                       ret = -EAGAIN;
                nfs_redirty_request(req);
-               ret = -EAGAIN;
        } else
                nfs_add_stats(page_file_mapping(page)->host,
                                NFSIOS_WRITEPAGES, 1);
index 9824e32..7dc98e1 100644
@@ -557,9 +557,11 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
        loff_t cloned;
 
        cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
+       if (cloned < 0)
+               return nfserrno(cloned);
        if (count && cloned != count)
-               cloned = -EINVAL;
-       return nfserrno(cloned < 0 ? cloned : 0);
+               return nfserrno(-EINVAL);
+       return 0;
 }
 
 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
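
nfsd4_clone_file_range() now tests for a negative return before comparing
against the requested byte count, so a positive short-clone result is no
longer fed to nfserrno(). The general shape of the fix (stand-in names):

    #include <errno.h>

    /* For APIs returning "negative errno or bytes processed": check the
     * error first, then the short-completion case. */
    static int status_from_result(long long done, long long wanted)
    {
            if (done < 0)
                    return (int)done;   /* propagate -errno */
            if (wanted && done != wanted)
                    return -EINVAL;     /* partial result */
            return 0;
    }
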
index 8ae1094..e39bac9 100644
@@ -256,7 +256,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
                inode = proc_get_inode(dir->i_sb, de);
                if (!inode)
                        return ERR_PTR(-ENOMEM);
-               d_set_d_op(dentry, &proc_misc_dentry_ops);
+               d_set_d_op(dentry, de->proc_dops);
                return d_splice_alias(inode, dentry);
        }
        read_unlock(&proc_subdir_lock);
@@ -429,6 +429,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
        INIT_LIST_HEAD(&ent->pde_openers);
        proc_set_user(ent, (*parent)->uid, (*parent)->gid);
 
+       ent->proc_dops = &proc_misc_dentry_ops;
+
 out:
        return ent;
 }
index 5185d7f..95b1419 100644
@@ -44,6 +44,7 @@ struct proc_dir_entry {
        struct completion *pde_unload_completion;
        const struct inode_operations *proc_iops;
        const struct file_operations *proc_fops;
+       const struct dentry_operations *proc_dops;
        union {
                const struct seq_operations *seq_ops;
                int (*single_show)(struct seq_file *, void *);
index d5e0fcb..a7b1243 100644 (file)
@@ -38,6 +38,22 @@ static struct net *get_proc_net(const struct inode *inode)
        return maybe_get_net(PDE_NET(PDE(inode)));
 }
 
+static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+       return 0;
+}
+
+static const struct dentry_operations proc_net_dentry_ops = {
+       .d_revalidate   = proc_net_d_revalidate,
+       .d_delete       = always_delete_dentry,
+};
+
+static void pde_force_lookup(struct proc_dir_entry *pde)
+{
+       /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
+       pde->proc_dops = &proc_net_dentry_ops;
+}
+
 static int seq_open_net(struct inode *inode, struct file *file)
 {
        unsigned int state_size = PDE(inode)->state_size;
@@ -90,6 +106,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
        p = proc_create_reg(name, mode, &parent, data);
        if (!p)
                return NULL;
+       pde_force_lookup(p);
        p->proc_fops = &proc_net_seq_fops;
        p->seq_ops = ops;
        p->state_size = state_size;
@@ -133,6 +150,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode
        p = proc_create_reg(name, mode, &parent, data);
        if (!p)
                return NULL;
+       pde_force_lookup(p);
        p->proc_fops = &proc_net_seq_fops;
        p->seq_ops = ops;
        p->state_size = state_size;
@@ -181,6 +199,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
        p = proc_create_reg(name, mode, &parent, data);
        if (!p)
                return NULL;
+       pde_force_lookup(p);
        p->proc_fops = &proc_net_single_fops;
        p->single_show = show;
        return proc_register(parent, p);
@@ -223,6 +242,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
        p = proc_create_reg(name, mode, &parent, data);
        if (!p)
                return NULL;
+       pde_force_lookup(p);
        p->proc_fops = &proc_net_single_fops;
        p->single_show = show;
        p->write = write;
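
The d_revalidate hook above always returns 0, so every path walk under /proc/net repeats the lookup; as the comment notes, the entries behind those dentries can change whenever the task switches network namespaces. A small userspace demonstration of why caching would go stale (needs CAP_NET_ADMIN to create a namespace):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

static void dump_dev(const char *when)
{
	char line[256];
	FILE *f = fopen("/proc/net/dev", "r");

	if (!f) {
		perror("fopen");
		exit(1);
	}
	printf("--- %s ---\n", when);
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	dump_dev("initial netns");
	if (unshare(CLONE_NEWNET)) {
		perror("unshare");
		return 1;
	}
	dump_dev("fresh netns");	/* same path, only "lo" remains */
	return 0;
}
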
index f0ec9ed..85b0ef8 100644 (file)
@@ -423,7 +423,7 @@ struct mem_size_stats {
 };
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
-               bool compound, bool young, bool dirty)
+               bool compound, bool young, bool dirty, bool locked)
 {
        int i, nr = compound ? 1 << compound_order(page) : 1;
        unsigned long size = nr * PAGE_SIZE;
@@ -450,24 +450,31 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
                else
                        mss->private_clean += size;
                mss->pss += (u64)size << PSS_SHIFT;
+               if (locked)
+                       mss->pss_locked += (u64)size << PSS_SHIFT;
                return;
        }
 
        for (i = 0; i < nr; i++, page++) {
                int mapcount = page_mapcount(page);
+               unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
 
                if (mapcount >= 2) {
                        if (dirty || PageDirty(page))
                                mss->shared_dirty += PAGE_SIZE;
                        else
                                mss->shared_clean += PAGE_SIZE;
-                       mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+                       mss->pss += pss / mapcount;
+                       if (locked)
+                               mss->pss_locked += pss / mapcount;
                } else {
                        if (dirty || PageDirty(page))
                                mss->private_dirty += PAGE_SIZE;
                        else
                                mss->private_clean += PAGE_SIZE;
-                       mss->pss += PAGE_SIZE << PSS_SHIFT;
+                       mss->pss += pss;
+                       if (locked)
+                               mss->pss_locked += pss;
                }
        }
 }
@@ -490,6 +497,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 {
        struct mem_size_stats *mss = walk->private;
        struct vm_area_struct *vma = walk->vma;
+       bool locked = !!(vma->vm_flags & VM_LOCKED);
        struct page *page = NULL;
 
        if (pte_present(*pte)) {
@@ -532,7 +540,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
        if (!page)
                return;
 
-       smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
+       smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -541,6 +549,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 {
        struct mem_size_stats *mss = walk->private;
        struct vm_area_struct *vma = walk->vma;
+       bool locked = !!(vma->vm_flags & VM_LOCKED);
        struct page *page;
 
        /* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -555,7 +564,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                /* pass */;
        else
                VM_BUG_ON_PAGE(1, page);
-       smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
+       smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
 }
 #else
 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -737,11 +746,8 @@ static void smap_gather_stats(struct vm_area_struct *vma,
                }
        }
 #endif
-
        /* mmap_sem is held in m_start */
        walk_page_vma(vma, &smaps_walk);
-       if (vma->vm_flags & VM_LOCKED)
-               mss->pss_locked += mss->pss;
 }
 
 #define SEQ_PUT_DEC(str, val) \
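
PSS is accumulated in fixed point: each page contributes PAGE_SIZE << PSS_SHIFT divided by its mapcount, and the total is shifted back down when reported, so fractional shares are not lost. The hunks above move the VM_LOCKED accounting into smaps_account() so that pss_locked receives the same proportional shares rather than a whole copy of the VMA's pss. A worked example of the arithmetic (PSS_SHIFT is 12 in fs/proc/task_mmu.c at the time of this series; the program below is purely illustrative):

#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SIZE 4096UL
#define PSS_SHIFT 12

int main(void)
{
	uint64_t pss = 0;
	int mapcount;

	/* three pages shared by 1, 2 and 3 mappings respectively */
	for (mapcount = 1; mapcount <= 3; mapcount++)
		pss += ((uint64_t)DEMO_PAGE_SIZE << PSS_SHIFT) / mapcount;

	/* shift back down to bytes when reporting, as show_smap() does */
	printf("pss = %llu bytes\n", (unsigned long long)(pss >> PSS_SHIFT));
	return 0;
}
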
index 1c8eecf..6acf1bf 100644 (file)
@@ -768,18 +768,23 @@ xrep_findroot_block(
                if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
                                &mp->m_sb.sb_meta_uuid))
                        goto out;
+               /*
+                * Read verifiers can reference b_ops, so we set the pointer
+                * here.  If the verifier fails we'll reset the buffer state
+                * to what it was before we touched the buffer.
+                */
+               bp->b_ops = fab->buf_ops;
                fab->buf_ops->verify_read(bp);
                if (bp->b_error) {
+                       bp->b_ops = NULL;
                        bp->b_error = 0;
                        goto out;
                }
 
                /*
                 * Some read verifiers will (re)set b_ops, so we must be
-                * careful not to blow away any such assignment.
+                * careful not to change b_ops after running the verifier.
                 */
-               if (!bp->b_ops)
-                       bp->b_ops = fab->buf_ops;
        }
 
        /*
index 338b9d9..d9048bc 100644 (file)
@@ -449,6 +449,7 @@ xfs_map_blocks(
        }
 
        wpc->imap = imap;
+       xfs_trim_extent_eof(&wpc->imap, ip);
        trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
        return 0;
 allocate_blocks:
@@ -459,6 +460,7 @@ allocate_blocks:
        ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
               imap.br_startoff + imap.br_blockcount <= cow_fsb);
        wpc->imap = imap;
+       xfs_trim_extent_eof(&wpc->imap, ip);
        trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
        return 0;
 }
index eedc5e0..4f5f2ff 100644 (file)
@@ -776,10 +776,26 @@ _xfs_buf_read(
 }
 
 /*
+ * Set buffer ops on an unchecked buffer and validate it, if possible.
+ *
  * If the caller passed in an ops structure and the buffer doesn't have ops
  * assigned, set the ops and use them to verify the contents.  If the contents
  * cannot be verified, we'll clear XBF_DONE.  We assume the buffer has no
  * recorded errors and is already in XBF_DONE state.
+ *
+ * Under normal operation, every in-core buffer must have buffer ops assigned
+ * to it when the buffer is read in from disk so that we can validate the
+ * metadata.
+ *
+ * However, there are two scenarios where one can encounter in-core buffers
+ * that don't have buffer ops.  The first is during log recovery of buffers on
+ * a V4 filesystem, though these buffers are purged at the end of recovery.
+ *
+ * The other is online repair, which tries to match arbitrary metadata blocks
+ * with btree types in order to find the root.  If online repair doesn't match
+ * the buffer with /any/ btree type, the buffer remains in memory in DONE state
+ * with no ops, and a subsequent read_buf call from elsewhere will not set the
+ * ops.  This function fixes up such buffers.
  */
 int
 xfs_buf_ensure_ops(
@@ -1536,8 +1552,7 @@ __xfs_buf_submit(
                xfs_buf_ioerror(bp, -EIO);
                bp->b_flags &= ~XBF_DONE;
                xfs_buf_stale(bp);
-               if (bp->b_flags & XBF_ASYNC)
-                       xfs_buf_ioend(bp);
+               xfs_buf_ioend(bp);
                return -EIO;
        }
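
The xfs hunks above share one idea: xrep_findroot_block() must install b_ops before running a read verifier, because some verifiers dereference b_ops, and it must roll the assignment back when verification fails so an unmatched buffer is left exactly as it was found. A compilable sketch of that assign/verify/rollback shape (all types and names below are ours, not XFS's):

struct demo_buf;

struct demo_buf_ops {
	void (*verify_read)(struct demo_buf *bp);
};

struct demo_buf {
	const struct demo_buf_ops *ops;
	int error;
};

static int try_match(struct demo_buf *bp, const struct demo_buf_ops *cand)
{
	bp->ops = cand;			/* the verifier may dereference bp->ops */
	cand->verify_read(bp);
	if (bp->error) {
		bp->ops = NULL;		/* restore the pre-verify state */
		bp->error = 0;
		return 0;		/* not this btree type */
	}
	return 1;			/* cand validated the block */
}
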
 
index b53be41..04f7ac3 100644 (file)
 #define IMX8MQ_CLK_VPU_G2_ROOT                 241
 
 /* SCCG PLL GATE */
-#define IMX8MQ_SYS1_PLL_OUT                    232
+#define IMX8MQ_SYS1_PLL_OUT                    242
 #define IMX8MQ_SYS2_PLL_OUT                    243
 #define IMX8MQ_SYS3_PLL_OUT                    244
 #define IMX8MQ_DRAM_PLL_OUT                    245
 /* txesc clock */
 #define IMX8MQ_CLK_DSI_IPG_DIV                  256
 
-#define IMX8MQ_CLK_TMU_ROOT                    265
+#define IMX8MQ_CLK_TMU_ROOT                    257
 
 /* Display root clocks */
-#define IMX8MQ_CLK_DISP_AXI_ROOT               266
-#define IMX8MQ_CLK_DISP_APB_ROOT               267
-#define IMX8MQ_CLK_DISP_RTRM_ROOT              268
+#define IMX8MQ_CLK_DISP_AXI_ROOT               258
+#define IMX8MQ_CLK_DISP_APB_ROOT               259
+#define IMX8MQ_CLK_DISP_RTRM_ROOT              260
 
-#define IMX8MQ_CLK_OCOTP_ROOT                  269
+#define IMX8MQ_CLK_OCOTP_ROOT                  261
 
-#define IMX8MQ_CLK_DRAM_ALT_ROOT               270
-#define IMX8MQ_CLK_DRAM_CORE                   271
+#define IMX8MQ_CLK_DRAM_ALT_ROOT               262
+#define IMX8MQ_CLK_DRAM_CORE                   263
 
-#define IMX8MQ_CLK_MU_ROOT                     272
-#define IMX8MQ_VIDEO2_PLL_OUT                  273
+#define IMX8MQ_CLK_MU_ROOT                     264
+#define IMX8MQ_VIDEO2_PLL_OUT                  265
 
-#define IMX8MQ_CLK_CLKO2                       274
+#define IMX8MQ_CLK_CLKO2                       266
 
-#define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK  275
+#define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK  267
 
-#define IMX8MQ_CLK_END                         276
+#define IMX8MQ_CLK_END                         268
 #endif /* __DT_BINDINGS_CLOCK_IMX8MQ_H */
index 7b24fc7..228a5e2 100644 (file)
@@ -71,7 +71,6 @@
 #define MMP2_CLK_CCIC1_MIX             117
 #define MMP2_CLK_CCIC1_PHY             118
 #define MMP2_CLK_CCIC1_SPHY            119
-#define MMP2_CLK_SP                    120
 
 #define MMP2_NR_CLKS                   200
 #endif
index 8804753..7bb2d8d 100644 (file)
@@ -116,7 +116,13 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
 
 static inline sector_t blk_rq_trace_sector(struct request *rq)
 {
-       return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq);
+       /*
+        * Tracing should ignore the starting sector for passthrough requests
+        * and for requests where the starting sector was never set.
+        */
+       if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1)
+               return 0;
+       return blk_rq_pos(rq);
 }
 
 static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq)
index 218df7f..5041357 100644 (file)
@@ -180,12 +180,10 @@ enum cpuhp_smt_control {
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
-extern void cpu_smt_check_topology_early(void);
 extern void cpu_smt_check_topology(void);
 #else
 # define cpu_smt_control               (CPU_SMT_ENABLED)
 static inline void cpu_smt_disable(bool force) { }
-static inline void cpu_smt_check_topology_early(void) { }
 static inline void cpu_smt_check_topology(void) { }
 #endif
 
index ef4b70f..60996e6 100644 (file)
@@ -62,9 +62,10 @@ extern const struct qstr slash_name;
 struct dentry_stat_t {
        long nr_dentry;
        long nr_unused;
-       long age_limit;          /* age in seconds */
-       long want_pages;         /* pages requested by system */
-       long dummy[2];
+       long age_limit;         /* age in seconds */
+       long want_pages;        /* pages requested by system */
+       long nr_negative;       /* # of unused negative dentries */
+       long dummy;             /* Reserved for future use */
 };
 extern struct dentry_stat_t dentry_stat;
 
index 7317376..95e2d7e 100644 (file)
@@ -611,8 +611,8 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
        return qdisc_skb_cb(skb)->data;
 }
 
-static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
-                                      struct sk_buff *skb)
+static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
+                                        struct sk_buff *skb)
 {
        u8 *cb_data = bpf_skb_cb(skb);
        u8 cb_saved[BPF_SKB_CB_LEN];
@@ -631,15 +631,30 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
        return res;
 }
 
+static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
+                                      struct sk_buff *skb)
+{
+       u32 res;
+
+       preempt_disable();
+       res = __bpf_prog_run_save_cb(prog, skb);
+       preempt_enable();
+       return res;
+}
+
 static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
                                        struct sk_buff *skb)
 {
        u8 *cb_data = bpf_skb_cb(skb);
+       u32 res;
 
        if (unlikely(prog->cb_access))
                memset(cb_data, 0, BPF_SKB_CB_LEN);
 
-       return BPF_PROG_RUN(prog, skb);
+       preempt_disable();
+       res = BPF_PROG_RUN(prog, skb);
+       preempt_enable();
+       return res;
 }
 
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
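
The refactoring above keeps the cb save/restore in __bpf_prog_run_save_cb() and wraps both run paths in preempt_disable()/preempt_enable(), since a BPF program must not migrate CPUs mid-run. A plain-C sketch of the save/run/restore shape only (illustrative; CB_LEN and run_prog stand in for BPF_SKB_CB_LEN and BPF_PROG_RUN, and the preemption pair has no userspace equivalent):

#include <string.h>

#define CB_LEN 20	/* stands in for BPF_SKB_CB_LEN */

struct demo_skb {
	unsigned char cb[CB_LEN];
};

static unsigned int run_with_saved_cb(unsigned int (*run_prog)(struct demo_skb *),
				      struct demo_skb *skb)
{
	unsigned char saved[CB_LEN];
	unsigned int res;

	memcpy(saved, skb->cb, sizeof(saved));	/* the program may scribble on cb */
	res = run_prog(skb);
	memcpy(skb->cb, saved, sizeof(saved));	/* the caller's cb survives the run */
	return res;
}
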
index 811c777..29d8e2c 100644 (file)
@@ -1479,11 +1479,12 @@ struct super_block {
        struct user_namespace *s_user_ns;
 
        /*
-        * Keep the lru lists last in the structure so they always sit on their
-        * own individual cachelines.
+        * The list_lru structure is essentially just a pointer to a table
+        * of per-node lru lists, each of which has its own spinlock.
+        * There is no need to put them into separate cachelines.
         */
-       struct list_lru         s_dentry_lru ____cacheline_aligned_in_smp;
-       struct list_lru         s_inode_lru ____cacheline_aligned_in_smp;
+       struct list_lru         s_dentry_lru;
+       struct list_lru         s_inode_lru;
        struct rcu_head         rcu;
        struct work_struct      destroy_work;
 
index 94e9797..f127adb 100644 (file)
@@ -7,6 +7,7 @@
 #define __PTP_QORIQ_H__
 
 #include <linux/io.h>
+#include <linux/interrupt.h>
 #include <linux/ptp_clock_kernel.h>
 
 /*
@@ -49,7 +50,7 @@ struct etts_regs {
        u32 tmr_etts2_l;  /* Timestamp of general purpose external trigger */
 };
 
-struct qoriq_ptp_registers {
+struct ptp_qoriq_registers {
        struct ctrl_regs __iomem *ctrl_regs;
        struct alarm_regs __iomem *alarm_regs;
        struct fiper_regs __iomem *fiper_regs;
@@ -57,15 +58,15 @@ struct qoriq_ptp_registers {
 };
 
 /* Offset definitions for the four register groups */
-#define CTRL_REGS_OFFSET       0x0
-#define ALARM_REGS_OFFSET      0x40
-#define FIPER_REGS_OFFSET      0x80
-#define ETTS_REGS_OFFSET       0xa0
+#define ETSEC_CTRL_REGS_OFFSET 0x0
+#define ETSEC_ALARM_REGS_OFFSET        0x40
+#define ETSEC_FIPER_REGS_OFFSET        0x80
+#define ETSEC_ETTS_REGS_OFFSET 0xa0
 
-#define FMAN_CTRL_REGS_OFFSET  0x80
-#define FMAN_ALARM_REGS_OFFSET 0xb8
-#define FMAN_FIPER_REGS_OFFSET 0xd0
-#define FMAN_ETTS_REGS_OFFSET  0xe0
+#define CTRL_REGS_OFFSET       0x80
+#define ALARM_REGS_OFFSET      0xb8
+#define FIPER_REGS_OFFSET      0xd0
+#define ETTS_REGS_OFFSET       0xe0
 
 
 /* Bit definitions for the TMR_CTRL register */
@@ -136,9 +137,9 @@ struct qoriq_ptp_registers {
 #define DEFAULT_FIPER1_PERIOD  1000000000
 #define DEFAULT_FIPER2_PERIOD  100000
 
-struct qoriq_ptp {
+struct ptp_qoriq {
        void __iomem *base;
-       struct qoriq_ptp_registers regs;
+       struct ptp_qoriq_registers regs;
        spinlock_t lock; /* protects regs */
        struct ptp_clock *clock;
        struct ptp_clock_info caps;
@@ -156,28 +157,48 @@ struct qoriq_ptp {
        u32 cksel;
        u32 tmr_fiper1;
        u32 tmr_fiper2;
+       u32 (*read)(unsigned __iomem *addr);
+       void (*write)(unsigned __iomem *addr, u32 val);
 };
 
-static inline u32 qoriq_read(unsigned __iomem *addr)
+static inline u32 qoriq_read_be(unsigned __iomem *addr)
 {
-       u32 val;
-
-       val = ioread32be(addr);
-       return val;
+       return ioread32be(addr);
 }
 
-static inline void qoriq_write(unsigned __iomem *addr, u32 val)
+static inline void qoriq_write_be(unsigned __iomem *addr, u32 val)
 {
        iowrite32be(val, addr);
 }
 
+static inline u32 qoriq_read_le(unsigned __iomem *addr)
+{
+       return ioread32(addr);
+}
+
+static inline void qoriq_write_le(unsigned __iomem *addr, u32 val)
+{
+       iowrite32(val, addr);
+}
+
+irqreturn_t ptp_qoriq_isr(int irq, void *priv);
+int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
+                  const struct ptp_clock_info caps);
+void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq);
+int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm);
+int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta);
+int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts);
+int ptp_qoriq_settime(struct ptp_clock_info *ptp,
+                     const struct timespec64 *ts);
+int ptp_qoriq_enable(struct ptp_clock_info *ptp,
+                    struct ptp_clock_request *rq, int on);
 #ifdef CONFIG_DEBUG_FS
-void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp);
-void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp);
+void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq);
+void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq);
 #else
-static inline void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp)
+static inline void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq)
 { }
-static inline void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp)
+static inline void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq)
 { }
 #endif
 
index 8663f21..2d6100e 100644 (file)
 
 #ifdef CONFIG_DEBUG_FS
 
+#include <linux/kfifo.h>
+
 #define HID_DEBUG_BUFSIZE 512
+#define HID_DEBUG_FIFOSIZE 512
 
 void hid_dump_input(struct hid_device *, struct hid_usage *, __s32);
 void hid_dump_report(struct hid_device *, int , u8 *, int);
@@ -37,11 +40,8 @@ void hid_debug_init(void);
 void hid_debug_exit(void);
 void hid_debug_event(struct hid_device *, char *);
 
-
 struct hid_debug_list {
-       char *hid_debug_buf;
-       int head;
-       int tail;
+       DECLARE_KFIFO_PTR(hid_debug_fifo, char);
        struct fasync_struct *fasync;
        struct hid_device *hdev;
        struct list_head node;
@@ -64,4 +64,3 @@ struct hid_debug_list {
 #endif
 
 #endif
-
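
Replacing the hand-rolled head/tail buffer in struct hid_debug_list with a kfifo delegates index wrap-around and fill accounting to the kfifo core. A kernel-style sketch of the calls involved (not standalone and not the hid-debug code itself; the size follows HID_DEBUG_FIFOSIZE above):

#include <linux/kfifo.h>

struct demo_list {
	DECLARE_KFIFO_PTR(fifo, char);
};

static int demo_init(struct demo_list *list)
{
	/* 512-element char fifo, as HID_DEBUG_FIFOSIZE above */
	return kfifo_alloc(&list->fifo, 512, GFP_KERNEL);
}

static void demo_log(struct demo_list *list, const char *buf, unsigned int len)
{
	kfifo_in(&list->fifo, buf, len);	/* drops bytes when full */
}

static void demo_exit(struct demo_list *list)
{
	kfifo_free(&list->fifo);
}
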
index e7d29ae..971cf76 100644 (file)
@@ -615,6 +615,7 @@ struct ide_drive_s {
 
        /* current sense rq and buffer */
        bool sense_rq_armed;
+       bool sense_rq_active;
        struct request *sense_rq;
        struct request_sense sense_data;
 
@@ -1219,6 +1220,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
 extern void ide_timer_expiry(struct timer_list *t);
 extern irqreturn_t ide_intr(int irq, void *dev_id);
 extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
+extern blk_status_t ide_issue_rq(ide_drive_t *, struct request *, bool);
 extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq);
 
 void ide_init_disk(struct gendisk *, ide_drive_t *);
index 071b4cb..c848a7c 100644 (file)
 #define GITS_TYPER_PLPIS               (1UL << 0)
 #define GITS_TYPER_VLPIS               (1UL << 1)
 #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT        4
-#define GITS_TYPER_ITT_ENTRY_SIZE(r)   ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
+#define GITS_TYPER_ITT_ENTRY_SIZE(r)   ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1)
 #define GITS_TYPER_IDBITS_SHIFT                8
 #define GITS_TYPER_DEVBITS_SHIFT       13
 #define GITS_TYPER_DEVBITS(r)          ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
index 07da5c6..368267c 100644 (file)
@@ -21,14 +21,16 @@ struct vmem_altmap;
  * walkers which rely on the fully initialized page->flags and others
  * should use this rather than pfn_valid && pfn_to_page
  */
-#define pfn_to_online_page(pfn)                                \
-({                                                     \
-       struct page *___page = NULL;                    \
-       unsigned long ___nr = pfn_to_section_nr(pfn);   \
-                                                       \
-       if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\
-               ___page = pfn_to_page(pfn);             \
-       ___page;                                        \
+#define pfn_to_online_page(pfn)                                           \
+({                                                                \
+       struct page *___page = NULL;                               \
+       unsigned long ___pfn = pfn;                                \
+       unsigned long ___nr = pfn_to_section_nr(___pfn);           \
+                                                                  \
+       if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \
+           pfn_valid_within(___pfn))                              \
+               ___page = pfn_to_page(___pfn);                     \
+       ___page;                                                   \
 })
 
 /*
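
Besides adding the pfn_valid_within() check, the reworked macro copies its argument into the local ___pfn so an expression passed as pfn is evaluated once rather than per use. A userspace demonstration of that single-evaluation idiom (GNU statement expressions, as the macro itself uses):

#include <stdio.h>

static int evaluations;

static int pfn_expr(void)	/* stands in for a side-effecting argument */
{
	evaluations++;
	return 7;
}

#define TWICE(x)	((x) + (x))
#define ONCE(x)		({ __typeof__(x) _x = (x); _x + _x; })

int main(void)
{
	evaluations = 0;
	(void)TWICE(pfn_expr());
	printf("naive macro:    %d evaluations\n", evaluations);	/* 2 */

	evaluations = 0;
	(void)ONCE(pfn_expr());
	printf("captured once:  %d evaluations\n", evaluations);	/* 1 */
	return 0;
}
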
index 8c4a820..f93a559 100644 (file)
@@ -67,7 +67,7 @@
 #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8)
 #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32)
 #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
-#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld))
+#define MLX5_ADDR_OF(typ, p, fld) ((void *)((uint8_t *)(p) + MLX5_BYTE_OFF(typ, fld)))
 
 /* insert a value to a struct */
 #define MLX5_SET(typ, p, fld, v) do { \
@@ -342,6 +342,8 @@ enum mlx5_event {
        MLX5_EVENT_TYPE_PAGE_FAULT         = 0xc,
        MLX5_EVENT_TYPE_NIC_VPORT_CHANGE   = 0xd,
 
+       MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe,
+
        MLX5_EVENT_TYPE_DCT_DRAINED        = 0x1c,
 
        MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
@@ -591,7 +593,7 @@ struct mlx5_eqe_cmd {
 };
 
 struct mlx5_eqe_page_req {
-       u8              rsvd0[2];
+       __be16          ec_function;
        __be16          func_id;
        __be32          num_pages;
        __be32          rsvd1[5];
@@ -1201,6 +1203,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_ODP(mdev, cap)\
        MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap)
 
+#define MLX5_CAP_ODP_MAX(mdev, cap)\
+       MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap)
+
 #define MLX5_CAP_VECTOR_CALC(mdev, cap) \
        MLX5_GET(vector_calc_cap, \
                 mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap)
index 5429925..c2de50f 100644 (file)
@@ -522,6 +522,7 @@ struct mlx5_priv {
        atomic_t                reg_pages;
        struct list_head        free_list;
        int                     vfs_pages;
+       int                     peer_pf_pages;
 
        struct mlx5_core_health health;
 
@@ -652,6 +653,7 @@ struct mlx5_core_dev {
                u32 mcam[MLX5_ST_SZ_DW(mcam_reg)];
                u32 fpga[MLX5_ST_SZ_DW(fpga_cap)];
                u32 qcam[MLX5_ST_SZ_DW(qcam_reg)];
+               u8  embedded_cpu;
        } caps;
        u64                     sys_image_guid;
        phys_addr_t             iseg_base;
@@ -850,11 +852,30 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 
+struct mlx5_async_ctx {
+       struct mlx5_core_dev *dev;
+       atomic_t num_inflight;
+       struct wait_queue_head wait;
+};
+
+struct mlx5_async_work;
+
+typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context);
+
+struct mlx5_async_work {
+       struct mlx5_async_ctx *ctx;
+       mlx5_async_cbk_t user_callback;
+};
+
+void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
+                            struct mlx5_async_ctx *ctx);
+void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx);
+int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+                    void *out, int out_size, mlx5_async_cbk_t callback,
+                    struct mlx5_async_work *work);
+
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
                  int out_size);
-int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
-                    void *out, int out_size, mlx5_cmd_cbk_t callback,
-                    void *context);
 int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
                          void *out, int out_size);
 void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
@@ -885,9 +906,10 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev);
 int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
                             struct mlx5_core_mkey *mkey,
-                            u32 *in, int inlen,
-                            u32 *out, int outlen,
-                            mlx5_cmd_cbk_t callback, void *context);
+                            struct mlx5_async_ctx *async_ctx, u32 *in,
+                            int inlen, u32 *out, int outlen,
+                            mlx5_async_cbk_t callback,
+                            struct mlx5_async_work *context);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
                          struct mlx5_core_mkey *mkey,
                          u32 *in, int inlen);
@@ -897,14 +919,12 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
                         u32 *out, int outlen);
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
-int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
-                     u16 opmod, u8 port);
 int mlx5_pagealloc_init(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-                                s32 npages);
+                                s32 npages, bool ec_function);
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
 void mlx5_register_debugfs(void);
@@ -1058,11 +1078,29 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
        return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
 }
 
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
-       (MLX5_CAP_GEN(mdev, vport_group_manager) && \
-        (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
-        mlx5_core_is_pf(mdev))
+static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev)
+{
+       return dev->caps.embedded_cpu;
+}
+
+static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev)
+{
+       return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager);
+}
+
+static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev)
+{
+       return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists);
+}
+
+#define MLX5_HOST_PF_MAX_VFS   (127u)
+static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev)
+{
+       if (mlx5_core_is_ecpf_esw_manager(dev))
+               return MLX5_HOST_PF_MAX_VFS;
+       else
+               return pci_sriov_get_totalvfs(dev->pdev);
+}
 
 static inline int mlx5_get_gid_table_len(u16 param)
 {
index fab5121..96d8435 100644 (file)
@@ -22,6 +22,12 @@ enum {
        NUM_REP_TYPES,
 };
 
+enum {
+       REP_UNREGISTERED,
+       REP_REGISTERED,
+       REP_LOADED,
+};
+
 struct mlx5_eswitch_rep;
 struct mlx5_eswitch_rep_if {
        int                    (*load)(struct mlx5_core_dev *dev,
@@ -29,7 +35,7 @@ struct mlx5_eswitch_rep_if {
        void                   (*unload)(struct mlx5_eswitch_rep *rep);
        void                   *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
        void                    *priv;
-       bool                   valid;
+       u8                      state;
 };
 
 struct mlx5_eswitch_rep {
@@ -40,13 +46,10 @@ struct mlx5_eswitch_rep {
        u32                    vlan_refcount;
 };
 
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-                                    int vport_index,
-                                    struct mlx5_eswitch_rep_if *rep_if,
-                                    u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-                                      int vport_index,
-                                      u8 rep_type);
+void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
+                                     struct mlx5_eswitch_rep_if *rep_if,
+                                     u8 rep_type);
+void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type);
 void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
                                 int vport,
                                 u8 rep_type);
index 35fe521..b7bb774 100644 (file)
@@ -72,6 +72,7 @@ enum {
 
 enum {
        MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE        = 0x0,
+       MLX5_SET_HCA_CAP_OP_MOD_ODP                   = 0x2,
        MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
 };
 
@@ -141,6 +142,7 @@ enum {
        MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY     = 0x725,
        MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY       = 0x726,
        MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS        = 0x727,
+       MLX5_CMD_OP_QUERY_HOST_PARAMS             = 0x740,
        MLX5_CMD_OP_QUERY_VPORT_STATE             = 0x750,
        MLX5_CMD_OP_MODIFY_VPORT_STATE            = 0x751,
        MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT       = 0x752,
@@ -629,7 +631,8 @@ struct mlx5_ifc_e_switch_cap_bits {
        u8         vport_svlan_insert[0x1];
        u8         vport_cvlan_insert_if_not_exist[0x1];
        u8         vport_cvlan_insert_overwrite[0x1];
-       u8         reserved_at_5[0x17];
+       u8         reserved_at_5[0x16];
+       u8         ecpf_vport_exists[0x1];
        u8         counter_eswitch_affinity[0x1];
        u8         merged_eswitch[0x1];
        u8         nic_vport_node_guid_modify[0x1];
@@ -831,7 +834,9 @@ struct mlx5_ifc_odp_cap_bits {
 
        struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps;
 
-       u8         reserved_at_e0[0x720];
+       struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps;
+
+       u8         reserved_at_100[0x700];
 };
 
 struct mlx5_ifc_calc_op {
@@ -4438,7 +4443,8 @@ struct mlx5_ifc_query_pages_out_bits {
 
        u8         syndrome[0x20];
 
-       u8         reserved_at_40[0x10];
+       u8         embedded_cpu_function[0x1];
+       u8         reserved_at_41[0xf];
        u8         function_id[0x10];
 
        u8         num_pages[0x20];
@@ -4457,7 +4463,8 @@ struct mlx5_ifc_query_pages_in_bits {
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_at_40[0x10];
+       u8         embedded_cpu_function[0x1];
+       u8         reserved_at_41[0xf];
        u8         function_id[0x10];
 
        u8         reserved_at_60[0x20];
@@ -5877,7 +5884,8 @@ struct mlx5_ifc_manage_pages_in_bits {
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_at_40[0x10];
+       u8         embedded_cpu_function[0x1];
+       u8         reserved_at_41[0xf];
        u8         function_id[0x10];
 
        u8         input_num_entries[0x20];
@@ -6055,7 +6063,8 @@ struct mlx5_ifc_enable_hca_in_bits {
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_at_40[0x10];
+       u8         embedded_cpu_function[0x1];
+       u8         reserved_at_41[0xf];
        u8         function_id[0x10];
 
        u8         reserved_at_60[0x20];
@@ -6099,7 +6108,8 @@ struct mlx5_ifc_disable_hca_in_bits {
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
 
-       u8         reserved_at_40[0x10];
+       u8         embedded_cpu_function[0x1];
+       u8         reserved_at_41[0xf];
        u8         function_id[0x10];
 
        u8         reserved_at_60[0x20];
@@ -7817,21 +7827,23 @@ struct mlx5_ifc_ptys_reg_bits {
        u8         proto_mask[0x3];
 
        u8         an_status[0x4];
-       u8         reserved_at_24[0x3c];
+       u8         reserved_at_24[0x1c];
+
+       u8         ext_eth_proto_capability[0x20];
 
        u8         eth_proto_capability[0x20];
 
        u8         ib_link_width_capability[0x10];
        u8         ib_proto_capability[0x10];
 
-       u8         reserved_at_a0[0x20];
+       u8         ext_eth_proto_admin[0x20];
 
        u8         eth_proto_admin[0x20];
 
        u8         ib_link_width_admin[0x10];
        u8         ib_proto_admin[0x10];
 
-       u8         reserved_at_100[0x20];
+       u8         ext_eth_proto_oper[0x20];
 
        u8         eth_proto_oper[0x20];
 
@@ -8280,7 +8292,9 @@ struct mlx5_ifc_mpegc_reg_bits {
 struct mlx5_ifc_pcam_enhanced_features_bits {
        u8         reserved_at_0[0x6d];
        u8         rx_icrc_encapsulated_counter[0x1];
-       u8         reserved_at_6e[0x8];
+       u8         reserved_at_6e[0x4];
+       u8         ptys_extended_ethernet[0x1];
+       u8         reserved_at_73[0x3];
        u8         pfcc_mask[0x1];
        u8         reserved_at_77[0x3];
        u8         per_lane_error_counters[0x1];
@@ -8746,7 +8760,8 @@ struct mlx5_ifc_initial_seg_bits {
        u8         initializing[0x1];
        u8         reserved_at_fe1[0x4];
        u8         nic_interface_supported[0x3];
-       u8         reserved_at_fe8[0x18];
+       u8         embedded_cpu[0x1];
+       u8         reserved_at_fe9[0x17];
 
        struct mlx5_ifc_health_buffer_bits health_buffer;
 
@@ -9513,4 +9528,44 @@ struct mlx5_ifc_mtrc_ctrl_bits {
        u8         reserved_at_80[0x180];
 };
 
+struct mlx5_ifc_host_params_context_bits {
+       u8         host_number[0x8];
+       u8         reserved_at_8[0x8];
+       u8         host_num_of_vfs[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         host_pci_bus[0x10];
+
+       u8         reserved_at_40[0x10];
+       u8         host_pci_device[0x10];
+
+       u8         reserved_at_60[0x10];
+       u8         host_pci_function[0x10];
+
+       u8         reserved_at_80[0x180];
+};
+
+struct mlx5_ifc_query_host_params_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_query_host_params_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+
+       struct mlx5_ifc_host_params_context_bits host_params_context;
+
+       u8         reserved_at_280[0x180];
+};
+
 #endif /* MLX5_IFC_H */
index bf4bc01..814fa19 100644 (file)
@@ -92,6 +92,22 @@ enum mlx5e_link_mode {
        MLX5E_LINK_MODES_NUMBER,
 };
 
+enum mlx5e_ext_link_mode {
+       MLX5E_SGMII_100M                        = 0,
+       MLX5E_1000BASE_X_SGMII                  = 1,
+       MLX5E_5GBASE_R                          = 3,
+       MLX5E_10GBASE_XFI_XAUI_1                = 4,
+       MLX5E_40GBASE_XLAUI_4_XLPPI_4           = 5,
+       MLX5E_25GAUI_1_25GBASE_CR_KR            = 6,
+       MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2   = 7,
+       MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR     = 8,
+       MLX5E_CAUI_4_100GBASE_CR4_KR4           = 9,
+       MLX5E_100GAUI_2_100GBASE_CR2_KR2        = 10,
+       MLX5E_200GAUI_4_200GBASE_CR4_KR4        = 12,
+       MLX5E_400GAUI_8                         = 15,
+       MLX5E_EXT_LINK_MODES_NUMBER,
+};
+
 enum mlx5e_connector_type {
        MLX5E_PORT_UNKNOWN      = 0,
        MLX5E_PORT_NONE                 = 1,
@@ -106,31 +122,23 @@ enum mlx5e_connector_type {
 };
 
 #define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
+#define MLX5_GET_ETH_PROTO(reg, out, ext, field)       \
+       (ext ? MLX5_GET(reg, out, ext_##field) :        \
+       MLX5_GET(reg, out, field))
 
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
                         int ptys_size, int proto_mask, u8 local_port);
-int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
-                             u32 *proto_cap, int proto_mask);
-int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
-                               u32 *proto_admin, int proto_mask);
 int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
                                    u8 *link_width_oper, u8 local_port);
 int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
                                  u8 *proto_oper, u8 local_port);
-int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
-                                  u32 *proto_oper, u8 local_port);
-int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
-                      u32 proto_admin, int proto_mask);
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
 int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
                               enum mlx5_port_status status);
 int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
                                 enum mlx5_port_status *status);
 int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration);
-void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask,
-                            u8 *an_status,
-                            u8 *an_disable_cap, u8 *an_disable_admin);
 
 int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port);
 void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port);
index 9c69480..0eef548 100644 (file)
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/device.h>
 
+#define MLX5_VPORT_PF_PLACEHOLDER              (1u)
+#define MLX5_VPORT_UPLINK_PLACEHOLDER          (1u)
+#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev)      (mlx5_ecpf_vport_exists(mdev))
+
+#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER +         \
+                                  MLX5_VPORT_UPLINK_PLACEHOLDER +      \
+                                  MLX5_VPORT_ECPF_PLACEHOLDER(mdev))
+
+#define MLX5_TOTAL_VPORTS(mdev)        (MLX5_SPECIAL_VPORTS(mdev) +            \
+                                mlx5_core_max_vfs(mdev))
+
+#define MLX5_VPORT_MANAGER(mdev)                                       \
+       (MLX5_CAP_GEN(mdev, vport_group_manager) &&                     \
+        (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&   \
+        mlx5_core_is_pf(mdev))
+
 enum {
        MLX5_CAP_INLINE_MODE_L2,
        MLX5_CAP_INLINE_MODE_VPORT_CONTEXT,
        MLX5_CAP_INLINE_MODE_NOT_REQUIRED,
 };
 
+enum {
+       MLX5_VPORT_PF                   = 0x0,
+       MLX5_VPORT_FIRST_VF             = 0x1,
+       MLX5_VPORT_ECPF                 = 0xfffe,
+       MLX5_VPORT_UPLINK               = 0xffff
+};
+
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
-                                 u16 vport, u8 state);
+                                 u16 vport, u8 other_vport, u8 state);
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                     u16 vport, u8 *addr);
 int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
@@ -60,7 +83,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
                                           u64 *system_image_guid);
 int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
 int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
-                                   u32 vport, u64 node_guid);
+                                   u16 vport, u64 node_guid);
 int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
                                        u16 *qkey_viol_cntr);
 int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
@@ -78,7 +101,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
 int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
                                   u64 *node_guid);
 int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
-                                 u32 vport,
+                                 u16 vport,
                                  enum mlx5_list_type list_type,
                                  u8 addr_list[][ETH_ALEN],
                                  int *list_size);
@@ -87,7 +110,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
                                   u8 addr_list[][ETH_ALEN],
                                   int list_size);
 int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
-                                u32 vport,
+                                u16 vport,
                                 int *promisc_uc,
                                 int *promisc_mc,
                                 int *promisc_all);
@@ -96,7 +119,7 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
                                  int promisc_mc,
                                  int promisc_all);
 int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
-                              u32 vport,
+                              u16 vport,
                               u16 vlans[],
                               int *size);
 int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
@@ -106,7 +129,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
 int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
 int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
 int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
-                               u64 *rx_discard_vport_down,
+                               u8 other_vport, u64 *rx_discard_vport_down,
                                u64 *tx_discard_vport_down);
 int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
                                  int vf, u8 port_num, void *out,
index 2c471a2..0a36a22 100644 (file)
@@ -95,6 +95,13 @@ struct page {
                         */
                        unsigned long private;
                };
+               struct {        /* page_pool used by netstack */
+                       /**
+                        * @dma_addr: might require a 64-bit value even on
+                        * 32-bit architectures.
+                        */
+                       dma_addr_t dma_addr;
+               };
                struct {        /* slab, slob and slub */
                        union {
                                struct list_head slab_list;     /* uses lru */
index de73778..8ef3300 100644 (file)
@@ -308,6 +308,7 @@ struct mmc_card {
        unsigned int    nr_parts;
 
        unsigned int            bouncesz;       /* Bounce buffer size */
+       struct workqueue_struct *complete_wq;   /* Private workqueue */
 };
 
 static inline bool mmc_large_sector(struct mmc_card *card)
index 6aedaf1..aab4d9f 100644 (file)
@@ -1491,6 +1491,7 @@ struct net_device_ops {
  * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
  * @IFF_FAILOVER: device is a failover master device
  * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
+ * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of the L3 master device
  */
 enum netdev_priv_flags {
        IFF_802_1Q_VLAN                 = 1<<0,
@@ -1522,6 +1523,7 @@ enum netdev_priv_flags {
        IFF_NO_RX_HANDLER               = 1<<26,
        IFF_FAILOVER                    = 1<<27,
        IFF_FAILOVER_SLAVE              = 1<<28,
+       IFF_L3MDEV_RX_HANDLER           = 1<<29,
 };
 
 #define IFF_802_1Q_VLAN                        IFF_802_1Q_VLAN
@@ -1552,6 +1554,7 @@ enum netdev_priv_flags {
 #define IFF_NO_RX_HANDLER              IFF_NO_RX_HANDLER
 #define IFF_FAILOVER                   IFF_FAILOVER
 #define IFF_FAILOVER_SLAVE             IFF_FAILOVER_SLAVE
+#define IFF_L3MDEV_RX_HANDLER          IFF_L3MDEV_RX_HANDLER
 
 /**
  *     struct net_device - The DEVICE structure.
@@ -4560,6 +4563,11 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
        return dev->priv_flags & IFF_SUPP_NOFCS;
 }
 
+static inline bool netif_has_l3_rx_handler(const struct net_device *dev)
+{
+       return dev->priv_flags & IFF_L3MDEV_RX_HANDLER;
+}
+
 static inline bool netif_is_l3_master(const struct net_device *dev)
 {
        return dev->priv_flags & IFF_L3MDEV_MASTER;
index 34f38c1..7802177 100644 (file)
@@ -6,14 +6,19 @@
 
 struct objagg_ops {
        size_t obj_size;
+       bool (*delta_check)(void *priv, const void *parent_obj,
+                           const void *obj);
+       int (*hints_obj_cmp)(const void *obj1, const void *obj2);
        void * (*delta_create)(void *priv, void *parent_obj, void *obj);
        void (*delta_destroy)(void *priv, void *delta_priv);
-       void * (*root_create)(void *priv, void *obj);
+       void * (*root_create)(void *priv, void *obj, unsigned int root_id);
+#define OBJAGG_OBJ_ROOT_ID_INVALID UINT_MAX
        void (*root_destroy)(void *priv, void *root_priv);
 };
 
 struct objagg;
 struct objagg_obj;
+struct objagg_hints;
 
 const void *objagg_obj_root_priv(const struct objagg_obj *objagg_obj);
 const void *objagg_obj_delta_priv(const struct objagg_obj *objagg_obj);
@@ -21,7 +26,8 @@ const void *objagg_obj_raw(const struct objagg_obj *objagg_obj);
 
 struct objagg_obj *objagg_obj_get(struct objagg *objagg, void *obj);
 void objagg_obj_put(struct objagg *objagg, struct objagg_obj *objagg_obj);
-struct objagg *objagg_create(const struct objagg_ops *ops, void *priv);
+struct objagg *objagg_create(const struct objagg_ops *ops,
+                            struct objagg_hints *hints, void *priv);
 void objagg_destroy(struct objagg *objagg);
 
 struct objagg_obj_stats {
@@ -36,6 +42,7 @@ struct objagg_obj_stats_info {
 };
 
 struct objagg_stats {
+       unsigned int root_count;
        unsigned int stats_info_count;
        struct objagg_obj_stats_info stats_info[];
 };
@@ -43,4 +50,14 @@ struct objagg_stats {
 const struct objagg_stats *objagg_stats_get(struct objagg *objagg);
 void objagg_stats_put(const struct objagg_stats *objagg_stats);
 
+enum objagg_opt_algo_type {
+       OBJAGG_OPT_ALGO_SIMPLE_GREEDY,
+};
+
+struct objagg_hints *objagg_hints_get(struct objagg *objagg,
+                                     enum objagg_opt_algo_type opt_algo_type);
+void objagg_hints_put(struct objagg_hints *objagg_hints);
+const struct objagg_stats *
+objagg_hints_stats_get(struct objagg_hints *objagg_hints);
+
 #endif
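
objagg_create() now accepts an optional hints object, so a caller can compute hints from a populated aggregation and seed a fresh one with them. A kernel-style sketch of that flow built only from the declarations above (not standalone; it assumes objagg_hints_get() returns an ERR_PTR on failure and that the new aggregation takes its own reference on the hints):

static struct objagg *rehash_with_hints(const struct objagg_ops *ops,
					struct objagg *old, void *priv)
{
	struct objagg_hints *hints;
	struct objagg *fresh;

	hints = objagg_hints_get(old, OBJAGG_OPT_ALGO_SIMPLE_GREEDY);
	if (IS_ERR(hints))
		return ERR_CAST(hints);
	fresh = objagg_create(ops, hints, priv);	/* seeded aggregation */
	objagg_hints_put(hints);	/* drop our ref; creation took its own */
	return fresh;
}
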
index 237dd03..bf1070c 100644 (file)
@@ -502,6 +502,12 @@ struct phy_driver {
         */
        int (*probe)(struct phy_device *phydev);
 
+       /*
+        * Probe the hardware to determine what abilities it has.
+        * Should only set phydev->supported.
+        */
+       int (*get_features)(struct phy_device *phydev);
+
        /* PHY Power Management */
        int (*suspend)(struct phy_device *phydev);
        int (*resume)(struct phy_device *phydev);
@@ -667,13 +673,8 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask,
                   bool exact);
 size_t phy_speeds(unsigned int *speeds, size_t size,
                  unsigned long *mask);
-
-static inline bool __phy_is_started(struct phy_device *phydev)
-{
-       WARN_ON(!mutex_is_locked(&phydev->lock));
-
-       return phydev->state >= PHY_UP;
-}
+void of_set_phy_supported(struct phy_device *phydev);
+void of_set_phy_eee_broken(struct phy_device *phydev);
 
 /**
  * phy_is_started - Convenience function to check whether PHY is started
@@ -681,13 +682,7 @@ static inline bool __phy_is_started(struct phy_device *phydev)
  */
 static inline bool phy_is_started(struct phy_device *phydev)
 {
-       bool started;
-
-       mutex_lock(&phydev->lock);
-       started = __phy_is_started(phydev);
-       mutex_unlock(&phydev->lock);
-
-       return started;
+       return phydev->state >= PHY_UP;
 }
 
 void phy_resolve_aneg_linkmode(struct phy_device *phydev);
@@ -793,13 +788,21 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
  */
 int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
 
+int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask,
+                        u16 set);
+int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask,
+                      u16 set);
 int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set);
 int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set);
 
+int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum,
+                            u16 mask, u16 set);
+int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum,
+                          u16 mask, u16 set);
 int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
-               u16 mask, u16 set);
+                    u16 mask, u16 set);
 int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
-               u16 mask, u16 set);
+                  u16 mask, u16 set);
 
 /**
  * __phy_set_bits - Convenience function for setting bits in a PHY register
@@ -1094,12 +1097,13 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
 /* Clause 45 PHY */
 int genphy_c45_restart_aneg(struct phy_device *phydev);
 int genphy_c45_aneg_done(struct phy_device *phydev);
-int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask);
+int genphy_c45_read_link(struct phy_device *phydev);
 int genphy_c45_read_lpa(struct phy_device *phydev);
 int genphy_c45_read_pma(struct phy_device *phydev);
 int genphy_c45_pma_setup_forced(struct phy_device *phydev);
 int genphy_c45_an_disable_aneg(struct phy_device *phydev);
 int genphy_c45_read_mdix(struct phy_device *phydev);
+int genphy_c45_pma_read_abilities(struct phy_device *phydev);
 
 /* The gen10g_* functions are the old Clause 45 stub */
 int gen10g_config_aneg(struct phy_device *phydev);
index 021fc65..f57059e 100644 (file)
@@ -220,6 +220,7 @@ void phylink_ethtool_get_pauseparam(struct phylink *,
 int phylink_ethtool_set_pauseparam(struct phylink *,
                                   struct ethtool_pauseparam *);
 int phylink_get_eee_err(struct phylink *);
+int phylink_init_eee(struct phylink *, bool);
 int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *);
 int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *);
 int phylink_mii_ioctl(struct phylink *, struct ifreq *, int);
index 54af4ee..fed5be7 100644 (file)
@@ -105,7 +105,7 @@ static inline bool pm_runtime_callbacks_present(struct device *dev)
 
 static inline void pm_runtime_mark_last_busy(struct device *dev)
 {
-       WRITE_ONCE(dev->power.last_busy, ktime_to_ns(ktime_get()));
+       WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns());
 }
 
 static inline bool pm_runtime_is_irq_safe(struct device *dev)
index 35170f7..f6165d3 100644 (file)
@@ -643,6 +643,7 @@ struct qed_dev_info {
        u16             mtu;
 
        bool wol_support;
+       bool smart_an;
 
        /* MBI version */
        u32 mbi_version;
index d2f90fa..bba3afb 100644 (file)
@@ -995,7 +995,7 @@ struct task_struct {
        /* cg_list protected by css_set_lock and tsk->alloc_lock: */
        struct list_head                cg_list;
 #endif
-#ifdef CONFIG_X86_RESCTRL
+#ifdef CONFIG_X86_CPU_RESCTRL
        u32                             closid;
        u32                             rmid;
 #endif
index ec912d0..ecdc654 100644 (file)
@@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_HUGE_ZERO_PAGE     23      /* mm has ever used the global huge zero page */
 #define MMF_DISABLE_THP                24      /* disable THP for all VMAs */
 #define MMF_OOM_VICTIM         25      /* mm is the oom victim */
+#define MMF_OOM_REAP_QUEUED    26      /* mm was queued for oom_reaper */
 #define MMF_DISABLE_THP_MASK   (1 << MMF_DISABLE_THP)
 
 #define MMF_INIT_MASK          (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
index cc7e2c1..9702016 100644 (file)
@@ -392,7 +392,7 @@ extern bool unhandled_signal(struct task_struct *tsk, int sig);
 #endif
 
 #define siginmask(sig, mask) \
-       ((sig) < SIGRTMIN && (rt_sigmask(sig) & (mask)))
+       ((sig) > 0 && (sig) < SIGRTMIN && (rt_sigmask(sig) & (mask)))
 
 #define SIG_KERNEL_ONLY_MASK (\
        rt_sigmask(SIGKILL)   |  rt_sigmask(SIGSTOP))
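
rt_sigmask(sig) expands to a shift by sig - 1, so without the new (sig) > 0 guard a zero or negative signal number would shift by a negative amount, which is undefined behaviour; the added test short-circuits before the shift can run. A userspace demonstration (SIGRTMIN_DEMO is an illustrative stand-in for the kernel's SIGRTMIN):

#include <stdio.h>

#define SIGRTMIN_DEMO 32
#define rt_sigmask(sig)	(1ULL << ((sig) - 1))
#define siginmask(sig, mask) \
	((sig) > 0 && (sig) < SIGRTMIN_DEMO && (rt_sigmask(sig) & (mask)))

int main(void)
{
	unsigned long long kill_only = rt_sigmask(9);	/* SIGKILL */

	printf("sig 9: %d\n", !!siginmask(9, kill_only));	/* 1 */
	printf("sig 0: %d\n", !!siginmask(0, kill_only));	/* 0, shift never runs */
	return 0;
}
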
index 8318466..a41e84f 100644 (file)
@@ -1889,12 +1889,12 @@ static inline void __skb_queue_before(struct sk_buff_head *list,
  *
  *     A buffer cannot be placed on two lists at the same time.
  */
-void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_head(struct sk_buff_head *list,
                                    struct sk_buff *newsk)
 {
        __skb_queue_after(list, (struct sk_buff *)list, newsk);
 }
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
 
 /**
  *     __skb_queue_tail - queue a buffer at the list tail
@@ -1906,12 +1906,12 @@ static inline void __skb_queue_head(struct sk_buff_head *list,
  *
  *     A buffer cannot be placed on two lists at the same time.
  */
-void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_tail(struct sk_buff_head *list,
                                   struct sk_buff *newsk)
 {
        __skb_queue_before(list, (struct sk_buff *)list, newsk);
 }
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
 
 /*
  * remove sk_buff from list. _Must_ be called atomically, and with
@@ -1938,7 +1938,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  *     so must be used with appropriate locks held only. The head item is
  *     returned or %NULL if the list is empty.
  */
-struct sk_buff *skb_dequeue(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
 {
        struct sk_buff *skb = skb_peek(list);
@@ -1946,6 +1945,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
                __skb_unlink(skb, list);
        return skb;
 }
+struct sk_buff *skb_dequeue(struct sk_buff_head *list);
 
 /**
  *     __skb_dequeue_tail - remove from the tail of the queue
@@ -1955,7 +1955,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  *     so must be used with appropriate locks held only. The tail item is
  *     returned or %NULL if the list is empty.
  */
-struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
 {
        struct sk_buff *skb = skb_peek_tail(list);
@@ -1963,6 +1962,7 @@ static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
                __skb_unlink(skb, list);
        return skb;
 }
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
 
 
 static inline bool skb_is_nonlinear(const struct sk_buff *skb)
@@ -2653,13 +2653,13 @@ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask)
  *     the list and one reference dropped. This function does not take the
  *     list lock and the caller must hold the relevant locks to use it.
  */
-void skb_queue_purge(struct sk_buff_head *list);
 static inline void __skb_queue_purge(struct sk_buff_head *list)
 {
        struct sk_buff *skb;
        while ((skb = __skb_dequeue(list)) != NULL)
                kfree_skb(skb);
 }
+void skb_queue_purge(struct sk_buff_head *list);
 
 unsigned int skb_rbtree_purge(struct rb_root *root);
 
@@ -3028,7 +3028,7 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len)
 }
 
 /**
- *     skb_put_padto - increase size and pad an skbuff up to a minimal size
+ *     __skb_put_padto - increase size and pad an skbuff up to a minimal size
  *     @skb: buffer to pad
  *     @len: minimal length
  *     @free_on_error: free buffer on error
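
These skbuff.h hunks only move the out-of-line prototypes after their unlocked inline counterparts and fix the __skb_put_padto kernel-doc title; the locked/unlocked split itself is unchanged. A hedged sketch of the usual pairing, where a caller batches several unlocked operations under the list lock (process_skb is a hypothetical consumer):

        unsigned long flags;
        struct sk_buff *skb;

        spin_lock_irqsave(&list->lock, flags);
        while ((skb = __skb_dequeue(list)) != NULL)     /* no internal locking */
                process_skb(skb);                       /* hypothetical consumer */
        spin_unlock_irqrestore(&list->lock, flags);

The single-underscore variants (skb_dequeue() and friends) take the lock themselves and suit one-off operations.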
index 7ddfc65..4335bd7 100644 (file)
@@ -184,6 +184,7 @@ struct plat_stmmacenet_data {
        struct clk *pclk;
        struct clk *clk_ptp_ref;
        unsigned int clk_ptp_rate;
+       unsigned int clk_ref_rate;
        struct reset_control *stmmac_rst;
        struct stmmac_axi *axi;
        int has_gmac4;
index dbc795e..c745e9c 100644 (file)
@@ -80,7 +80,7 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm)
 struct tc_action_ops {
        struct list_head head;
        char    kind[IFNAMSIZ];
-       __u32   type; /* TBD to match kind */
+       enum tca_id  id; /* identifier should match kind */
        size_t  size;
        struct module           *owner;
        int     (*act)(struct sk_buff *, const struct tc_action *,
index c12ad6e..c6d8875 100644 (file)
@@ -358,6 +358,7 @@ struct devlink_param_item {
        const struct devlink_param *param;
        union devlink_param_value driverinit_value;
        bool driverinit_value_valid;
+       bool published;
 };
 
 enum devlink_param_generic_id {
@@ -369,17 +370,12 @@ enum devlink_param_generic_id {
        DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
        DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
        DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
-       DEVLINK_PARAM_GENERIC_ID_WOL,
 
        /* add new param generic ids above here*/
        __DEVLINK_PARAM_GENERIC_ID_MAX,
        DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1,
 };
 
-enum devlink_param_wol_types {
-       DEVLINK_PARAM_WAKE_MAGIC = (1 << 0),
-};
-
 #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME "internal_error_reset"
 #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
 
@@ -404,9 +400,6 @@ enum devlink_param_wol_types {
 #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy"
 #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8
 
-#define DEVLINK_PARAM_GENERIC_WOL_NAME "wake_on_lan"
-#define DEVLINK_PARAM_GENERIC_WOL_TYPE DEVLINK_PARAM_TYPE_U8
-
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)     \
 {                                                                      \
        .id = DEVLINK_PARAM_GENERIC_ID_##_id,                           \
@@ -434,6 +427,8 @@ enum devlink_param_wol_types {
 #define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID  "board.id"
 /* Revision of board design */
 #define DEVLINK_INFO_VERSION_GENERIC_BOARD_REV "board.rev"
+/* Maker of the board */
+#define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE "board.manufacture"
 
 /* Control processor FW version */
 #define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT   "fw.mgmt"
@@ -618,6 +613,8 @@ int devlink_params_register(struct devlink *devlink,
 void devlink_params_unregister(struct devlink *devlink,
                               const struct devlink_param *params,
                               size_t params_count);
+void devlink_params_publish(struct devlink *devlink);
+void devlink_params_unpublish(struct devlink *devlink);
 int devlink_port_params_register(struct devlink_port *devlink_port,
                                 const struct devlink_param *params,
                                 size_t params_count);
@@ -724,6 +721,14 @@ static inline void devlink_unregister(struct devlink *devlink)
 {
 }
 
+static inline void devlink_params_publish(struct devlink *devlink)
+{
+}
+
+static inline void devlink_params_unpublish(struct devlink *devlink)
+{
+}
+
 static inline void devlink_free(struct devlink *devlink)
 {
        kfree(devlink);
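
A sketch of the driver flow the new publish/unpublish hooks enable, assuming a driver-defined params array: registration can happen early, while visibility to userspace is deferred until the values are actually usable.

        err = devlink_params_register(devlink, params, ARRAY_SIZE(params));
        if (err)
                return err;
        /* ... populate driverinit defaults ... */
        devlink_params_publish(devlink);        /* now exposed to userspace */

The empty stubs in the second hunk keep drivers buildable when devlink itself is compiled out.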
index 23166ca..d035183 100644 (file)
@@ -171,7 +171,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action)
 }
 
 #define flow_action_for_each(__i, __act, __actions)                    \
-        for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[__i++])
+        for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[++__i])
 
 struct flow_rule {
        struct flow_match       match;
@@ -195,9 +195,9 @@ struct flow_stats {
 static inline void flow_stats_update(struct flow_stats *flow_stats,
                                     u64 bytes, u64 pkts, u64 lastused)
 {
-       flow_stats->pkts        = pkts;
-       flow_stats->bytes       = bytes;
-       flow_stats->lastused    = lastused;
+       flow_stats->pkts        += pkts;
+       flow_stats->bytes       += bytes;
+       flow_stats->lastused    = max_t(u64, flow_stats->lastused, lastused);
 }
 
 #endif /* _NET_FLOW_OFFLOAD_H */
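
Two behavioural fixes in this header. The iterator previously post-incremented in its advance expression, so entries[0] was visited twice and the final entry never; pre-incrementing visits each entry exactly once. flow_stats_update() now accumulates instead of overwriting, so several hardware counter reads can feed one aggregate. A hedged usage sketch (rule, stats and handle_entry are hypothetical):

        int i;
        const struct flow_action_entry *act;

        flow_action_for_each(i, act, &rule->action)
                handle_entry(act);              /* each entry visited once */

        flow_stats_update(&stats, bytes, pkts, jiffies); /* adds; keeps newest lastused */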
index 00b5e78..74ff688 100644 (file)
@@ -39,6 +39,7 @@ struct inet_peer {
 
        u32                     metrics[RTAX_MAX];
        u32                     rate_tokens;    /* rate limiting for ICMP */
+       u32                     n_redirects;
        unsigned long           rate_last;
        /*
         * Once inet_peer is queued for deletion (refcnt == 0), following field
index 78fa0ac..5175fd6 100644 (file)
@@ -153,7 +153,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
 
        if (netif_is_l3_slave(skb->dev))
                master = netdev_master_upper_dev_get_rcu(skb->dev);
-       else if (netif_is_l3_master(skb->dev))
+       else if (netif_is_l3_master(skb->dev) ||
+                netif_has_l3_rx_handler(skb->dev))
                master = skb->dev;
 
        if (master && master->l3mdev_ops->l3mdev_l3_rcv)
index 45eba7d..a66fcd3 100644 (file)
@@ -469,9 +469,7 @@ struct nft_set_binding {
 int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
                       struct nft_set_binding *binding);
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
-                         struct nft_set_binding *binding);
-void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
-                         struct nft_set_binding *binding);
+                         struct nft_set_binding *binding, bool commit);
 void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
 
 /**
@@ -721,6 +719,13 @@ struct nft_expr_type {
 #define NFT_EXPR_STATEFUL              0x1
 #define NFT_EXPR_GC                    0x2
 
+enum nft_trans_phase {
+       NFT_TRANS_PREPARE,
+       NFT_TRANS_ABORT,
+       NFT_TRANS_COMMIT,
+       NFT_TRANS_RELEASE
+};
+
 /**
  *     struct nft_expr_ops - nf_tables expression operations
  *
@@ -750,7 +755,8 @@ struct nft_expr_ops {
        void                            (*activate)(const struct nft_ctx *ctx,
                                                    const struct nft_expr *expr);
        void                            (*deactivate)(const struct nft_ctx *ctx,
-                                                     const struct nft_expr *expr);
+                                                     const struct nft_expr *expr,
+                                                     enum nft_trans_phase phase);
        void                            (*destroy)(const struct nft_ctx *ctx,
                                                   const struct nft_expr *expr);
        void                            (*destroy_clone)(const struct nft_ctx *ctx,
@@ -1335,12 +1341,15 @@ struct nft_trans_rule {
 struct nft_trans_set {
        struct nft_set                  *set;
        u32                             set_id;
+       bool                            bound;
 };
 
 #define nft_trans_set(trans)   \
        (((struct nft_trans_set *)trans->data)->set)
 #define nft_trans_set_id(trans)        \
        (((struct nft_trans_set *)trans->data)->set_id)
+#define nft_trans_set_bound(trans)     \
+       (((struct nft_trans_set *)trans->data)->bound)
 
 struct nft_trans_chain {
        bool                            update;
index cb8be39..6a530be 100644 (file)
@@ -44,6 +44,10 @@ bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
                                       u32 chain_index);
 void tcf_chain_put_by_act(struct tcf_chain *chain);
+struct tcf_chain *tcf_get_next_chain(struct tcf_block *block,
+                                    struct tcf_chain *chain);
+struct tcf_proto *tcf_get_next_proto(struct tcf_chain *chain,
+                                    struct tcf_proto *tp, bool rtnl_held);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
                  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
@@ -412,7 +416,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
                      struct nlattr **tb, struct nlattr *rate_tlv,
-                     struct tcf_exts *exts, bool ovr,
+                     struct tcf_exts *exts, bool ovr, bool rtnl_held,
                      struct netlink_ext_ack *extack);
 void tcf_exts_destroy(struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
index 7a49575..e50b729 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/list.h>
 #include <linux/refcount.h>
 #include <linux/workqueue.h>
+#include <linux/mutex.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 
@@ -178,6 +179,7 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
 }
 
 struct Qdisc_class_ops {
+       unsigned int            flags;
        /* Child qdisc manipulation */
        struct netdev_queue *   (*select_queue)(struct Qdisc *, struct tcmsg *);
        int                     (*graft)(struct Qdisc *, unsigned long cl,
@@ -209,6 +211,13 @@ struct Qdisc_class_ops {
                                        struct gnet_dump *);
 };
 
+/* Qdisc_class_ops flag values */
+
+/* Implements API that doesn't require rtnl lock */
+enum qdisc_class_ops_flags {
+       QDISC_CLASS_OPS_DOIT_UNLOCKED = 1,
+};
+
 struct Qdisc_ops {
        struct Qdisc_ops        *next;
        const struct Qdisc_class_ops    *cl_ops;
@@ -272,19 +281,21 @@ struct tcf_proto_ops {
                                            const struct tcf_proto *,
                                            struct tcf_result *);
        int                     (*init)(struct tcf_proto*);
-       void                    (*destroy)(struct tcf_proto *tp,
+       void                    (*destroy)(struct tcf_proto *tp, bool rtnl_held,
                                           struct netlink_ext_ack *extack);
 
        void*                   (*get)(struct tcf_proto*, u32 handle);
+       void                    (*put)(struct tcf_proto *tp, void *f);
        int                     (*change)(struct net *net, struct sk_buff *,
                                        struct tcf_proto*, unsigned long,
                                        u32 handle, struct nlattr **,
-                                       void **, bool,
+                                       void **, bool, bool,
                                        struct netlink_ext_ack *);
        int                     (*delete)(struct tcf_proto *tp, void *arg,
-                                         bool *last,
+                                         bool *last, bool rtnl_held,
                                          struct netlink_ext_ack *);
-       void                    (*walk)(struct tcf_proto*, struct tcf_walker *arg);
+       void                    (*walk)(struct tcf_proto *tp,
+                                       struct tcf_walker *arg, bool rtnl_held);
        int                     (*reoffload)(struct tcf_proto *tp, bool add,
                                             tc_setup_cb_t *cb, void *cb_priv,
                                             struct netlink_ext_ack *extack);
@@ -297,12 +308,18 @@ struct tcf_proto_ops {
 
        /* rtnetlink specific */
        int                     (*dump)(struct net*, struct tcf_proto*, void *,
-                                       struct sk_buff *skb, struct tcmsg*);
+                                       struct sk_buff *skb, struct tcmsg*,
+                                       bool);
        int                     (*tmplt_dump)(struct sk_buff *skb,
                                              struct net *net,
                                              void *tmplt_priv);
 
        struct module           *owner;
+       int                     flags;
+};
+
+enum tcf_proto_ops_flags {
+       TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
 };
 
 struct tcf_proto {
@@ -321,6 +338,12 @@ struct tcf_proto {
        void                    *data;
        const struct tcf_proto_ops      *ops;
        struct tcf_chain        *chain;
+       /* Lock protects tcf_proto shared state and can be used by unlocked
+        * classifiers to protect their private data.
+        */
+       spinlock_t              lock;
+       bool                    deleting;
+       refcount_t              refcnt;
        struct rcu_head         rcu;
 };
 
@@ -340,6 +363,8 @@ struct qdisc_skb_cb {
 typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
 
 struct tcf_chain {
+       /* Protects filter_chain. */
+       struct mutex filter_chain_lock;
        struct tcf_proto __rcu *filter_chain;
        struct list_head list;
        struct tcf_block *block;
@@ -347,11 +372,16 @@ struct tcf_chain {
        unsigned int refcnt;
        unsigned int action_refcnt;
        bool explicitly_created;
+       bool flushing;
        const struct tcf_proto_ops *tmplt_ops;
        void *tmplt_priv;
 };
 
 struct tcf_block {
+       /* Lock protects tcf_block and lifetime-management data of chains
+        * attached to the block (refcnt, action_refcnt, explicitly_created).
+        */
+       struct mutex lock;
        struct list_head chain_list;
        u32 index; /* block index for shared blocks */
        refcount_t refcnt;
@@ -369,6 +399,34 @@ struct tcf_block {
        struct rcu_head rcu;
 };
 
+#ifdef CONFIG_PROVE_LOCKING
+static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
+{
+       return lockdep_is_held(&chain->filter_chain_lock);
+}
+
+static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
+{
+       return lockdep_is_held(&tp->lock);
+}
+#else
+static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
+{
+       return true;
+}
+
+static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
+{
+       return true;
+}
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+
+#define tcf_chain_dereference(p, chain)                                        \
+       rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain))
+
+#define tcf_proto_dereference(p, tp)                                   \
+       rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp))
+
 static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
 {
        if (*flags & TCA_CLS_FLAGS_IN_HW)
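
A sketch of how the new chain lock and accessor pair up, assuming the caller holds chain->filter_chain_lock for the duration of the update:

        struct tcf_proto *tp;

        mutex_lock(&chain->filter_chain_lock);
        tp = tcf_chain_dereference(chain->filter_chain, chain);
        /* ... walk or replace the filter chain safely ... */
        mutex_unlock(&chain->filter_chain_lock);

With CONFIG_PROVE_LOCKING enabled, tcf_chain_dereference() feeds lockdep the lock-held condition; otherwise the check compiles away.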
index 6679f3c..328cb7c 100644 (file)
@@ -1278,7 +1278,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk)
        percpu_counter_inc(sk->sk_prot->sockets_allocated);
 }
 
-static inline int
+static inline u64
 sk_sockets_allocated_read_positive(struct sock *sk)
 {
        return percpu_counter_read_positive(sk->sk_prot->sockets_allocated);
index 32d2454..68269e4 100644 (file)
@@ -21,7 +21,7 @@ struct tcf_csum {
 static inline bool is_tcf_csum(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       if (a->ops && a->ops->type == TCA_ACT_CSUM)
+       if (a->ops && a->ops->id == TCA_ID_CSUM)
                return true;
 #endif
        return false;
index ef8dd0d..ee8d005 100644 (file)
@@ -22,7 +22,7 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act,
 #ifdef CONFIG_NET_CLS_ACT
        struct tcf_gact *gact;
 
-       if (a->ops && a->ops->type != TCA_ACT_GACT)
+       if (a->ops && a->ops->id != TCA_ID_GACT)
                return false;
 
        gact = to_gact(a);
index a2e9cbc..c757585 100644 (file)
@@ -17,7 +17,7 @@ struct tcf_mirred {
 static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       if (a->ops && a->ops->type == TCA_ACT_MIRRED)
+       if (a->ops && a->ops->id == TCA_ID_MIRRED)
                return to_mirred(a)->tcfm_eaction == TCA_EGRESS_REDIR;
 #endif
        return false;
@@ -26,7 +26,7 @@ static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a)
 static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       if (a->ops && a->ops->type == TCA_ACT_MIRRED)
+       if (a->ops && a->ops->id == TCA_ID_MIRRED)
                return to_mirred(a)->tcfm_eaction == TCA_EGRESS_MIRROR;
 #endif
        return false;
index fac3ad4..748cf87 100644 (file)
@@ -23,7 +23,7 @@ struct tcf_pedit {
 static inline bool is_tcf_pedit(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       if (a->ops && a->ops->type == TCA_ACT_PEDIT)
+       if (a->ops && a->ops->id == TCA_ID_PEDIT)
                return true;
 #endif
        return false;
index 01dbfea..0a559d4 100644 (file)
@@ -20,7 +20,7 @@ struct tcf_sample {
 static inline bool is_tcf_sample(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       return a->ops && a->ops->type == TCA_ACT_SAMPLE;
+       return a->ops && a->ops->id == TCA_ID_SAMPLE;
 #else
        return false;
 #endif
index 911bbac..85c5c47 100644 (file)
@@ -44,7 +44,7 @@ static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
 #ifdef CONFIG_NET_CLS_ACT
        u32 flags;
 
-       if (a->ops && a->ops->type == TCA_ACT_SKBEDIT) {
+       if (a->ops && a->ops->id == TCA_ID_SKBEDIT) {
                rcu_read_lock();
                flags = rcu_dereference(to_skbedit(a)->params)->flags;
                rcu_read_unlock();
index 46b8c7f..23d5b8b 100644 (file)
@@ -34,7 +34,7 @@ static inline bool is_tcf_tunnel_set(const struct tc_action *a)
        struct tcf_tunnel_key *t = to_tunnel_key(a);
        struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
 
-       if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY)
+       if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY)
                return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET;
 #endif
        return false;
@@ -46,7 +46,7 @@ static inline bool is_tcf_tunnel_release(const struct tc_action *a)
        struct tcf_tunnel_key *t = to_tunnel_key(a);
        struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
 
-       if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY)
+       if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY)
                return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE;
 #endif
        return false;
index 22ae260..fe39ed5 100644 (file)
@@ -30,7 +30,7 @@ struct tcf_vlan {
 static inline bool is_tcf_vlan(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       if (a->ops && a->ops->type == TCA_ACT_VLAN)
+       if (a->ops && a->ops->id == TCA_ID_VLAN)
                return true;
 #endif
        return false;
index a3ceed3..80debf5 100644 (file)
@@ -2579,9 +2579,10 @@ struct ib_device {
 
        const struct uapi_definition   *driver_def;
        enum rdma_driver_id             driver_id;
+
        /*
-        * Provides synchronization between device unregistration and netlink
-        * commands on a device. To be used only by core.
+        * Positive refcount indicates that the device is currently
+        * registered and cannot be unregistered.
         */
        refcount_t refcount;
        struct completion unreg_completion;
@@ -3926,6 +3927,25 @@ static inline bool ib_access_writable(int access_flags)
 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
                       struct ib_mr_status *mr_status);
 
+/**
+ * ib_device_try_get - Hold a registration lock
+ * @device: The device to lock
+ *
+ * A device under an active registration lock cannot become unregistered. It
+ * is only possible to obtain a registration lock on a device that is fully
+ * registered, otherwise this function returns false.
+ *
+ * The registration lock is only necessary for actions which require the
+ * device to still be registered. Uses that only require the device pointer to
+ * be valid should use get_device(&ibdev->dev) to hold the memory.
+ *
+ */
+static inline bool ib_device_try_get(struct ib_device *dev)
+{
+       return refcount_inc_not_zero(&dev->refcount);
+}
+
+void ib_device_put(struct ib_device *device);
 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
                                            u16 pkey, const union ib_gid *gid,
                                            const struct sockaddr *addr);
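
The kernel-doc above states the contract; a sketch of the expected caller pattern:

        if (!ib_device_try_get(dev))
                return -ENODEV;         /* device is unregistering */
        /* ... work that requires a still-registered device ... */
        ib_device_put(dev);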
index 0cdc399..c5188ff 100644 (file)
@@ -173,7 +173,11 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream)
        if (snd_BUG_ON(!stream))
                return;
 
-       stream->runtime->state = SNDRV_PCM_STATE_SETUP;
+       if (stream->direction == SND_COMPRESS_PLAYBACK)
+               stream->runtime->state = SNDRV_PCM_STATE_SETUP;
+       else
+               stream->runtime->state = SNDRV_PCM_STATE_PREPARED;
+
        wake_up(&stream->runtime->sleep);
 }
 
index 7fa48b1..cc7c8d4 100644 (file)
@@ -68,6 +68,7 @@ struct hda_bus {
        unsigned int response_reset:1;  /* controller was reset */
        unsigned int in_reset:1;        /* during reset operation */
        unsigned int no_response_fallback:1; /* don't fallback at RIRB error */
+       unsigned int bus_probing:1;     /* during probing process */
 
        int primary_dig_out_type;       /* primary digital out PCM type */
        unsigned int mixer_assigned;    /* codec addr for mixer name */
index 6c2bafc..a5ce6df 100644 (file)
@@ -11,6 +11,7 @@
 
 struct mlxsw_sp;
 struct mlxsw_sp_acl_atcam_region;
+struct mlxsw_sp_acl_tcam_vregion;
 
 TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill,
        TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
@@ -32,6 +33,66 @@ TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill,
                  __entry->mlxsw_sp, __entry->aregion)
 );
 
+TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash,
+       TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
+                const struct mlxsw_sp_acl_tcam_vregion *vregion),
+
+       TP_ARGS(mlxsw_sp, vregion),
+
+       TP_STRUCT__entry(
+               __field(const void *, mlxsw_sp)
+               __field(const void *, vregion)
+       ),
+
+       TP_fast_assign(
+               __entry->mlxsw_sp = mlxsw_sp;
+               __entry->vregion = vregion;
+       ),
+
+       TP_printk("mlxsw_sp %p, vregion %p",
+                 __entry->mlxsw_sp, __entry->vregion)
+);
+
+TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate,
+       TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
+                const struct mlxsw_sp_acl_tcam_vregion *vregion),
+
+       TP_ARGS(mlxsw_sp, vregion),
+
+       TP_STRUCT__entry(
+               __field(const void *, mlxsw_sp)
+               __field(const void *, vregion)
+       ),
+
+       TP_fast_assign(
+               __entry->mlxsw_sp = mlxsw_sp;
+               __entry->vregion = vregion;
+       ),
+
+       TP_printk("mlxsw_sp %p, vregion %p",
+                 __entry->mlxsw_sp, __entry->vregion)
+);
+
+TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_dis,
+       TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
+                const struct mlxsw_sp_acl_tcam_vregion *vregion),
+
+       TP_ARGS(mlxsw_sp, vregion),
+
+       TP_STRUCT__entry(
+               __field(const void *, mlxsw_sp)
+               __field(const void *, vregion)
+       ),
+
+       TP_fast_assign(
+               __entry->mlxsw_sp = mlxsw_sp;
+               __entry->vregion = vregion;
+       ),
+
+       TP_printk("mlxsw_sp %p, vregion %p",
+                 __entry->mlxsw_sp, __entry->vregion)
+);
+
 #endif /* _MLXSW_TRACEPOINT_H */
 
 /* This part must be outside protection */
index 7eb2936..c99336f 100644 (file)
@@ -229,7 +229,7 @@ struct batadv_ogm_packet {
  * @packet_type: batman-adv packet type, part of the general header
  * @version: batman-adv protocol version, part of the general header
  * @ttl: time to live for this packet, part of the general header
- * @flags: reseved for routing relevant flags - currently always 0
+ * @flags: reserved for routing relevant flags - currently always 0
  * @seqno: sequence number
  * @orig: originator mac address
  * @tvlv_len: length of the appended tvlv buffer (in bytes)
index a28e76a..305bf31 100644 (file)
@@ -27,6 +27,7 @@
 
 #define BATADV_NL_NAME "batadv"
 
+#define BATADV_NL_MCAST_GROUP_CONFIG   "config"
 #define BATADV_NL_MCAST_GROUP_TPMETER  "tpmeter"
 
 /**
@@ -139,6 +140,20 @@ enum batadv_mcast_flags_priv {
 };
 
 /**
+ * enum batadv_gw_modes - gateway mode of node
+ */
+enum batadv_gw_modes {
+       /** @BATADV_GW_MODE_OFF: gw mode disabled */
+       BATADV_GW_MODE_OFF,
+
+       /** @BATADV_GW_MODE_CLIENT: send DHCP requests to gw servers */
+       BATADV_GW_MODE_CLIENT,
+
+       /** @BATADV_GW_MODE_SERVER: announce itself as gateway server */
+       BATADV_GW_MODE_SERVER,
+};
+
+/**
  * enum batadv_nl_attrs - batman-adv netlink attributes
  */
 enum batadv_nl_attrs {
@@ -344,6 +359,138 @@ enum batadv_nl_attrs {
         */
        BATADV_ATTR_MCAST_FLAGS_PRIV,
 
+       /**
+        * @BATADV_ATTR_VLANID: VLAN id on top of soft interface
+        */
+       BATADV_ATTR_VLANID,
+
+       /**
+        * @BATADV_ATTR_AGGREGATED_OGMS_ENABLED: whether the batman protocol
+        *  messages of the mesh interface shall be aggregated or not.
+        */
+       BATADV_ATTR_AGGREGATED_OGMS_ENABLED,
+
+       /**
+        * @BATADV_ATTR_AP_ISOLATION_ENABLED: whether the data traffic going
+        *  from a wireless client to another wireless client will be silently
+        *  dropped.
+        */
+       BATADV_ATTR_AP_ISOLATION_ENABLED,
+
+       /**
+        * @BATADV_ATTR_ISOLATION_MARK: the isolation mark which is used to
+        *  classify clients as "isolated" by the Extended Isolation feature.
+        */
+       BATADV_ATTR_ISOLATION_MARK,
+
+       /**
+        * @BATADV_ATTR_ISOLATION_MASK: the isolation (bit)mask which is used to
+        *  classify clients as "isolated" by the Extended Isolation feature.
+        */
+       BATADV_ATTR_ISOLATION_MASK,
+
+       /**
+        * @BATADV_ATTR_BONDING_ENABLED: whether the data traffic going through
+        *  the mesh will be sent using multiple interfaces at the same time.
+        */
+       BATADV_ATTR_BONDING_ENABLED,
+
+       /**
+        * @BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED: whether the bridge loop
+        *  avoidance feature is enabled. This feature detects and avoids loops
+        *  between the mesh and devices bridged with the soft interface
+        */
+       BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED,
+
+       /**
+        * @BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED: whether the distributed
+        *  arp table feature is enabled. This feature uses a distributed hash
+        *  table to answer ARP requests without flooding the request through
+        *  the whole mesh.
+        */
+       BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED,
+
+       /**
+        * @BATADV_ATTR_FRAGMENTATION_ENABLED: whether the data traffic going
+        *  through the mesh will be fragmented or silently discarded if the
+        *  packet size exceeds the outgoing interface MTU.
+        */
+       BATADV_ATTR_FRAGMENTATION_ENABLED,
+
+       /**
+        * @BATADV_ATTR_GW_BANDWIDTH_DOWN: defines the download bandwidth which
+        *  is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set
+        *  to 'server'.
+        */
+       BATADV_ATTR_GW_BANDWIDTH_DOWN,
+
+       /**
+        * @BATADV_ATTR_GW_BANDWIDTH_UP: defines the upload bandwidth which
+        *  is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set
+        *  to 'server'.
+        */
+       BATADV_ATTR_GW_BANDWIDTH_UP,
+
+       /**
+        * @BATADV_ATTR_GW_MODE: defines the state of the gateway features.
+        * Possible values are specified in enum batadv_gw_modes
+        */
+       BATADV_ATTR_GW_MODE,
+
+       /**
+        * @BATADV_ATTR_GW_SEL_CLASS: defines the selection criteria this node
+        *  will use to choose a gateway if gw_mode was set to 'client'.
+        */
+       BATADV_ATTR_GW_SEL_CLASS,
+
+       /**
+        * @BATADV_ATTR_HOP_PENALTY: defines the penalty which will be applied
+        *  to an originator message's tq-field on every hop.
+        */
+       BATADV_ATTR_HOP_PENALTY,
+
+       /**
+        * @BATADV_ATTR_LOG_LEVEL: bitmask to define which debug messages
+        *  should be sent to the debug log/trace ring buffer
+        */
+       BATADV_ATTR_LOG_LEVEL,
+
+       /**
+        * @BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED: whether multicast
+        *  optimizations should be replaced by simple broadcast-like flooding
+        *  of multicast packets. If set to non-zero then all nodes in the mesh
+        *  are going to use classic flooding for any multicast packet with no
+        *  optimizations.
+        */
+       BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED,
+
+       /**
+        * @BATADV_ATTR_NETWORK_CODING_ENABLED: whether Network Coding (using
+        *  some magic to send fewer wifi packets but still the same content) is
+        *  enabled or not.
+        */
+       BATADV_ATTR_NETWORK_CODING_ENABLED,
+
+       /**
+        * @BATADV_ATTR_ORIG_INTERVAL: defines the interval in milliseconds in
+        *  which batman sends its protocol messages.
+        */
+       BATADV_ATTR_ORIG_INTERVAL,
+
+       /**
+        * @BATADV_ATTR_ELP_INTERVAL: defines the interval in milliseconds in
+        *  which batman emits probing packets for neighbor sensing (ELP).
+        */
+       BATADV_ATTR_ELP_INTERVAL,
+
+       /**
+        * @BATADV_ATTR_THROUGHPUT_OVERRIDE: defines the throughput value to be
+        *  used by B.A.T.M.A.N. V when estimating the link throughput using
+        *  this interface. If the value is set to 0 then batman-adv will try to
+        *  estimate the throughput by itself.
+        */
+       BATADV_ATTR_THROUGHPUT_OVERRIDE,
+
        /* add attributes above here, update the policy in netlink.c */
 
        /**
@@ -372,10 +519,14 @@ enum batadv_nl_commands {
        BATADV_CMD_UNSPEC,
 
        /**
-        * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv
-        * device
+        * @BATADV_CMD_GET_MESH: Get attributes from softif/mesh
+        */
+       BATADV_CMD_GET_MESH,
+
+       /**
+        * @BATADV_CMD_GET_MESH_INFO: Alias for @BATADV_CMD_GET_MESH
         */
-       BATADV_CMD_GET_MESH_INFO,
+       BATADV_CMD_GET_MESH_INFO = BATADV_CMD_GET_MESH,
 
        /**
         * @BATADV_CMD_TP_METER: Start a tp meter session
@@ -393,9 +544,15 @@ enum batadv_nl_commands {
        BATADV_CMD_GET_ROUTING_ALGOS,
 
        /**
-        * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces
+        * @BATADV_CMD_GET_HARDIF: Get attributes from a hardif of the
+        *  current softif
         */
-       BATADV_CMD_GET_HARDIFS,
+       BATADV_CMD_GET_HARDIF,
+
+       /**
+        * @BATADV_CMD_GET_HARDIFS: Alias for @BATADV_CMD_GET_HARDIF
+        */
+       BATADV_CMD_GET_HARDIFS = BATADV_CMD_GET_HARDIF,
 
        /**
         * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations
@@ -443,6 +600,29 @@ enum batadv_nl_commands {
         */
        BATADV_CMD_GET_MCAST_FLAGS,
 
+       /**
+        * @BATADV_CMD_SET_MESH: Set attributes for softif/mesh
+        */
+       BATADV_CMD_SET_MESH,
+
+       /**
+        * @BATADV_CMD_SET_HARDIF: Set attributes for hardif of the
+        *  current softif
+        */
+       BATADV_CMD_SET_HARDIF,
+
+       /**
+        * @BATADV_CMD_GET_VLAN: Get attributes from a VLAN of the
+        *  current softif
+        */
+       BATADV_CMD_GET_VLAN,
+
+       /**
+        * @BATADV_CMD_SET_VLAN: Set attributes for VLAN of the
+        *  current softif
+        */
+       BATADV_CMD_SET_VLAN,
+
        /* add new commands above here */
 
        /**
index d955b9e..28491da 100644 (file)
@@ -3,6 +3,7 @@
 #define _UAPI_LINUX_ERRQUEUE_H
 
 #include <linux/types.h>
+#include <linux/time_types.h>
 
 struct sock_extended_err {
        __u32   ee_errno;       
index 14565d7..e8baca8 100644 (file)
@@ -137,15 +137,21 @@ enum {
        INET_DIAG_TCLASS,
        INET_DIAG_SKMEMINFO,
        INET_DIAG_SHUTDOWN,
-       INET_DIAG_DCTCPINFO,
-       INET_DIAG_PROTOCOL,  /* response attribute only */
+
+       /*
+        * Next extensions cannot be requested in struct inet_diag_req_v2:
+        * its field idiag_ext has only 8 bits.
+        */
+
+       INET_DIAG_DCTCPINFO,    /* request as INET_DIAG_VEGASINFO */
+       INET_DIAG_PROTOCOL,     /* response attribute only */
        INET_DIAG_SKV6ONLY,
        INET_DIAG_LOCALS,
        INET_DIAG_PEERS,
        INET_DIAG_PAD,
-       INET_DIAG_MARK,
-       INET_DIAG_BBRINFO,
-       INET_DIAG_CLASS_ID,
+       INET_DIAG_MARK,         /* only with CAP_NET_ADMIN */
+       INET_DIAG_BBRINFO,      /* request as INET_DIAG_VEGASINFO */
+       INET_DIAG_CLASS_ID,     /* request as INET_DIAG_TCLASS */
        INET_DIAG_MD5SIG,
        __INET_DIAG_MAX,
 };
index d435b00..0a55206 100644 (file)
@@ -45,6 +45,7 @@
 #define MDIO_AN_ADVERTISE      16      /* AN advertising (base page) */
 #define MDIO_AN_LPA            19      /* AN LP abilities (base page) */
 #define MDIO_PCS_EEE_ABLE      20      /* EEE Capability register */
+#define MDIO_PMA_NG_EXTABLE    21      /* 2.5G/5G PMA/PMD extended ability */
 #define MDIO_PCS_EEE_WK_ERR    22      /* EEE wake error counter */
 #define MDIO_PHYXS_LNSTAT      24      /* PHY XGXS lane state */
 #define MDIO_AN_EEE_ADV                60      /* EEE advertisement */
 #define MDIO_CTRL1_SPEED10G            (MDIO_CTRL1_SPEEDSELEXT | 0x00)
 /* 10PASS-TS/2BASE-TL */
 #define MDIO_CTRL1_SPEED10P2B          (MDIO_CTRL1_SPEEDSELEXT | 0x04)
+/* 2.5 Gb/s */
+#define MDIO_CTRL1_SPEED2_5G           (MDIO_CTRL1_SPEEDSELEXT | 0x18)
+/* 5 Gb/s */
+#define MDIO_CTRL1_SPEED5G             (MDIO_CTRL1_SPEEDSELEXT | 0x1c)
 
 /* Status register 1. */
 #define MDIO_STAT1_LPOWERABLE          0x0002  /* Low-power ability */
 
 /* Device present registers. */
 #define MDIO_DEVS_PRESENT(devad)       (1 << (devad))
+#define MDIO_DEVS_C22PRESENT           MDIO_DEVS_PRESENT(0)
 #define MDIO_DEVS_PMAPMD               MDIO_DEVS_PRESENT(MDIO_MMD_PMAPMD)
 #define MDIO_DEVS_WIS                  MDIO_DEVS_PRESENT(MDIO_MMD_WIS)
 #define MDIO_DEVS_PCS                  MDIO_DEVS_PRESENT(MDIO_MMD_PCS)
 #define MDIO_DEVS_TC                   MDIO_DEVS_PRESENT(MDIO_MMD_TC)
 #define MDIO_DEVS_AN                   MDIO_DEVS_PRESENT(MDIO_MMD_AN)
 #define MDIO_DEVS_C22EXT               MDIO_DEVS_PRESENT(MDIO_MMD_C22EXT)
+#define MDIO_DEVS_VEND1                        MDIO_DEVS_PRESENT(MDIO_MMD_VEND1)
+#define MDIO_DEVS_VEND2                        MDIO_DEVS_PRESENT(MDIO_MMD_VEND2)
 
 /* Control register 2. */
 #define MDIO_PMA_CTRL2_TYPE            0x000f  /* PMA/PMD type selection */
 #define MDIO_PMA_CTRL2_1000BKX         0x000d  /* 1000BASE-KX type */
 #define MDIO_PMA_CTRL2_100BTX          0x000e  /* 100BASE-TX type */
 #define MDIO_PMA_CTRL2_10BT            0x000f  /* 10BASE-T type */
+#define MDIO_PMA_CTRL2_2_5GBT          0x0030  /* 2.5GBaseT type */
+#define MDIO_PMA_CTRL2_5GBT            0x0031  /* 5GBaseT type */
 #define MDIO_PCS_CTRL2_TYPE            0x0003  /* PCS type selection */
 #define MDIO_PCS_CTRL2_10GBR           0x0000  /* 10GBASE-R type */
 #define MDIO_PCS_CTRL2_10GBX           0x0001  /* 10GBASE-X type */
 #define MDIO_PMA_EXTABLE_1000BKX       0x0040  /* 1000BASE-KX ability */
 #define MDIO_PMA_EXTABLE_100BTX                0x0080  /* 100BASE-TX ability */
 #define MDIO_PMA_EXTABLE_10BT          0x0100  /* 10BASE-T ability */
+#define MDIO_PMA_EXTABLE_NBT           0x4000  /* 2.5/5GBASE-T ability */
 
 /* PHY XGXS lane state register. */
 #define MDIO_PHYXS_LNSTAT_SYNC0                0x0001
 #define MDIO_PCS_10GBRT_STAT2_BER      0x3f00
 
 /* AN 10GBASE-T control register. */
+#define MDIO_AN_10GBT_CTRL_ADV2_5G     0x0080  /* Advertise 2.5GBASE-T */
+#define MDIO_AN_10GBT_CTRL_ADV5G       0x0100  /* Advertise 5GBASE-T */
 #define MDIO_AN_10GBT_CTRL_ADV10G      0x1000  /* Advertise 10GBASE-T */
 
 /* AN 10GBASE-T status register. */
+#define MDIO_AN_10GBT_STAT_LP2_5G      0x0020  /* LP is 2.5GBT capable */
+#define MDIO_AN_10GBT_STAT_LP5G                0x0040  /* LP is 5GBT capable */
 #define MDIO_AN_10GBT_STAT_LPTRR       0x0200  /* LP training reset req. */
 #define MDIO_AN_10GBT_STAT_LPLTABLE    0x0400  /* LP loop timing ability */
 #define MDIO_AN_10GBT_STAT_LP10G       0x0800  /* LP is 10GBT capable */
 #define MDIO_EEE_10GKX4                0x0020  /* 10G KX4 EEE cap */
 #define MDIO_EEE_10GKR         0x0040  /* 10G KR EEE cap */
 
+/* 2.5G/5G Extended abilities register. */
+#define MDIO_PMA_NG_EXTABLE_2_5GBT     0x0001  /* 2.5GBASET ability */
+#define MDIO_PMA_NG_EXTABLE_5GBT       0x0002  /* 5GBASET ability */
+
 /* LASI RX_ALARM control/status registers. */
 #define MDIO_PMA_LASI_RX_PHYXSLFLT     0x0001  /* PHY XS RX local fault */
 #define MDIO_PMA_LASI_RX_PCSLFLT       0x0008  /* PCS RX local fault */
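
A hedged sketch of how a Clause 45 PHY driver might probe the new 2.5G/5G ability bits, assuming a struct phy_device with MMD access and omitting error handling:

        int val;

        val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE);
        if (val & MDIO_PMA_EXTABLE_NBT) {
                val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD,
                                   MDIO_PMA_NG_EXTABLE);
                if (val & MDIO_PMA_NG_EXTABLE_2_5GBT)
                        /* report 2.5GBASE-T support here */;
        }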
index 02ac251..51a0496 100644 (file)
@@ -63,12 +63,49 @@ enum {
 #define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
 #define TC_ACT_EXT_OPCODE_MAX  TC_ACT_GOTO_CHAIN
 
+/* These macros are put here for binary compatibility with userspace apps that
+ * make use of them. For kernel code and new userspace apps, use the TCA_ID_*
+ * versions.
+ */
+#define TCA_ACT_GACT 5
+#define TCA_ACT_IPT 6
+#define TCA_ACT_PEDIT 7
+#define TCA_ACT_MIRRED 8
+#define TCA_ACT_NAT 9
+#define TCA_ACT_XT 10
+#define TCA_ACT_SKBEDIT 11
+#define TCA_ACT_VLAN 12
+#define TCA_ACT_BPF 13
+#define TCA_ACT_CONNMARK 14
+#define TCA_ACT_SKBMOD 15
+#define TCA_ACT_CSUM 16
+#define TCA_ACT_TUNNEL_KEY 17
+#define TCA_ACT_SIMP 22
+#define TCA_ACT_IFE 25
+#define TCA_ACT_SAMPLE 26
+
 /* Action type identifiers*/
-enum {
-       TCA_ID_UNSPEC=0,
-       TCA_ID_POLICE=1,
+enum tca_id {
+       TCA_ID_UNSPEC = 0,
+       TCA_ID_POLICE = 1,
+       TCA_ID_GACT = TCA_ACT_GACT,
+       TCA_ID_IPT = TCA_ACT_IPT,
+       TCA_ID_PEDIT = TCA_ACT_PEDIT,
+       TCA_ID_MIRRED = TCA_ACT_MIRRED,
+       TCA_ID_NAT = TCA_ACT_NAT,
+       TCA_ID_XT = TCA_ACT_XT,
+       TCA_ID_SKBEDIT = TCA_ACT_SKBEDIT,
+       TCA_ID_VLAN = TCA_ACT_VLAN,
+       TCA_ID_BPF = TCA_ACT_BPF,
+       TCA_ID_CONNMARK = TCA_ACT_CONNMARK,
+       TCA_ID_SKBMOD = TCA_ACT_SKBMOD,
+       TCA_ID_CSUM = TCA_ACT_CSUM,
+       TCA_ID_TUNNEL_KEY = TCA_ACT_TUNNEL_KEY,
+       TCA_ID_SIMP = TCA_ACT_SIMP,
+       TCA_ID_IFE = TCA_ACT_IFE,
+       TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
        /* other actions go here */
-       __TCA_ID_MAX=255
+       __TCA_ID_MAX = 255
 };
 
 #define TCA_ID_MAX __TCA_ID_MAX
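
Action modules now identify themselves through the shared enum; a sketch of an ops table consistent with the new field (all fields other than the two shown are elided):

        static struct tc_action_ops act_sample_ops = {
                .kind   = "sample",
                .id     = TCA_ID_SAMPLE,        /* identifier matches kind */
                /* ... */
        };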
index 6e89a5d..653c4f9 100644 (file)
@@ -13,8 +13,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_BPF 13
-
 struct tc_act_bpf {
        tc_gen;
 };
index 80caa47..9f8f6f7 100644 (file)
@@ -5,8 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_CONNMARK 14
-
 struct tc_connmark {
        tc_gen;
        __u16 zone;
index 0ecf4d2..94b2044 100644 (file)
@@ -5,8 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_CSUM 16
-
 enum {
        TCA_CSUM_UNSPEC,
        TCA_CSUM_PARMS,
index 94273c3..37e5392 100644 (file)
@@ -5,7 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_GACT 5
 struct tc_gact {
        tc_gen;
 
index 2f48490..8c401f1 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/pkt_cls.h>
 #include <linux/ife.h>
 
-#define TCA_ACT_IFE 25
 /* Flag bits for now just encoding/decoding; mutually exclusive */
 #define IFE_ENCODE 1
 #define IFE_DECODE 0
index b743c8b..c48d7da 100644 (file)
@@ -4,9 +4,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_IPT 6
-#define TCA_ACT_XT 10
-
 enum {
        TCA_IPT_UNSPEC,
        TCA_IPT_TABLE,
index 5dd671c..2500a00 100644 (file)
@@ -5,7 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_MIRRED 8
 #define TCA_EGRESS_REDIR 1  /* packet redirect to EGRESS*/
 #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */
 #define TCA_INGRESS_REDIR 3  /* packet redirect to INGRESS*/
index 086be84..21399c2 100644 (file)
@@ -5,8 +5,6 @@
 #include <linux/pkt_cls.h>
 #include <linux/types.h>
 
-#define TCA_ACT_NAT 9
-
 enum {
        TCA_NAT_UNSPEC,
        TCA_NAT_PARMS,
index 24ec792..f3e61b0 100644 (file)
@@ -5,8 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_PEDIT 7
-
 enum {
        TCA_PEDIT_UNSPEC,
        TCA_PEDIT_TM,
index bd7e9f0..fee1bcc 100644 (file)
@@ -6,8 +6,6 @@
 #include <linux/pkt_cls.h>
 #include <linux/if_ether.h>
 
-#define TCA_ACT_SAMPLE 26
-
 struct tc_sample {
        tc_gen;
 };
index 6de6071..800e933 100644 (file)
@@ -23,8 +23,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_SKBEDIT 11
-
 #define SKBEDIT_F_PRIORITY             0x1
 #define SKBEDIT_F_QUEUE_MAPPING                0x2
 #define SKBEDIT_F_MARK                 0x4
index 38c072f..c525b35 100644 (file)
@@ -13,8 +13,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_SKBMOD 15
-
 #define SKBMOD_F_DMAC  0x1
 #define SKBMOD_F_SMAC  0x2
 #define SKBMOD_F_ETYPE 0x4
index be384d6..41c8b46 100644 (file)
@@ -14,8 +14,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_TUNNEL_KEY 17
-
 #define TCA_TUNNEL_KEY_ACT_SET     1
 #define TCA_TUNNEL_KEY_ACT_RELEASE  2
 
index 0d7b5fd..168995b 100644 (file)
@@ -13,8 +13,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_VLAN 12
-
 #define TCA_VLAN_ACT_POP       1
 #define TCA_VLAN_ACT_PUSH      2
 #define TCA_VLAN_ACT_MODIFY    3
index b8ad1b8..958932e 100644 (file)
@@ -3,7 +3,7 @@
 #define _UAPI_LINUX_TIME_H
 
 #include <linux/types.h>
-
+#include <linux/time_types.h>
 
 #ifndef _STRUCT_TIMESPEC
 #define _STRUCT_TIMESPEC
@@ -23,7 +23,6 @@ struct timezone {
        int     tz_dsttime;     /* type of dst correction */
 };
 
-
 /*
  * Names of the interval timers, and structure
  * defining a timer setting:
@@ -42,39 +41,6 @@ struct itimerval {
        struct timeval it_value;        /* current value */
 };
 
-#ifndef __kernel_timespec
-struct __kernel_timespec {
-       __kernel_time64_t       tv_sec;                 /* seconds */
-       long long               tv_nsec;                /* nanoseconds */
-};
-#endif
-
-#ifndef __kernel_itimerspec
-struct __kernel_itimerspec {
-       struct __kernel_timespec it_interval;    /* timer period */
-       struct __kernel_timespec it_value;       /* timer expiration */
-};
-#endif
-
-/*
- * legacy timeval structure, only embedded in structures that
- * traditionally used 'timeval' to pass time intervals (not absolute
- * times). Do not add new users. If user space fails to compile
- * here, this is probably because it is not y2038 safe and needs to
- * be changed to use another interface.
- */
-#ifndef __kernel_old_timeval
-struct __kernel_old_timeval {
-       __kernel_long_t tv_sec;
-       __kernel_long_t tv_usec;
-};
-#endif
-
-struct __kernel_sock_timeval {
-       __s64 tv_sec;
-       __s64 tv_usec;
-};
-
 /*
  * The IDs of the various system clocks (for POSIX.1b interval timers):
  */
diff --git a/include/uapi/linux/time_types.h b/include/uapi/linux/time_types.h
new file mode 100644 (file)
index 0000000..459070c
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_TIME_TYPES_H
+#define _UAPI_LINUX_TIME_TYPES_H
+
+#include <linux/types.h>
+
+#ifndef __kernel_timespec
+struct __kernel_timespec {
+       __kernel_time64_t       tv_sec;                 /* seconds */
+       long long               tv_nsec;                /* nanoseconds */
+};
+#endif
+
+#ifndef __kernel_itimerspec
+struct __kernel_itimerspec {
+       struct __kernel_timespec it_interval;    /* timer period */
+       struct __kernel_timespec it_value;       /* timer expiration */
+};
+#endif
+
+/*
+ * legacy timeval structure, only embedded in structures that
+ * traditionally used 'timeval' to pass time intervals (not absolute
+ * times). Do not add new users. If user space fails to compile
+ * here, this is probably because it is not y2038 safe and needs to
+ * be changed to use another interface.
+ */
+#ifndef __kernel_old_timeval
+struct __kernel_old_timeval {
+       __kernel_long_t tv_sec;
+       __kernel_long_t tv_usec;
+};
+#endif
+
+struct __kernel_sock_timeval {
+       __s64 tv_sec;
+       __s64 tv_usec;
+};
+
+#endif /* _UAPI_LINUX_TIME_TYPES_H */
index 1196e1c..ff8e7dc 100644 (file)
 #define VIRTIO_F_RING_PACKED           34
 
 /*
+ * This feature indicates that memory accesses by the driver and the
+ * device are ordered in a way described by the platform.
+ */
+#define VIRTIO_F_ORDER_PLATFORM                36
+
+/*
  * Does the device support Single Root I/O Virtualization?
  */
 #define VIRTIO_F_SR_IOV                        37
index 2414f8a..4c4e24c 100644 (file)
@@ -213,14 +213,4 @@ struct vring_packed_desc {
        __le16 flags;
 };
 
-struct vring_packed {
-       unsigned int num;
-
-       struct vring_packed_desc *desc;
-
-       struct vring_packed_desc_event *driver;
-
-       struct vring_packed_desc_event *device;
-};
-
 #endif /* _UAPI_LINUX_VIRTIO_RING_H */
index ef3c7ec..eb76b38 100644 (file)
@@ -52,6 +52,11 @@ struct hns_roce_ib_create_srq {
        __aligned_u64 que_addr;
 };
 
+struct hns_roce_ib_create_srq_resp {
+       __u32   srqn;
+       __u32   reserved;
+};
+
 struct hns_roce_ib_create_qp {
        __aligned_u64 buf_addr;
        __aligned_u64 db_addr;
index 513fa54..c9386a3 100644 (file)
@@ -512,6 +512,17 @@ config PSI_DEFAULT_DISABLED
          per default but can be enabled through passing psi=1 on the
          kernel commandline during boot.
 
+         This feature adds some code to the task wakeup and sleep
+         paths of the scheduler. The overhead is too low to affect
+         common scheduling-intense workloads in practice (such as
+         webservers, memcache), but it does show up in artificial
+         scheduler stress tests, such as hackbench.
+
+         If you are paranoid and not sure what the kernel will be
+         used for, say Y.
+
+         Say N if unsure.
+
 endmenu # "CPU/Task time and stats accounting"
 
 config CPU_ISOLATION
@@ -825,7 +836,7 @@ config CGROUP_PIDS
          PIDs controller is designed to stop this from happening.
 
          It should be noted that organisational operations (such as attaching
-         to a cgroup hierarchy will *not* be blocked by the PIDs controller),
+         to a cgroup hierarchy) will *not* be blocked by the PIDs controller,
          since the PIDs limit only affects a process's ability to fork, not to
          attach to a cgroup.
 
index e2e80ca..c86a1c8 100644 (file)
@@ -695,7 +695,6 @@ asmlinkage __visible void __init start_kernel(void)
                initrd_start = 0;
        }
 #endif
-       page_ext_init();
        kmemleak_init();
        setup_per_cpu_pageset();
        numa_policy_init();
@@ -1131,6 +1130,8 @@ static noinline void __init kernel_init_freeable(void)
        sched_init_smp();
 
        page_alloc_init_late();
+       /* Initialize page ext after all struct pages are initialized. */
+       page_ext_init();
 
        do_basic_setup();
 
index 7019c1f..bd3921b 100644 (file)
@@ -1530,7 +1530,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 
                /* "typedef void new_void", "const void"...etc */
                if (!btf_type_is_void(next_type) &&
-                   !btf_type_is_fwd(next_type)) {
+                   !btf_type_is_fwd(next_type) &&
+                   !btf_type_is_func_proto(next_type)) {
                        btf_verifier_log_type(env, v->t, "Invalid type_id");
                        return -EINVAL;
                }
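
The shape the relaxed check now admits, sketched in C: a modifier (typedef, const, ...) whose target resolves to a function prototype rather than a concrete type.

        typedef int demo_fn_t(void *ctx);       /* typedef -> FUNC_PROTO in BTF */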
index d78cfec..4e80797 100644 (file)
@@ -573,7 +573,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
        bpf_compute_and_save_data_end(skb, &saved_data_end);
 
        ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-                                bpf_prog_run_save_cb);
+                                __bpf_prog_run_save_cb);
        bpf_restore_data_end(skb, saved_data_end);
        __skb_pull(skb, offset);
        skb->sk = save_sk;
index 9377765..fed15cf 100644 (file)
@@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
        }
 
        if (htab_is_prealloc(htab)) {
-               pcpu_freelist_push(&htab->freelist, &l->fnode);
+               __pcpu_freelist_push(&htab->freelist, &l->fnode);
        } else {
                atomic_dec(&htab->count);
                l->htab = htab;
@@ -739,7 +739,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                } else {
                        struct pcpu_freelist_node *l;
 
-                       l = pcpu_freelist_pop(&htab->freelist);
+                       l = __pcpu_freelist_pop(&htab->freelist);
                        if (!l)
                                return ERR_PTR(-E2BIG);
                        l_new = container_of(l, struct htab_elem, fnode);
index 673fa6f..0c1b4ba 100644 (file)
@@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s)
        free_percpu(s->freelist);
 }
 
-static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
-                                       struct pcpu_freelist_node *node)
+static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
+                                        struct pcpu_freelist_node *node)
 {
        raw_spin_lock(&head->lock);
        node->next = head->first;
@@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
        raw_spin_unlock(&head->lock);
 }
 
-void pcpu_freelist_push(struct pcpu_freelist *s,
+void __pcpu_freelist_push(struct pcpu_freelist *s,
                        struct pcpu_freelist_node *node)
 {
        struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
 
-       __pcpu_freelist_push(head, node);
+       ___pcpu_freelist_push(head, node);
+}
+
+void pcpu_freelist_push(struct pcpu_freelist *s,
+                       struct pcpu_freelist_node *node)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __pcpu_freelist_push(s, node);
+       local_irq_restore(flags);
 }
 
 void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
@@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
        for_each_possible_cpu(cpu) {
 again:
                head = per_cpu_ptr(s->freelist, cpu);
-               __pcpu_freelist_push(head, buf);
+               ___pcpu_freelist_push(head, buf);
                i++;
                buf += elem_size;
                if (i == nr_elems)
@@ -74,14 +84,12 @@ again:
        local_irq_restore(flags);
 }
 
-struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
 {
        struct pcpu_freelist_head *head;
        struct pcpu_freelist_node *node;
-       unsigned long flags;
        int orig_cpu, cpu;
 
-       local_irq_save(flags);
        orig_cpu = cpu = raw_smp_processor_id();
        while (1) {
                head = per_cpu_ptr(s->freelist, cpu);
@@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
                node = head->first;
                if (node) {
                        head->first = node->next;
-                       raw_spin_unlock_irqrestore(&head->lock, flags);
+                       raw_spin_unlock(&head->lock);
                        return node;
                }
                raw_spin_unlock(&head->lock);
                cpu = cpumask_next(cpu, cpu_possible_mask);
                if (cpu >= nr_cpu_ids)
                        cpu = 0;
-               if (cpu == orig_cpu) {
-                       local_irq_restore(flags);
+               if (cpu == orig_cpu)
                        return NULL;
-               }
        }
 }
+
+struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+{
+       struct pcpu_freelist_node *ret;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       ret = __pcpu_freelist_pop(s);
+       local_irq_restore(flags);
+       return ret;
+}
index 3049aae..c396011 100644 (file)
@@ -22,8 +22,12 @@ struct pcpu_freelist_node {
        struct pcpu_freelist_node *next;
 };
 
+/* pcpu_freelist_* do spin_lock_irqsave. */
 void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
 struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
+/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */
+void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *);
 void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
                            u32 nr_elems);
 int pcpu_freelist_init(struct pcpu_freelist *);
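
The new comments spell out the contract; a hedged sketch of a caller that already runs with IRQs disabled and therefore uses the underscored variants, saving the redundant irqsave:

        unsigned long flags;
        struct pcpu_freelist_node *node;

        local_irq_save(flags);
        node = __pcpu_freelist_pop(s);          /* spinlock only; IRQs already off */
        if (node)
                __pcpu_freelist_push(s, node);  /* e.g. returning it unused */
        local_irq_restore(flags);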
index 0834958..ec7c552 100644 (file)
@@ -740,8 +740,13 @@ static int map_lookup_elem(union bpf_attr *attr)
 
        if (bpf_map_is_dev_bound(map)) {
                err = bpf_map_offload_lookup_elem(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-                  map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+               goto done;
+       }
+
+       preempt_disable();
+       this_cpu_inc(bpf_prog_active);
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                err = bpf_percpu_hash_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_copy(map, key, value);
@@ -777,7 +782,10 @@ static int map_lookup_elem(union bpf_attr *attr)
                }
                rcu_read_unlock();
        }
+       this_cpu_dec(bpf_prog_active);
+       preempt_enable();
 
+done:
        if (err)
                goto free_value;
 
index 91d5c38..d1c6d15 100644 (file)
@@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { }
 
 #ifdef CONFIG_HOTPLUG_SMT
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
-EXPORT_SYMBOL_GPL(cpu_smt_control);
-
-static bool cpu_smt_available __read_mostly;
 
 void __init cpu_smt_disable(bool force)
 {
@@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force)
 
 /*
  * The decision whether SMT is supported can only be done after the full
- * CPU identification. Called from architecture code before non boot CPUs
- * are brought up.
- */
-void __init cpu_smt_check_topology_early(void)
-{
-       if (!topology_smt_supported())
-               cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-}
-
-/*
- * If SMT was disabled by BIOS, detect it here, after the CPUs have been
- * brought online. This ensures the smt/l1tf sysfs entries are consistent
- * with reality. cpu_smt_available is set to true during the bringup of non
- * boot CPUs when a SMT sibling is detected. Note, this may overwrite
- * cpu_smt_control's previous setting.
+ * CPU identification. Called from architecture code.
  */
 void __init cpu_smt_check_topology(void)
 {
-       if (!cpu_smt_available)
+       if (!topology_smt_supported())
                cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 }
 
@@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable);
 
 static inline bool cpu_smt_allowed(unsigned int cpu)
 {
-       if (topology_is_primary_thread(cpu))
+       if (cpu_smt_control == CPU_SMT_ENABLED)
                return true;
 
-       /*
-        * If the CPU is not a 'primary' thread and the booted_once bit is
-        * set then the processor has SMT support. Store this information
-        * for the late check of SMT support in cpu_smt_check_topology().
-        */
-       if (per_cpu(cpuhp_state, cpu).booted_once)
-               cpu_smt_available = true;
-
-       if (cpu_smt_control == CPU_SMT_ENABLED)
+       if (topology_is_primary_thread(cpu))
                return true;
 
        /*
@@ -2090,10 +2065,8 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
                 */
                cpuhp_offline_cpu_device(cpu);
        }
-       if (!ret) {
+       if (!ret)
                cpu_smt_control = ctrlval;
-               arch_smt_update();
-       }
        cpu_maps_update_done();
        return ret;
 }
@@ -2104,7 +2077,6 @@ static int cpuhp_smt_enable(void)
 
        cpu_maps_update_begin();
        cpu_smt_control = CPU_SMT_ENABLED;
-       arch_smt_update();
        for_each_present_cpu(cpu) {
                /* Skip online CPUs and CPUs on offline nodes */
                if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
index 3cd13a3..e5ede69 100644 (file)
@@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
-       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-
-       if (ret || !write)
-               return ret;
-
+       int ret;
+       int perf_cpu = sysctl_perf_cpu_time_max_percent;
        /*
         * If throttling is disabled don't allow the write:
         */
-       if (sysctl_perf_cpu_time_max_percent == 100 ||
-           sysctl_perf_cpu_time_max_percent == 0)
+       if (write && (perf_cpu == 100 || perf_cpu == 0))
                return -EINVAL;
 
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (ret || !write)
+               return ret;
+
        max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
        perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
        update_perf_cpu_limits();
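
A hedged userspace probe of the new ordering (the procfs path is the standard one for this sysctl; the errno is what the hunk returns): with kernel.perf_cpu_time_max_percent set to 0 or 100, a write to kernel.perf_event_max_sample_rate should now fail with EINVAL instead of silently updating the limits first.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/proc/sys/kernel/perf_event_max_sample_rate", O_WRONLY);

        if (fd < 0)
                return 1;
        if (write(fd, "100000\n", 7) < 0)
                printf("write failed: %s (expect EINVAL when throttling is off)\n",
                       strerror(errno));
        close(fd);
        return 0;
}
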
index 4a99370..309ef5a 100644 (file)
@@ -734,6 +734,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
        size = sizeof(struct ring_buffer);
        size += nr_pages * sizeof(void *);
 
+       if (order_base_2(size) >= MAX_ORDER)
+               goto fail;
+
        rb = kzalloc(size, GFP_KERNEL);
        if (!rb)
                goto fail;
index 3fb7be0..2639a30 100644 (file)
@@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p)
        return NULL;
 }
 
-static struct task_struct *find_child_reaper(struct task_struct *father)
+static struct task_struct *find_child_reaper(struct task_struct *father,
+                                               struct list_head *dead)
        __releases(&tasklist_lock)
        __acquires(&tasklist_lock)
 {
        struct pid_namespace *pid_ns = task_active_pid_ns(father);
        struct task_struct *reaper = pid_ns->child_reaper;
+       struct task_struct *p, *n;
 
        if (likely(reaper != father))
                return reaper;
@@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father)
                panic("Attempted to kill init! exitcode=0x%08x\n",
                        father->signal->group_exit_code ?: father->exit_code);
        }
+
+       list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+               list_del_init(&p->ptrace_entry);
+               release_task(p);
+       }
+
        zap_pid_ns_processes(pid_ns);
        write_lock_irq(&tasklist_lock);
 
@@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father,
                exit_ptrace(father, dead);
 
        /* Can drop and reacquire tasklist_lock */
-       reaper = find_child_reaper(father);
+       reaper = find_child_reaper(father, dead);
        if (list_empty(&father->children))
                return;
 
index fdd312d..a0514e0 100644 (file)
@@ -2221,11 +2221,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
         * decrement the counter at queue_unlock() when some error has
         * occurred and we don't end up adding the task to the list.
         */
-       hb_waiters_inc(hb);
+       hb_waiters_inc(hb); /* implies smp_mb(); (A) */
 
        q->lock_ptr = &hb->lock;
 
-       spin_lock(&hb->lock); /* implies smp_mb(); (A) */
+       spin_lock(&hb->lock);
        return hb;
 }
 
@@ -2861,35 +2861,39 @@ retry_private:
         * and BUG when futex_unlock_pi() interleaves with this.
         *
         * Therefore acquire wait_lock while holding hb->lock, but drop the
-        * latter before calling rt_mutex_start_proxy_lock(). This still fully
-        * serializes against futex_unlock_pi() as that does the exact same
-        * lock handoff sequence.
+        * latter before calling __rt_mutex_start_proxy_lock(). This
+        * interleaves with futex_unlock_pi() -- which does a similar lock
+        * handoff -- such that the latter can observe the futex_q::pi_state
+        * before __rt_mutex_start_proxy_lock() is done.
         */
        raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
        spin_unlock(q.lock_ptr);
+       /*
+        * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
+        * such that futex_unlock_pi() is guaranteed to observe the waiter when
+        * it sees the futex_q::pi_state.
+        */
        ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
        raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
 
        if (ret) {
                if (ret == 1)
                        ret = 0;
-
-               spin_lock(q.lock_ptr);
-               goto no_block;
+               goto cleanup;
        }
 
-
        if (unlikely(to))
                hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
 
        ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
 
+cleanup:
        spin_lock(q.lock_ptr);
        /*
-        * If we failed to acquire the lock (signal/timeout), we must
+        * If we failed to acquire the lock (deadlock/signal/timeout), we must
         * first acquire the hb->lock before removing the lock from the
-        * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
-        * wait lists consistent.
+        * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
+        * lists consistent.
         *
         * In particular; it is important that futex_unlock_pi() can not
         * observe this inconsistency.
@@ -3013,6 +3017,10 @@ retry:
                 * there is no point where we hold neither; and therefore
                 * wake_futex_pi() must observe a state consistent with what we
                 * observed.
+                *
+                * In particular; this forces __rt_mutex_start_proxy_lock() to
+                * complete such that we're guaranteed to observe the
+                * rt_waiter. Also see the WARN in wake_futex_pi().
                 */
                raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
                spin_unlock(&hb->lock);
index 581edcc..978d63a 100644 (file)
@@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
        rt_mutex_set_owner(lock, NULL);
 }
 
+/**
+ * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock:              the rt_mutex to take
+ * @waiter:            the pre-initialized rt_mutex_waiter
+ * @task:              the task to prepare
+ *
+ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
+ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
+ *
+ * NOTE: does _NOT_ remove the @waiter on failure; must either call
+ * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
+ *
+ * Returns:
+ *  0 - task blocked on lock
+ *  1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for PI-futex support.
+ */
 int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
                              struct rt_mutex_waiter *waiter,
                              struct task_struct *task)
 {
        int ret;
 
+       lockdep_assert_held(&lock->wait_lock);
+
        if (try_to_take_rt_mutex(lock, task, NULL))
                return 1;
 
@@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
                ret = 0;
        }
 
-       if (unlikely(ret))
-               remove_waiter(lock, waiter);
-
        debug_rt_mutex_print_deadlock(waiter);
 
        return ret;
@@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  * @waiter:            the pre-initialized rt_mutex_waiter
  * @task:              the task to prepare
  *
+ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
+ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
+ *
+ * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
+ * on failure.
+ *
  * Returns:
  *  0 - task blocked on lock
  *  1 - acquired the lock for task, caller should wake it up
  * <0 - error
  *
- * Special API call for FUTEX_REQUEUE_PI support.
+ * Special API call for PI-futex support.
  */
 int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
                              struct rt_mutex_waiter *waiter,
@@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 
        raw_spin_lock_irq(&lock->wait_lock);
        ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
+       if (unlikely(ret))
+               remove_waiter(lock, waiter);
        raw_spin_unlock_irq(&lock->wait_lock);
 
        return ret;
@@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
  * @lock:              the rt_mutex we were woken on
  * @waiter:            the pre-initialized rt_mutex_waiter
  *
- * Attempt to clean up after a failed rt_mutex_wait_proxy_lock().
+ * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
+ * rt_mutex_wait_proxy_lock().
  *
  * Unless we acquired the lock; we're still enqueued on the wait-list and can
  * in fact still be granted ownership until we're removed. Therefore we can
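
Condensed from the futex_lock_pi() flow in the futex.c hunks above, a sketch of the calling convention these comments pin down (hb->lock juggling and timer setup elided; example_proxy_acquire is a hypothetical caller): start the proxy acquisition under wait_lock, and on any failure let rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() dequeue the waiter.

/* Sketch of the proxy-lock call sequence under the new contract. */
static int example_proxy_acquire(struct rt_mutex *pi_mutex,
                                 struct rt_mutex_waiter *waiter,
                                 struct hrtimer_sleeper *to)
{
        int ret;

        raw_spin_lock_irq(&pi_mutex->wait_lock);
        ret = __rt_mutex_start_proxy_lock(pi_mutex, waiter, current);
        raw_spin_unlock_irq(&pi_mutex->wait_lock);

        if (ret == 1)           /* acquired immediately on the task's behalf */
                ret = 0;
        else if (!ret)          /* enqueued: now actually wait */
                ret = rt_mutex_wait_proxy_lock(pi_mutex, to, waiter);

        /* On any failure the waiter is still enqueued; cleanup dequeues it,
         * unless we won the lock in a race, in which case it returns false. */
        if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, waiter))
                ret = 0;

        return ret;
}
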
index 04f2486..9e0f523 100644 (file)
@@ -428,6 +428,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan,
        dentry = chan->cb->create_buf_file(tmpname, chan->parent,
                                           S_IRUSR, buf,
                                           &chan->is_global);
+       if (IS_ERR(dentry))
+               dentry = NULL;
 
        kfree(tmpname);
 
@@ -461,7 +463,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu)
                dentry = chan->cb->create_buf_file(NULL, NULL,
                                                   S_IRUSR, buf,
                                                   &chan->is_global);
-               if (WARN_ON(dentry))
+               if (IS_ERR_OR_NULL(dentry))
                        goto free_buf;
        }
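
For context, a hypothetical debugfs-backed client callback in the usual shape for relay users (blktrace's debugfs breakage motivated this fix): debugfs_create_file() can return an ERR_PTR on recent kernels, and with this change relay maps that to NULL instead of treating it as a valid dentry.

#include <linux/debugfs.h>
#include <linux/relay.h>

/* Hypothetical client callback; relay now copes if this returns ERR_PTR. */
static struct dentry *example_create_buf_file(const char *filename,
                                              struct dentry *parent,
                                              umode_t mode,
                                              struct rchan_buf *buf,
                                              int *is_global)
{
        return debugfs_create_file(filename, mode, parent, buf,
                                   &relay_file_operations);
}
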
 
index 50aa2ab..310d063 100644 (file)
@@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+EXPORT_SYMBOL_GPL(sched_smt_present);
 
 static inline void set_idle_cores(int cpu, int val)
 {
index fe24de3..c348478 100644 (file)
  * sampling of the aggregate task states would be.
  */
 
+#include "../workqueue_internal.h"
 #include <linux/sched/loadavg.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
@@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
                        groupc->tasks[t]++;
 
        write_seqcount_end(&groupc->seq);
-
-       if (!delayed_work_pending(&group->clock_work))
-               schedule_delayed_work(&group->clock_work, PSI_FREQ);
 }
 
 static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
@@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 {
        int cpu = task_cpu(task);
        struct psi_group *group;
+       bool wake_clock = true;
        void *iter = NULL;
 
        if (!task->pid)
@@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set)
        task->psi_flags &= ~clear;
        task->psi_flags |= set;
 
-       while ((group = iterate_groups(task, &iter)))
+       /*
+        * Periodic aggregation shuts off if there is a period of no
+        * task changes, so we wake it back up if necessary. However,
+        * don't do this if the task change is the aggregation worker
+        * itself going to sleep, or we'll ping-pong forever.
+        */
+       if (unlikely((clear & TSK_RUNNING) &&
+                    (task->flags & PF_WQ_WORKER) &&
+                    wq_worker_last_func(task) == psi_update_work))
+               wake_clock = false;
+
+       while ((group = iterate_groups(task, &iter))) {
                psi_group_change(group, cpu, clear, set);
+               if (wake_clock && !delayed_work_pending(&group->clock_work))
+                       schedule_delayed_work(&group->clock_work, PSI_FREQ);
+       }
 }
 
 void psi_memstall_tick(struct task_struct *task, int cpu)
index e1d7ad8..57b7771 100644 (file)
@@ -688,6 +688,48 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *in
 }
 EXPORT_SYMBOL_GPL(dequeue_signal);
 
+static int dequeue_synchronous_signal(kernel_siginfo_t *info)
+{
+       struct task_struct *tsk = current;
+       struct sigpending *pending = &tsk->pending;
+       struct sigqueue *q, *sync = NULL;
+
+       /*
+        * Might a synchronous signal be in the queue?
+        */
+       if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK))
+               return 0;
+
+       /*
+        * Return the first synchronous signal in the queue.
+        */
+       list_for_each_entry(q, &pending->list, list) {
+               /* Synchronous signals have a positive si_code */
+               if ((q->info.si_code > SI_USER) &&
+                   (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) {
+                       sync = q;
+                       goto next;
+               }
+       }
+       return 0;
+next:
+       /*
+        * Check if there is another siginfo for the same signal.
+        */
+       list_for_each_entry_continue(q, &pending->list, list) {
+               if (q->info.si_signo == sync->info.si_signo)
+                       goto still_pending;
+       }
+
+       sigdelset(&pending->signal, sync->info.si_signo);
+       recalc_sigpending();
+still_pending:
+       list_del_init(&sync->list);
+       copy_siginfo(info, &sync->info);
+       __sigqueue_free(sync);
+       return info->si_signo;
+}
+
 /*
  * Tell a process that it has a new active signal..
  *
@@ -1057,10 +1099,9 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
 
        result = TRACE_SIGNAL_DELIVERED;
        /*
-        * Skip useless siginfo allocation for SIGKILL SIGSTOP,
-        * and kernel threads.
+        * Skip useless siginfo allocation for SIGKILL and kernel threads.
         */
-       if (sig_kernel_only(sig) || (t->flags & PF_KTHREAD))
+       if ((sig == SIGKILL) || (t->flags & PF_KTHREAD))
                goto out_set;
 
        /*
@@ -2394,6 +2435,14 @@ relock:
                goto relock;
        }
 
+       /* Has this task already been marked for death? */
+       if (signal_group_exit(signal)) {
+               ksig->info.si_signo = signr = SIGKILL;
+               sigdelset(&current->pending.signal, SIGKILL);
+               recalc_sigpending();
+               goto fatal;
+       }
+
        for (;;) {
                struct k_sigaction *ka;
 
@@ -2407,7 +2456,15 @@ relock:
                        goto relock;
                }
 
-               signr = dequeue_signal(current, &current->blocked, &ksig->info);
+               /*
+                * Signals generated by the execution of an instruction
+                * need to be delivered before any other pending signals
+                * so that the instruction pointer in the signal stack
+                * frame points to the faulting instruction.
+                */
+               signr = dequeue_synchronous_signal(&ksig->info);
+               if (!signr)
+                       signr = dequeue_signal(current, &current->blocked, &ksig->info);
 
                if (!signr)
                        break; /* will return 0 */
@@ -2489,6 +2546,7 @@ relock:
                        continue;
                }
 
+       fatal:
                spin_unlock_irq(&sighand->siglock);
 
                /*
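
A userspace illustration (assuming standard Linux si_code semantics; printf in a handler is not strictly async-signal-safe, but fine for a demo) of the predicate dequeue_synchronous_signal() keys on: a fault-generated SIGSEGV carries a positive, kernel-set si_code such as SEGV_MAPERR, while a kill()-sent SIGSEGV carries SI_USER and is not treated as synchronous.

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t hits;

static void on_segv(int sig, siginfo_t *si, void *uc)
{
        (void)sig; (void)uc;
        printf("SIGSEGV si_code=%d (%s)\n", si->si_code,
               si->si_code > SI_USER ? "synchronous" : "not synchronous");
        if (++hits == 2)
                _exit(0);       /* don't return to the faulting store */
}

int main(void)
{
        struct sigaction sa = { .sa_sigaction = on_segv,
                                .sa_flags = SA_SIGINFO };

        sigaction(SIGSEGV, &sa, NULL);
        kill(getpid(), SIGSEGV);        /* si_code == SI_USER */
        *(volatile int *)8 = 1;         /* si_code == SEGV_MAPERR */
        return 1;
}
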
index 163c451..f4cf1b0 100644 (file)
@@ -584,8 +584,6 @@ void __init smp_init(void)
                num_nodes, (num_nodes > 1 ? "s" : ""),
                num_cpus,  (num_cpus  > 1 ? "s" : ""));
 
-       /* Final decision about SMT support */
-       cpu_smt_check_topology();
        /* Any cleanup work */
        smp_cpus_done(setup_max_cpus);
 }
index 8b068ad..f1a86a0 100644 (file)
@@ -1204,22 +1204,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
 
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 {
-       int err;
-
-       mutex_lock(&bpf_event_mutex);
-       err = __bpf_probe_register(btp, prog);
-       mutex_unlock(&bpf_event_mutex);
-       return err;
+       return __bpf_probe_register(btp, prog);
 }
 
 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 {
-       int err;
-
-       mutex_lock(&bpf_event_mutex);
-       err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
-       mutex_unlock(&bpf_event_mutex);
-       return err;
+       return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
 }
 
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
index 5c56afc..4737bb8 100644 (file)
@@ -180,10 +180,12 @@ store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs,
                if (unlikely(arg->dynamic))
                        *dl = make_data_loc(maxlen, dyndata - base);
                ret = process_fetch_insn(arg->code, regs, dl, base);
-               if (unlikely(ret < 0 && arg->dynamic))
+               if (unlikely(ret < 0 && arg->dynamic)) {
                        *dl = make_data_loc(0, dyndata - base);
-               else
+               } else {
                        dyndata += ret;
+                       maxlen -= ret;
+               }
        }
 }
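
A standalone model (hypothetical userspace code, not the kernel function) of the bookkeeping the fix adds: each successful fetch must shrink the remaining budget, otherwise a later dynamic argument would be offered the full area again and could overrun it.

#include <string.h>

static size_t store_args(char *dyndata, size_t maxlen,
                         const char **args, int nargs)
{
        size_t used = 0;

        for (int i = 0; i < nargs; i++) {
                size_t len = strlen(args[i]) + 1;

                if (len > maxlen)       /* would overflow: stop */
                        break;
                memcpy(dyndata + used, args[i], len);
                used += len;
                maxlen -= len;          /* the line the fix adds */
        }
        return used;
}
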
 
index e335576..9bde07c 100644 (file)
@@ -5,7 +5,7 @@
  * Copyright (C) IBM Corporation, 2010-2012
  * Author:     Srikar Dronamraju <srikar@linux.vnet.ibm.com>
  */
-#define pr_fmt(fmt)    "trace_kprobe: " fmt
+#define pr_fmt(fmt)    "trace_uprobe: " fmt
 
 #include <linux/ctype.h>
 #include <linux/module.h>
@@ -160,6 +160,13 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
        if (ret >= 0) {
                if (ret == maxlen)
                        dst[ret - 1] = '\0';
+               else
+                       /*
+                        * Include the terminating null byte. In this case it
+                        * was copied by strncpy_from_user but not accounted
+                        * for in ret.
+                        */
+                       ret++;
                *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
        }
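
A toy model (hypothetical userspace code) of the off-by-one: for "foo", strncpy_from_user() copies four bytes including the NUL but returns 3, so without the ret++ the recorded data location undercounted by one and readers lost the terminator.

#include <stdio.h>
#include <string.h>

static long record_len(const char *src, long maxlen)
{
        long ret = strlen(src); /* models strncpy_from_user(): NUL excluded */

        if (ret >= maxlen)
                ret = maxlen;   /* truncated; force-terminated in place */
        else
                ret++;          /* the fix: count the NUL that was copied */
        return ret;
}

int main(void)
{
        printf("%ld\n", record_len("foo", 32)); /* 4 == sizeof "foo" */
        return 0;
}
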
 
index 392be4b..fc5d23d 100644 (file)
@@ -910,6 +910,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
 }
 
 /**
+ * wq_worker_last_func - retrieve worker's last work function
+ * @task: the task to retrieve the last work function of
+ *
+ * Determine the last function a worker executed. This is called from
+ * the scheduler to get a worker's last known identity.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ *
+ * Return:
+ * The last work function %current executed as a worker, NULL if it
+ * hasn't executed any work yet.
+ */
+work_func_t wq_worker_last_func(struct task_struct *task)
+{
+       struct worker *worker = kthread_data(task);
+
+       return worker->last_func;
+}
+
+/**
  * worker_set_flags - set worker flags and adjust nr_running accordingly
  * @worker: self
  * @flags: flags to set
@@ -2184,6 +2204,9 @@ __acquires(&pool->lock)
        if (unlikely(cpu_intensive))
                worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
 
+       /* tag the worker for identification in schedule() */
+       worker->last_func = worker->current_func;
+
        /* we're done with it, release */
        hash_del(&worker->hentry);
        worker->current_work = NULL;
index 66fbb5a..cb68b03 100644 (file)
@@ -53,6 +53,9 @@ struct worker {
 
        /* used only by rescuers to point to the target workqueue */
        struct workqueue_struct *rescue_wq;     /* I: the workqueue to rescue */
+
+       /* used by the scheduler to determine a worker's last known identity */
+       work_func_t             last_func;
 };
 
 /**
@@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void)
 
 /*
  * Scheduler hooks for concurrency managed workqueue.  Only to be used from
- * sched/core.c and workqueue.c.
+ * sched/ and workqueue.c.
  */
 void wq_worker_waking_up(struct task_struct *task, int cpu);
 struct task_struct *wq_worker_sleeping(struct task_struct *task);
+work_func_t wq_worker_last_func(struct task_struct *task);
 
 #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
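
The scheduler-side consumer is the psi hunk earlier in this pull; reduced to its essence as a sketch (psi_update_work is static to kernel/sched/psi.c, so this only compiles there):

/* Sketch: is this task the psi aggregation worker going to sleep? */
static bool aggregator_going_to_sleep(struct task_struct *task)
{
        return (task->flags & PF_WQ_WORKER) &&
               wq_worker_last_func(task) == psi_update_work;
}
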
index c9b457a..576be22 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/rhashtable.h>
+#include <linux/idr.h>
 #include <linux/list.h>
 #include <linux/sort.h>
 #include <linux/objagg.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/objagg.h>
 
+struct objagg_hints {
+       struct rhashtable node_ht;
+       struct rhashtable_params ht_params;
+       struct list_head node_list;
+       unsigned int node_count;
+       unsigned int root_count;
+       unsigned int refcount;
+       const struct objagg_ops *ops;
+};
+
+struct objagg_hints_node {
+       struct rhash_head ht_node; /* member of objagg_hints->node_ht */
+       struct list_head list; /* member of objagg_hints->node_list */
+       struct objagg_hints_node *parent;
+       unsigned int root_id;
+       struct objagg_obj_stats_info stats_info;
+       unsigned long obj[0];
+};
+
+static struct objagg_hints_node *
+objagg_hints_lookup(struct objagg_hints *objagg_hints, void *obj)
+{
+       if (!objagg_hints)
+               return NULL;
+       return rhashtable_lookup_fast(&objagg_hints->node_ht, obj,
+                                     objagg_hints->ht_params);
+}
+
 struct objagg {
        const struct objagg_ops *ops;
        void *priv;
@@ -18,6 +47,8 @@ struct objagg {
        struct rhashtable_params ht_params;
        struct list_head obj_list;
        unsigned int obj_count;
+       struct ida root_ida;
+       struct objagg_hints *hints;
 };
 
 struct objagg_obj {
@@ -30,6 +61,7 @@ struct objagg_obj {
                void *delta_priv; /* user delta private */
                void *root_priv; /* user root private */
        };
+       unsigned int root_id;
        unsigned int refcount; /* counts number of users of this object
                                * including nested objects
                                */
@@ -130,7 +162,8 @@ static struct objagg_obj *objagg_obj_lookup(struct objagg *objagg, void *obj)
 
 static int objagg_obj_parent_assign(struct objagg *objagg,
                                    struct objagg_obj *objagg_obj,
-                                   struct objagg_obj *parent)
+                                   struct objagg_obj *parent,
+                                   bool take_parent_ref)
 {
        void *delta_priv;
 
@@ -144,7 +177,8 @@ static int objagg_obj_parent_assign(struct objagg *objagg,
         */
        objagg_obj->parent = parent;
        objagg_obj->delta_priv = delta_priv;
-       objagg_obj_ref_inc(objagg_obj->parent);
+       if (take_parent_ref)
+               objagg_obj_ref_inc(objagg_obj->parent);
        trace_objagg_obj_parent_assign(objagg, objagg_obj,
                                       parent,
                                       parent->refcount);
@@ -164,7 +198,7 @@ static int objagg_obj_parent_lookup_assign(struct objagg *objagg,
                if (!objagg_obj_is_root(objagg_obj_cur))
                        continue;
                err = objagg_obj_parent_assign(objagg, objagg_obj,
-                                              objagg_obj_cur);
+                                              objagg_obj_cur, true);
                if (!err)
                        return 0;
        }
@@ -184,16 +218,68 @@ static void objagg_obj_parent_unassign(struct objagg *objagg,
        __objagg_obj_put(objagg, objagg_obj->parent);
 }
 
+static int objagg_obj_root_id_alloc(struct objagg *objagg,
+                                   struct objagg_obj *objagg_obj,
+                                   struct objagg_hints_node *hnode)
+{
+       unsigned int min, max;
+       int root_id;
+
+       /* In case there are no hints available, the root id is invalid. */
+       if (!objagg->hints) {
+               objagg_obj->root_id = OBJAGG_OBJ_ROOT_ID_INVALID;
+               return 0;
+       }
+
+       if (hnode) {
+               min = hnode->root_id;
+               max = hnode->root_id;
+       } else {
+               /* For objects with no hint, start after the last
+                * hinted root_id.
+                */
+               min = objagg->hints->root_count;
+               max = ~0;
+       }
+
+       root_id = ida_alloc_range(&objagg->root_ida, min, max, GFP_KERNEL);
+
+       if (root_id < 0)
+               return root_id;
+       objagg_obj->root_id = root_id;
+       return 0;
+}
+
+static void objagg_obj_root_id_free(struct objagg *objagg,
+                                   struct objagg_obj *objagg_obj)
+{
+       if (!objagg->hints)
+               return;
+       ida_free(&objagg->root_ida, objagg_obj->root_id);
+}
+
 static int objagg_obj_root_create(struct objagg *objagg,
-                                 struct objagg_obj *objagg_obj)
+                                 struct objagg_obj *objagg_obj,
+                                 struct objagg_hints_node *hnode)
 {
-       objagg_obj->root_priv = objagg->ops->root_create(objagg->priv,
-                                                        objagg_obj->obj);
-       if (IS_ERR(objagg_obj->root_priv))
-               return PTR_ERR(objagg_obj->root_priv);
+       int err;
 
+       err = objagg_obj_root_id_alloc(objagg, objagg_obj, hnode);
+       if (err)
+               return err;
+       objagg_obj->root_priv = objagg->ops->root_create(objagg->priv,
+                                                        objagg_obj->obj,
+                                                        objagg_obj->root_id);
+       if (IS_ERR(objagg_obj->root_priv)) {
+               err = PTR_ERR(objagg_obj->root_priv);
+               goto err_root_create;
+       }
        trace_objagg_obj_root_create(objagg, objagg_obj);
        return 0;
+
+err_root_create:
+       objagg_obj_root_id_free(objagg, objagg_obj);
+       return err;
 }
 
 static void objagg_obj_root_destroy(struct objagg *objagg,
@@ -201,19 +287,69 @@ static void objagg_obj_root_destroy(struct objagg *objagg,
 {
        trace_objagg_obj_root_destroy(objagg, objagg_obj);
        objagg->ops->root_destroy(objagg->priv, objagg_obj->root_priv);
+       objagg_obj_root_id_free(objagg, objagg_obj);
+}
+
+static struct objagg_obj *__objagg_obj_get(struct objagg *objagg, void *obj);
+
+static int objagg_obj_init_with_hints(struct objagg *objagg,
+                                     struct objagg_obj *objagg_obj,
+                                     bool *hint_found)
+{
+       struct objagg_hints_node *hnode;
+       struct objagg_obj *parent;
+       int err;
+
+       hnode = objagg_hints_lookup(objagg->hints, objagg_obj->obj);
+       if (!hnode) {
+               *hint_found = false;
+               return 0;
+       }
+       *hint_found = true;
+
+       if (!hnode->parent)
+               return objagg_obj_root_create(objagg, objagg_obj, hnode);
+
+       parent = __objagg_obj_get(objagg, hnode->parent->obj);
+       if (IS_ERR(parent))
+               return PTR_ERR(parent);
+
+       err = objagg_obj_parent_assign(objagg, objagg_obj, parent, false);
+       if (err) {
+               *hint_found = false;
+               err = 0;
+               goto err_parent_assign;
+       }
+
+       return 0;
+
+err_parent_assign:
+       objagg_obj_put(objagg, parent);
+       return err;
 }
 
 static int objagg_obj_init(struct objagg *objagg,
                           struct objagg_obj *objagg_obj)
 {
+       bool hint_found;
        int err;
 
+       /* First, try to use hints if they are available and
+        * if they provide a result.
+        */
+       err = objagg_obj_init_with_hints(objagg, objagg_obj, &hint_found);
+       if (err)
+               return err;
+
+       if (hint_found)
+               return 0;
+
        /* Try to find if the object can be aggregated under an existing one. */
        err = objagg_obj_parent_lookup_assign(objagg, objagg_obj);
        if (!err)
                return 0;
        /* If aggregation is not possible, make the object a root. */
-       return objagg_obj_root_create(objagg, objagg_obj);
+       return objagg_obj_root_create(objagg, objagg_obj, NULL);
 }
 
 static void objagg_obj_fini(struct objagg *objagg,
@@ -349,8 +485,9 @@ EXPORT_SYMBOL(objagg_obj_put);
 
 /**
  * objagg_create - creates a new objagg instance
- * @ops:       user-specific callbacks
- * @priv:      pointer to a private data passed to the ops
+ * @ops:               user-specific callbacks
+ * @objagg_hints:      hints, can be NULL
+ * @priv:              pointer to a private data passed to the ops
  *
  * Note: all locking must be provided by the caller.
  *
@@ -374,18 +511,25 @@ EXPORT_SYMBOL(objagg_obj_put);
  * Returns a pointer to newly created objagg instance in case of success,
  * otherwise it returns pointer error using ERR_PTR macro.
  */
-struct objagg *objagg_create(const struct objagg_ops *ops, void *priv)
+struct objagg *objagg_create(const struct objagg_ops *ops,
+                            struct objagg_hints *objagg_hints, void *priv)
 {
        struct objagg *objagg;
        int err;
 
        if (WARN_ON(!ops || !ops->root_create || !ops->root_destroy ||
-                   !ops->delta_create || !ops->delta_destroy))
+                   !ops->delta_check || !ops->delta_create ||
+                   !ops->delta_destroy))
                return ERR_PTR(-EINVAL);
+
        objagg = kzalloc(sizeof(*objagg), GFP_KERNEL);
        if (!objagg)
                return ERR_PTR(-ENOMEM);
        objagg->ops = ops;
+       if (objagg_hints) {
+               objagg->hints = objagg_hints;
+               objagg_hints->refcount++;
+       }
        objagg->priv = priv;
        INIT_LIST_HEAD(&objagg->obj_list);
 
@@ -397,6 +541,8 @@ struct objagg *objagg_create(const struct objagg_ops *ops, void *priv)
        if (err)
                goto err_rhashtable_init;
 
+       ida_init(&objagg->root_ida);
+
        trace_objagg_create(objagg);
        return objagg;
 
@@ -415,8 +561,11 @@ EXPORT_SYMBOL(objagg_create);
 void objagg_destroy(struct objagg *objagg)
 {
        trace_objagg_destroy(objagg);
+       ida_destroy(&objagg->root_ida);
        WARN_ON(!list_empty(&objagg->obj_list));
        rhashtable_destroy(&objagg->obj_ht);
+       if (objagg->hints)
+               objagg_hints_put(objagg->hints);
        kfree(objagg);
 }
 EXPORT_SYMBOL(objagg_destroy);
@@ -472,6 +621,8 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg)
                objagg_stats->stats_info[i].objagg_obj = objagg_obj;
                objagg_stats->stats_info[i].is_root =
                                        objagg_obj_is_root(objagg_obj);
+               if (objagg_stats->stats_info[i].is_root)
+                       objagg_stats->root_count++;
                i++;
        }
        objagg_stats->stats_info_count = i;
@@ -485,7 +636,7 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg)
 EXPORT_SYMBOL(objagg_stats_get);
 
 /**
- * objagg_stats_puts - puts stats of the objagg instance
+ * objagg_stats_put - puts stats of the objagg instance
  * @objagg_stats:      objagg instance stats
  *
  * Note: all locking must be provided by the caller.
@@ -496,6 +647,410 @@ void objagg_stats_put(const struct objagg_stats *objagg_stats)
 }
 EXPORT_SYMBOL(objagg_stats_put);
 
+static struct objagg_hints_node *
+objagg_hints_node_create(struct objagg_hints *objagg_hints,
+                        struct objagg_obj *objagg_obj, size_t obj_size,
+                        struct objagg_hints_node *parent_hnode)
+{
+       unsigned int user_count = objagg_obj->stats.user_count;
+       struct objagg_hints_node *hnode;
+       int err;
+
+       hnode = kzalloc(sizeof(*hnode) + obj_size, GFP_KERNEL);
+       if (!hnode)
+               return ERR_PTR(-ENOMEM);
+       memcpy(hnode->obj, &objagg_obj->obj, obj_size);
+       hnode->stats_info.stats.user_count = user_count;
+       hnode->stats_info.stats.delta_user_count = user_count;
+       if (parent_hnode) {
+               parent_hnode->stats_info.stats.delta_user_count += user_count;
+       } else {
+               hnode->root_id = objagg_hints->root_count++;
+               hnode->stats_info.is_root = true;
+       }
+       hnode->stats_info.objagg_obj = objagg_obj;
+
+       err = rhashtable_insert_fast(&objagg_hints->node_ht, &hnode->ht_node,
+                                    objagg_hints->ht_params);
+       if (err)
+               goto err_ht_insert;
+
+       list_add(&hnode->list, &objagg_hints->node_list);
+       hnode->parent = parent_hnode;
+       objagg_hints->node_count++;
+
+       return hnode;
+
+err_ht_insert:
+       kfree(hnode);
+       return ERR_PTR(err);
+}
+
+static void objagg_hints_flush(struct objagg_hints *objagg_hints)
+{
+       struct objagg_hints_node *hnode, *tmp;
+
+       list_for_each_entry_safe(hnode, tmp, &objagg_hints->node_list, list) {
+               list_del(&hnode->list);
+               rhashtable_remove_fast(&objagg_hints->node_ht, &hnode->ht_node,
+                                      objagg_hints->ht_params);
+               kfree(hnode);
+       }
+}
+
+struct objagg_tmp_node {
+       struct objagg_obj *objagg_obj;
+       bool crossed_out;
+};
+
+struct objagg_tmp_graph {
+       struct objagg_tmp_node *nodes;
+       unsigned long nodes_count;
+       unsigned long *edges;
+};
+
+static int objagg_tmp_graph_edge_index(struct objagg_tmp_graph *graph,
+                                      int parent_index, int index)
+{
+       return index * graph->nodes_count + parent_index;
+}
+
+static void objagg_tmp_graph_edge_set(struct objagg_tmp_graph *graph,
+                                     int parent_index, int index)
+{
+       int edge_index = objagg_tmp_graph_edge_index(graph, index,
+                                                    parent_index);
+
+       __set_bit(edge_index, graph->edges);
+}
+
+static bool objagg_tmp_graph_is_edge(struct objagg_tmp_graph *graph,
+                                    int parent_index, int index)
+{
+       int edge_index = objagg_tmp_graph_edge_index(graph, index,
+                                                    parent_index);
+
+       return test_bit(edge_index, graph->edges);
+}
+
+static unsigned int objagg_tmp_graph_node_weight(struct objagg_tmp_graph *graph,
+                                                unsigned int index)
+{
+       struct objagg_tmp_node *node = &graph->nodes[index];
+       unsigned int weight = node->objagg_obj->stats.user_count;
+       int j;
+
+       /* The node weight is the sum of the node's own users and the
+        * users of all other nodes that this node can represent with
+        * a delta.
+        */
+
+       for (j = 0; j < graph->nodes_count; j++) {
+               if (!objagg_tmp_graph_is_edge(graph, index, j))
+                       continue;
+               node = &graph->nodes[j];
+               if (node->crossed_out)
+                       continue;
+               weight += node->objagg_obj->stats.user_count;
+       }
+       return weight;
+}
+
+static int objagg_tmp_graph_node_max_weight(struct objagg_tmp_graph *graph)
+{
+       struct objagg_tmp_node *node;
+       unsigned int max_weight = 0;
+       unsigned int weight;
+       int max_index = -1;
+       int i;
+
+       for (i = 0; i < graph->nodes_count; i++) {
+               node = &graph->nodes[i];
+               if (node->crossed_out)
+                       continue;
+               weight = objagg_tmp_graph_node_weight(graph, i);
+               if (weight >= max_weight) {
+                       max_weight = weight;
+                       max_index = i;
+               }
+       }
+       return max_index;
+}
+
+static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg)
+{
+       unsigned int nodes_count = objagg->obj_count;
+       struct objagg_tmp_graph *graph;
+       struct objagg_tmp_node *node;
+       struct objagg_tmp_node *pnode;
+       struct objagg_obj *objagg_obj;
+       size_t alloc_size;
+       int i, j;
+
+       graph = kzalloc(sizeof(*graph), GFP_KERNEL);
+       if (!graph)
+               return NULL;
+
+       graph->nodes = kcalloc(nodes_count, sizeof(*graph->nodes), GFP_KERNEL);
+       if (!graph->nodes)
+               goto err_nodes_alloc;
+       graph->nodes_count = nodes_count;
+
+       alloc_size = BITS_TO_LONGS(nodes_count * nodes_count) *
+                    sizeof(unsigned long);
+       graph->edges = kzalloc(alloc_size, GFP_KERNEL);
+       if (!graph->edges)
+               goto err_edges_alloc;
+
+       i = 0;
+       list_for_each_entry(objagg_obj, &objagg->obj_list, list) {
+               node = &graph->nodes[i++];
+               node->objagg_obj = objagg_obj;
+       }
+
+       /* Assemble a temporary graph. Insert edge X->Y in case Y can be
+        * in delta of X.
+        */
+       for (i = 0; i < nodes_count; i++) {
+               for (j = 0; j < nodes_count; j++) {
+                       if (i == j)
+                               continue;
+                       pnode = &graph->nodes[i];
+                       node = &graph->nodes[j];
+                       if (objagg->ops->delta_check(objagg->priv,
+                                                    pnode->objagg_obj->obj,
+                                                    node->objagg_obj->obj)) {
+                               objagg_tmp_graph_edge_set(graph, i, j);
+                       }
+               }
+       }
+       return graph;
+
+err_edges_alloc:
+       kfree(graph->nodes);
+err_nodes_alloc:
+       kfree(graph);
+       return NULL;
+}
+
+static void objagg_tmp_graph_destroy(struct objagg_tmp_graph *graph)
+{
+       kfree(graph->edges);
+       kfree(graph->nodes);
+       kfree(graph);
+}
+
+static int
+objagg_opt_simple_greedy_fillup_hints(struct objagg_hints *objagg_hints,
+                                     struct objagg *objagg)
+{
+       struct objagg_hints_node *hnode, *parent_hnode;
+       struct objagg_tmp_graph *graph;
+       struct objagg_tmp_node *node;
+       int index;
+       int j;
+       int err;
+
+       graph = objagg_tmp_graph_create(objagg);
+       if (!graph)
+               return -ENOMEM;
+
+       /* Repeatedly find the node that can accommodate the most users,
+        * cross it and the nodes it covers out of the graph, and save
+        * them to the hint list.
+        */
+       while ((index = objagg_tmp_graph_node_max_weight(graph)) != -1) {
+               node = &graph->nodes[index];
+               node->crossed_out = true;
+               hnode = objagg_hints_node_create(objagg_hints,
+                                                node->objagg_obj,
+                                                objagg->ops->obj_size,
+                                                NULL);
+               if (IS_ERR(hnode)) {
+                       err = PTR_ERR(hnode);
+                       goto out;
+               }
+               parent_hnode = hnode;
+               for (j = 0; j < graph->nodes_count; j++) {
+                       if (!objagg_tmp_graph_is_edge(graph, index, j))
+                               continue;
+                       node = &graph->nodes[j];
+                       if (node->crossed_out)
+                               continue;
+                       node->crossed_out = true;
+                       hnode = objagg_hints_node_create(objagg_hints,
+                                                        node->objagg_obj,
+                                                        objagg->ops->obj_size,
+                                                        parent_hnode);
+                       if (IS_ERR(hnode)) {
+                               err = PTR_ERR(hnode);
+                               goto out;
+                       }
+               }
+       }
+
+       err = 0;
+out:
+       objagg_tmp_graph_destroy(graph);
+       return err;
+}
+
+struct objagg_opt_algo {
+       int (*fillup_hints)(struct objagg_hints *objagg_hints,
+                           struct objagg *objagg);
+};
+
+static const struct objagg_opt_algo objagg_opt_simple_greedy = {
+       .fillup_hints = objagg_opt_simple_greedy_fillup_hints,
+};
+
+static const struct objagg_opt_algo *objagg_opt_algos[] = {
+       [OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy,
+};
+
+static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg,
+                               const void *obj)
+{
+       struct rhashtable *ht = arg->ht;
+       struct objagg_hints *objagg_hints =
+                       container_of(ht, struct objagg_hints, node_ht);
+       const struct objagg_ops *ops = objagg_hints->ops;
+       const char *ptr = obj;
+
+       ptr += ht->p.key_offset;
+       return ops->hints_obj_cmp ? ops->hints_obj_cmp(ptr, arg->key) :
+                                   memcmp(ptr, arg->key, ht->p.key_len);
+}
+
+/**
+ * objagg_hints_get - obtains hints instance
+ * @objagg:            objagg instance
+ * @opt_algo_type:     type of hints finding algorithm
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * According to the algo type, the existing objects of the objagg
+ * instance are walked through to assemble an optimal tree. We call
+ * this tree "hints". These hints can later be used for the creation
+ * of a new objagg instance. There, future object creations consult
+ * these hints to find out where exactly the new object should be
+ * placed, as a root or as a delta.
+ *
+ * Returns a pointer to hints instance in case of success,
+ * otherwise it returns pointer error using ERR_PTR macro.
+ */
+struct objagg_hints *objagg_hints_get(struct objagg *objagg,
+                                     enum objagg_opt_algo_type opt_algo_type)
+{
+       const struct objagg_opt_algo *algo = objagg_opt_algos[opt_algo_type];
+       struct objagg_hints *objagg_hints;
+       int err;
+
+       objagg_hints = kzalloc(sizeof(*objagg_hints), GFP_KERNEL);
+       if (!objagg_hints)
+               return ERR_PTR(-ENOMEM);
+
+       objagg_hints->ops = objagg->ops;
+       objagg_hints->refcount = 1;
+
+       INIT_LIST_HEAD(&objagg_hints->node_list);
+
+       objagg_hints->ht_params.key_len = objagg->ops->obj_size;
+       objagg_hints->ht_params.key_offset =
+                               offsetof(struct objagg_hints_node, obj);
+       objagg_hints->ht_params.head_offset =
+                               offsetof(struct objagg_hints_node, ht_node);
+       objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp;
+
+       err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params);
+       if (err)
+               goto err_rhashtable_init;
+
+       err = algo->fillup_hints(objagg_hints, objagg);
+       if (err)
+               goto err_fillup_hints;
+
+       if (WARN_ON(objagg_hints->node_count != objagg->obj_count)) {
+               err = -EINVAL;
+               goto err_node_count_check;
+       }
+
+       return objagg_hints;
+
+err_node_count_check:
+err_fillup_hints:
+       objagg_hints_flush(objagg_hints);
+       rhashtable_destroy(&objagg_hints->node_ht);
+err_rhashtable_init:
+       kfree(objagg_hints);
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL(objagg_hints_get);
+
+/**
+ * objagg_hints_put - puts hints instance
+ * @objagg_hints:      objagg hints instance
+ *
+ * Note: all locking must be provided by the caller.
+ */
+void objagg_hints_put(struct objagg_hints *objagg_hints)
+{
+       if (--objagg_hints->refcount)
+               return;
+       objagg_hints_flush(objagg_hints);
+       rhashtable_destroy(&objagg_hints->node_ht);
+       kfree(objagg_hints);
+}
+EXPORT_SYMBOL(objagg_hints_put);
+
+/**
+ * objagg_hints_stats_get - obtains stats of the hints instance
+ * @objagg_hints:      hints instance
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * The returned structure contains statistics of all objects
+ * currently in use, ordered by following rules:
+ * 1) Root objects are always on lower indexes than the rest.
+ * 2) Objects with higher delta user count are always on lower
+ *    indexes.
+ * 3) In case multiple objects have the same delta user count,
+ *    the objects are ordered by user count.
+ *
+ * Returns a pointer to stats instance in case of success,
+ * otherwise it returns pointer error using ERR_PTR macro.
+ */
+const struct objagg_stats *
+objagg_hints_stats_get(struct objagg_hints *objagg_hints)
+{
+       struct objagg_stats *objagg_stats;
+       struct objagg_hints_node *hnode;
+       int i;
+
+       objagg_stats = kzalloc(struct_size(objagg_stats, stats_info,
+                                          objagg_hints->node_count),
+                              GFP_KERNEL);
+       if (!objagg_stats)
+               return ERR_PTR(-ENOMEM);
+
+       i = 0;
+       list_for_each_entry(hnode, &objagg_hints->node_list, list) {
+               memcpy(&objagg_stats->stats_info[i], &hnode->stats_info,
+                      sizeof(objagg_stats->stats_info[0]));
+               if (objagg_stats->stats_info[i].is_root)
+                       objagg_stats->root_count++;
+               i++;
+       }
+       objagg_stats->stats_info_count = i;
+
+       sort(objagg_stats->stats_info, objagg_stats->stats_info_count,
+            sizeof(struct objagg_obj_stats_info),
+            objagg_stats_info_sort_cmp_func, NULL);
+
+       return objagg_stats;
+}
+EXPORT_SYMBOL(objagg_hints_stats_get);
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Object aggregation manager");
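
Putting the new entry points together, a condensed usage sketch (old, my_ops and my_priv are placeholder names; error handling abridged): snapshot hints from a live instance, seed a fresh instance with them, then drop the local reference.

struct objagg_hints *hints;
struct objagg *fresh;

hints = objagg_hints_get(old, OBJAGG_OPT_ALGO_SIMPLE_GREEDY);
if (IS_ERR(hints))
        return PTR_ERR(hints);

fresh = objagg_create(&my_ops, hints, &my_priv); /* takes its own ref */
objagg_hints_put(hints);        /* fresh keeps the hints alive */
if (IS_ERR(fresh))
        return PTR_ERR(fresh);
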
index d82d022..9cf7762 100644 (file)
@@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config)
        config->test_driver = NULL;
 
        kfree_const(config->test_fs);
-       config->test_driver = NULL;
+       config->test_fs = NULL;
 }
 
 static void kmod_config_free(struct kmod_test_device *test_dev)
index ab57144..72c1abf 100644 (file)
@@ -87,6 +87,15 @@ static void world_obj_put(struct world *world, struct objagg *objagg,
 
 #define MAX_KEY_ID_DIFF 5
 
+static bool delta_check(void *priv, const void *parent_obj, const void *obj)
+{
+       const struct tokey *parent_key = parent_obj;
+       const struct tokey *key = obj;
+       int diff = key->id - parent_key->id;
+
+       return diff >= 0 && diff <= MAX_KEY_ID_DIFF;
+}
+
 static void *delta_create(void *priv, void *parent_obj, void *obj)
 {
        struct tokey *parent_key = parent_obj;
@@ -95,7 +104,7 @@ static void *delta_create(void *priv, void *parent_obj, void *obj)
        int diff = key->id - parent_key->id;
        struct delta *delta;
 
-       if (diff < 0 || diff > MAX_KEY_ID_DIFF)
+       if (!delta_check(priv, parent_obj, obj))
                return ERR_PTR(-EINVAL);
 
        delta = kzalloc(sizeof(*delta), GFP_KERNEL);
@@ -115,7 +124,7 @@ static void delta_destroy(void *priv, void *delta_priv)
        kfree(delta);
 }
 
-static void *root_create(void *priv, void *obj)
+static void *root_create(void *priv, void *obj, unsigned int id)
 {
        struct world *world = priv;
        struct tokey *key = obj;
@@ -268,6 +277,12 @@ stats_put:
        return err;
 }
 
+static bool delta_check_dummy(void *priv, const void *parent_obj,
+                             const void *obj)
+{
+       return false;
+}
+
 static void *delta_create_dummy(void *priv, void *parent_obj, void *obj)
 {
        return ERR_PTR(-EOPNOTSUPP);
@@ -279,6 +294,7 @@ static void delta_destroy_dummy(void *priv, void *delta_priv)
 
 static const struct objagg_ops nodelta_ops = {
        .obj_size = sizeof(struct tokey),
+       .delta_check = delta_check_dummy,
        .delta_create = delta_create_dummy,
        .delta_destroy = delta_destroy_dummy,
        .root_create = root_create,
@@ -292,7 +308,7 @@ static int test_nodelta(void)
        int i;
        int err;
 
-       objagg = objagg_create(&nodelta_ops, &world);
+       objagg = objagg_create(&nodelta_ops, NULL, &world);
        if (IS_ERR(objagg))
                return PTR_ERR(objagg);
 
@@ -357,6 +373,7 @@ err_stats_second_zero:
 
 static const struct objagg_ops delta_ops = {
        .obj_size = sizeof(struct tokey),
+       .delta_check = delta_check,
        .delta_create = delta_create,
        .delta_destroy = delta_destroy,
        .root_create = root_create,
@@ -728,8 +745,10 @@ static int check_expect_stats(struct objagg *objagg,
        int err;
 
        stats = objagg_stats_get(objagg);
-       if (IS_ERR(stats))
+       if (IS_ERR(stats)) {
+               *errmsg = "objagg_stats_get() failed.";
                return PTR_ERR(stats);
+       }
        err = __check_expect_stats(stats, expect_stats, errmsg);
        objagg_stats_put(stats);
        return err;
@@ -769,7 +788,6 @@ static int test_delta_action_item(struct world *world,
        if (err)
                goto errout;
 
-       errmsg = NULL;
        err = check_expect_stats(objagg, &action_item->expect_stats, &errmsg);
        if (err) {
                pr_err("Key %u: Stats: %s\n", action_item->key_id, errmsg);
@@ -793,7 +811,7 @@ static int test_delta(void)
        int i;
        int err;
 
-       objagg = objagg_create(&delta_ops, &world);
+       objagg = objagg_create(&delta_ops, NULL, &world);
        if (IS_ERR(objagg))
                return PTR_ERR(objagg);
 
@@ -815,6 +833,170 @@ err_do_action_item:
        return err;
 }
 
+struct hints_case {
+       const unsigned int *key_ids;
+       size_t key_ids_count;
+       struct expect_stats expect_stats;
+       struct expect_stats expect_stats_hints;
+};
+
+static const unsigned int hints_case_key_ids[] = {
+       1, 7, 3, 5, 3, 1, 30, 8, 8, 5, 6, 8,
+};
+
+static const struct hints_case hints_case = {
+       .key_ids = hints_case_key_ids,
+       .key_ids_count = ARRAY_SIZE(hints_case_key_ids),
+       .expect_stats =
+               EXPECT_STATS(7, ROOT(1, 2, 7), ROOT(7, 1, 4), ROOT(30, 1, 1),
+                               DELTA(8, 3), DELTA(3, 2),
+                               DELTA(5, 2), DELTA(6, 1)),
+       .expect_stats_hints =
+               EXPECT_STATS(7, ROOT(3, 2, 9), ROOT(1, 2, 2), ROOT(30, 1, 1),
+                               DELTA(8, 3), DELTA(5, 2),
+                               DELTA(6, 1), DELTA(7, 1)),
+};
+
+static void __pr_debug_stats(const struct objagg_stats *stats)
+{
+       int i;
+
+       for (i = 0; i < stats->stats_info_count; i++)
+               pr_debug("Stat index %d key %u: u %d, d %d, %s\n", i,
+                        obj_to_key_id(stats->stats_info[i].objagg_obj),
+                        stats->stats_info[i].stats.user_count,
+                        stats->stats_info[i].stats.delta_user_count,
+                        stats->stats_info[i].is_root ? "root" : "noroot");
+}
+
+static void pr_debug_stats(struct objagg *objagg)
+{
+       const struct objagg_stats *stats;
+
+       stats = objagg_stats_get(objagg);
+       if (IS_ERR(stats))
+               return;
+       __pr_debug_stats(stats);
+       objagg_stats_put(stats);
+}
+
+static void pr_debug_hints_stats(struct objagg_hints *objagg_hints)
+{
+       const struct objagg_stats *stats;
+
+       stats = objagg_hints_stats_get(objagg_hints);
+       if (IS_ERR(stats))
+               return;
+       __pr_debug_stats(stats);
+       objagg_stats_put(stats);
+}
+
+static int check_expect_hints_stats(struct objagg_hints *objagg_hints,
+                                   const struct expect_stats *expect_stats,
+                                   const char **errmsg)
+{
+       const struct objagg_stats *stats;
+       int err;
+
+       stats = objagg_hints_stats_get(objagg_hints);
+       if (IS_ERR(stats))
+               return PTR_ERR(stats);
+       err = __check_expect_stats(stats, expect_stats, errmsg);
+       objagg_stats_put(stats);
+       return err;
+}
+
+static int test_hints_case(const struct hints_case *hints_case)
+{
+       struct objagg_obj *objagg_obj;
+       struct objagg_hints *hints;
+       struct world world2 = {};
+       struct world world = {};
+       struct objagg *objagg2;
+       struct objagg *objagg;
+       const char *errmsg;
+       int i;
+       int err;
+
+       objagg = objagg_create(&delta_ops, NULL, &world);
+       if (IS_ERR(objagg))
+               return PTR_ERR(objagg);
+
+       for (i = 0; i < hints_case->key_ids_count; i++) {
+               objagg_obj = world_obj_get(&world, objagg,
+                                          hints_case->key_ids[i]);
+               if (IS_ERR(objagg_obj)) {
+                       err = PTR_ERR(objagg_obj);
+                       goto err_world_obj_get;
+               }
+       }
+
+       pr_debug_stats(objagg);
+       err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg);
+       if (err) {
+               pr_err("Stats: %s\n", errmsg);
+               goto err_check_expect_stats;
+       }
+
+       hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY);
+       if (IS_ERR(hints)) {
+               err = PTR_ERR(hints);
+               goto err_hints_get;
+       }
+
+       pr_debug_hints_stats(hints);
+       err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints,
+                                      &errmsg);
+       if (err) {
+               pr_err("Hints stats: %s\n", errmsg);
+               goto err_check_expect_hints_stats;
+       }
+
+       objagg2 = objagg_create(&delta_ops, hints, &world2);
+       if (IS_ERR(objagg2))
+               return PTR_ERR(objagg2);
+
+       for (i = 0; i < hints_case->key_ids_count; i++) {
+               objagg_obj = world_obj_get(&world2, objagg2,
+                                          hints_case->key_ids[i]);
+               if (IS_ERR(objagg_obj)) {
+                       err = PTR_ERR(objagg_obj);
+                       goto err_world2_obj_get;
+               }
+       }
+
+       pr_debug_stats(objagg2);
+       err = check_expect_stats(objagg2, &hints_case->expect_stats_hints,
+                                &errmsg);
+       if (err) {
+               pr_err("Stats2: %s\n", errmsg);
+               goto err_check_expect_stats2;
+       }
+
+       err = 0;
+
+err_check_expect_stats2:
+err_world2_obj_get:
+       for (i--; i >= 0; i--)
+               world_obj_put(&world2, objagg2, hints_case->key_ids[i]);
+       objagg_hints_put(hints);
+       objagg_destroy(objagg2);
+       i = hints_case->key_ids_count;
+err_check_expect_hints_stats:
+err_hints_get:
+err_check_expect_stats:
+err_world_obj_get:
+       for (i--; i >= 0; i--)
+               world_obj_put(&world, objagg, hints_case->key_ids[i]);
+
+       objagg_destroy(objagg);
+       return err;
+}
+
+static int test_hints(void)
+{
+       return test_hints_case(&hints_case);
+}
+
 static int __init test_objagg_init(void)
 {
        int err;
@@ -822,7 +1004,10 @@ static int __init test_objagg_init(void)
        err = test_nodelta();
        if (err)
                return err;
-       return test_delta();
+       err = test_delta();
+       if (err)
+               return err;
+       return test_hints();
 }
 
 static void __exit test_objagg_exit(void)
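
As a worked check of the expected hints stats above (a throwaway computation, not part of the test): with MAX_KEY_ID_DIFF == 5, key 3 can represent keys 3 through 8, so its weight is its own two users plus those of keys 5, 6, 7 and 8, matching ROOT(3, 2, 9) in expect_stats_hints.

#include <stdio.h>

int main(void)
{
        const unsigned int keys[]  = { 1, 3, 5, 6, 7, 8, 30 };
        const unsigned int users[] = { 2, 2, 2, 1, 1, 3, 1 }; /* from key_ids[] */
        const unsigned int root = 3, max_diff = 5;
        unsigned int weight = 0;

        for (int i = 0; i < 7; i++)
                if (keys[i] >= root && keys[i] - root <= max_diff)
                        weight += users[i];

        printf("weight(root=3) = %u\n", weight); /* prints 9 */
        return 0;
}
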
index 6a8ac76..e52f8ca 100644 (file)
@@ -541,38 +541,45 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
 static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects,
                                  int cnt, bool slow)
 {
-       struct rhltable rhlt;
+       struct rhltable *rhlt;
        unsigned int i, ret;
        const char *key;
        int err = 0;
 
-       err = rhltable_init(&rhlt, &test_rht_params_dup);
-       if (WARN_ON(err))
+       rhlt = kmalloc(sizeof(*rhlt), GFP_KERNEL);
+       if (WARN_ON(!rhlt))
+               return -EINVAL;
+
+       err = rhltable_init(rhlt, &test_rht_params_dup);
+       if (WARN_ON(err)) {
+               kfree(rhlt);
                return err;
+       }
 
        for (i = 0; i < cnt; i++) {
                rhl_test_objects[i].value.tid = i;
-               key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead);
+               key = rht_obj(&rhlt->ht, &rhl_test_objects[i].list_node.rhead);
                key += test_rht_params_dup.key_offset;
 
                if (slow) {
-                       err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key,
+                       err = PTR_ERR(rhashtable_insert_slow(&rhlt->ht, key,
                                                             &rhl_test_objects[i].list_node.rhead));
                        if (err == -EAGAIN)
                                err = 0;
                } else
-                       err = rhltable_insert(&rhlt,
+                       err = rhltable_insert(rhlt,
                                              &rhl_test_objects[i].list_node,
                                              test_rht_params_dup);
                if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast"))
                        goto skip_print;
        }
 
-       ret = print_ht(&rhlt);
+       ret = print_ht(rhlt);
        WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast");
 
 skip_print:
-       rhltable_destroy(&rhlt);
+       rhltable_destroy(rhlt);
+       kfree(rhlt);
 
        return 0;
 }
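The hunk above stops placing the rhltable on the kernel stack: struct rhltable embeds a whole struct rhashtable, which is large relative to an 8-16 KiB kernel stack, so the test now kmalloc()s it. A small userspace sketch of the same pattern, with a hypothetical oversized struct:

	#include <stdlib.h>
	#include <string.h>

	/* Stand-in for a structure too large for a small stack frame. */
	struct big_table {
		unsigned char buckets[8192];
	};

	int main(void)
	{
		/* Heap-allocate rather than declaring `struct big_table t;`
		 * so the enclosing frame stays small; this mirrors the
		 * kmalloc() change above.
		 */
		struct big_table *t = malloc(sizeof(*t));

		if (!t)
			return 1;
		memset(t, 0, sizeof(*t));
		/* ... use the table ... */
		free(t);
		return 0;
	}
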
index 05acd7e..7502964 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1674,7 +1674,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                if (!pmd_present(pmd))
                        return 0;
 
-               if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
+               if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
+                            pmd_devmap(pmd))) {
                        /*
                         * NUMA hinting faults need to be handled in the GUP
                         * slowpath for accounting purposes and so that they
index df2e7dd..afef616 100644
@@ -4268,7 +4268,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                break;
                        }
                        if (ret & VM_FAULT_RETRY) {
-                               if (nonblocking)
+                               if (nonblocking &&
+                                   !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
                                        *nonblocking = 0;
                                *nr_pages = 0;
                                /*
index 0a14fcf..e2bb06c 100644
@@ -5,6 +5,7 @@ UBSAN_SANITIZE_generic.o := n
 UBSAN_SANITIZE_tags.o := n
 KCOV_INSTRUMENT := n
 
+CFLAGS_REMOVE_common.o = -pg
 CFLAGS_REMOVE_generic.o = -pg
 # Function splitter causes unnecessary splits in __asan_load1/__asan_store1
 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
index 7c72f2a..831be5f 100644
@@ -372,7 +372,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
                        if (fail || tk->addr_valid == 0) {
                                pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
                                       pfn, tk->tsk->comm, tk->tsk->pid);
-                               force_sig(SIGKILL, tk->tsk);
+                               do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
+                                                tk->tsk, PIDTYPE_PID);
                        }
 
                        /*
index b9a667d..124e794 100644
@@ -1233,7 +1233,8 @@ static bool is_pageblock_removable_nolock(struct page *page)
 bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
 {
        struct page *page = pfn_to_page(start_pfn);
-       struct page *end_page = page + nr_pages;
+       unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page)));
+       struct page *end_page = pfn_to_page(end_pfn);
 
        /* Check the starting page of each pageblock within the range */
        for (; page < end_page; page = next_active_pageblock(page)) {
@@ -1273,6 +1274,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
                                i++;
                        if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
                                continue;
+                       /* Check if we got outside of the zone */
+                       if (zone && !zone_spans_pfn(zone, pfn + i))
+                               return 0;
                        page = pfn_to_page(pfn + i);
                        if (zone && page_zone(page) != zone)
                                return 0;
@@ -1301,23 +1305,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
        unsigned long pfn;
-       struct page *page;
+
        for (pfn = start; pfn < end; pfn++) {
-               if (pfn_valid(pfn)) {
-                       page = pfn_to_page(pfn);
-                       if (PageLRU(page))
-                               return pfn;
-                       if (__PageMovable(page))
-                               return pfn;
-                       if (PageHuge(page)) {
-                               if (hugepage_migration_supported(page_hstate(page)) &&
-                                   page_huge_active(page))
-                                       return pfn;
-                               else
-                                       pfn = round_up(pfn + 1,
-                                               1 << compound_order(page)) - 1;
-                       }
-               }
+               struct page *page, *head;
+               unsigned long skip;
+
+               if (!pfn_valid(pfn))
+                       continue;
+               page = pfn_to_page(pfn);
+               if (PageLRU(page))
+                       return pfn;
+               if (__PageMovable(page))
+                       return pfn;
+
+               if (!PageHuge(page))
+                       continue;
+               head = compound_head(page);
+               if (hugepage_migration_supported(page_hstate(head)) &&
+                   page_huge_active(head))
+                       return pfn;
+               skip = (1 << compound_order(head)) - (page - head);
+               pfn += skip - 1;
        }
        return 0;
 }
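The skip computation above advances the scan past the remainder of a compound page in one step. A worked example, assuming a 2 MiB hugepage (compound order 9, i.e. 512 base pages):

	#include <stdio.h>

	int main(void)
	{
		unsigned long compound_pages = 1UL << 9; /* order 9: 512 pages */
		unsigned long offset = 100;   /* pfn is 100 pages past head */
		unsigned long skip = compound_pages - offset;

		/* scan_movable_pages() does pfn += skip - 1; the loop's
		 * pfn++ then lands on the first pfn after the compound
		 * page (offset 512 here).
		 */
		printf("skip=%lu, next offset=%lu\n", skip, offset + skip);
		return 0;
	}
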
@@ -1344,7 +1352,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 {
        unsigned long pfn;
        struct page *page;
-       int not_managed = 0;
        int ret = 0;
        LIST_HEAD(source);
 
@@ -1392,7 +1399,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                else
                        ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
                if (!ret) { /* Success */
-                       put_page(page);
                        list_add_tail(&page->lru, &source);
                        if (!__PageMovable(page))
                                inc_node_page_state(page, NR_ISOLATED_ANON +
@@ -1401,22 +1407,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                } else {
                        pr_warn("failed to isolate pfn %lx\n", pfn);
                        dump_page(page, "isolation failed");
-                       put_page(page);
-                       /* Because we don't have big zone->lock. we should
-                          check this again here. */
-                       if (page_count(page)) {
-                               not_managed++;
-                               ret = -EBUSY;
-                               break;
-                       }
                }
+               put_page(page);
        }
        if (!list_empty(&source)) {
-               if (not_managed) {
-                       putback_movable_pages(&source);
-                       goto out;
-               }
-
                /* Allocate a new page from the nearest neighbor node */
                ret = migrate_pages(&source, new_node_page, NULL, 0,
                                        MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
@@ -1429,7 +1423,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                        putback_movable_pages(&source);
                }
        }
-out:
+
        return ret;
 }
 
@@ -1576,7 +1570,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
           we assume this for now. */
        if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
                                  &valid_end)) {
-               mem_hotplug_done();
                ret = -EINVAL;
                reason = "multizone range";
                goto failed_removal;
@@ -1591,7 +1584,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
                                       MIGRATE_MOVABLE,
                                       SKIP_HWPOISON | REPORT_FAILURE);
        if (ret) {
-               mem_hotplug_done();
                reason = "failure to isolate range";
                goto failed_removal;
        }
index a16b150..d4fd680 100644
@@ -709,7 +709,6 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
        /* Simple case, sync compaction */
        if (mode != MIGRATE_ASYNC) {
                do {
-                       get_bh(bh);
                        lock_buffer(bh);
                        bh = bh->b_this_page;
 
@@ -720,18 +719,15 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
 
        /* async case, we cannot block on lock_buffer so use trylock_buffer */
        do {
-               get_bh(bh);
                if (!trylock_buffer(bh)) {
                        /*
                         * We failed to lock the buffer and cannot stall in
                         * async migration. Release the taken locks
                         */
                        struct buffer_head *failed_bh = bh;
-                       put_bh(failed_bh);
                        bh = head;
                        while (bh != failed_bh) {
                                unlock_buffer(bh);
-                               put_bh(bh);
                                bh = bh->b_this_page;
                        }
                        return false;
@@ -818,7 +814,6 @@ unlock_buffers:
        bh = head;
        do {
                unlock_buffer(bh);
-               put_bh(bh);
                bh = bh->b_this_page;
 
        } while (bh != head);
@@ -1135,10 +1130,13 @@ out:
         * If migration is successful, decrease refcount of the newpage
         * which will not free the page because new page owner increased
         * refcounter. As well, if it is LRU page, add the page to LRU
-        * list in here.
+        * list in here. Use the old state of the isolated source page to
+        * determine if we migrated a LRU page. newpage was already unlocked
+        * and possibly modified by its owner - don't rely on the page
+        * state.
         */
        if (rc == MIGRATEPAGE_SUCCESS) {
-               if (unlikely(__PageMovable(newpage)))
+               if (unlikely(!is_lru))
                        put_page(newpage);
                else
                        putback_lru_page(newpage);
index f0e8cd9..26ea863 100644
@@ -647,8 +647,8 @@ static int oom_reaper(void *unused)
 
 static void wake_oom_reaper(struct task_struct *tsk)
 {
-       /* tsk is already queued? */
-       if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+       /* mm is already queued? */
+       if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
                return;
 
        get_task_struct(tsk);
@@ -975,6 +975,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
         * still freeing memory.
         */
        read_lock(&tasklist_lock);
+
+       /*
+        * The task 'p' might have already exited before reaching here. A
+        * concurrent put_task_struct() could free task_struct 'p' while the
+        * loop still tries to access its fields, so take an extra reference.
+        */
+       get_task_struct(p);
        for_each_thread(p, t) {
                list_for_each_entry(child, &t->children, sibling) {
                        unsigned int child_points;
@@ -994,6 +1001,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
                        }
                }
        }
+       put_task_struct(p);
        read_unlock(&tasklist_lock);
 
        /*
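The comment and get_task_struct()/put_task_struct() pair added above pin the task for the duration of the traversal. A userspace sketch of the same take-a-reference pattern, with hypothetical helpers; real code manipulates the refcount atomically or under a lock:

	#include <stdio.h>

	struct obj {
		int refcount;	/* atomic / lock-protected in real code */
	};

	static void get_obj(struct obj *o) { o->refcount++; }
	static void put_obj(struct obj *o) { o->refcount--; /* free at 0 */ }

	int main(void)
	{
		struct obj task = { .refcount = 1 };

		get_obj(&task);	/* like get_task_struct(p)              */
		/* ... walk children; 'task' cannot be freed meanwhile ... */
		put_obj(&task);	/* like put_task_struct(p)              */
		printf("refcount=%d\n", task.refcount);
		return 0;
	}
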
index 35fdde0..46285d2 100644
@@ -4675,11 +4675,11 @@ refill:
                /* Even if we own the page, we do not use atomic_set().
                 * This would break get_page_unless_zero() users.
                 */
-               page_ref_add(page, size - 1);
+               page_ref_add(page, size);
 
                /* reset page count bias and offset to start of new frag */
                nc->pfmemalloc = page_is_pfmemalloc(page);
-               nc->pagecnt_bias = size;
+               nc->pagecnt_bias = size + 1;
                nc->offset = size;
        }
 
@@ -4695,10 +4695,10 @@ refill:
                size = nc->size;
 #endif
                /* OK, page count is 0, we can safely set it */
-               set_page_count(page, size);
+               set_page_count(page, size + 1);
 
                /* reset page count bias and offset to start of new frag */
-               nc->pagecnt_bias = size;
+               nc->pagecnt_bias = size + 1;
                offset = size - fragsz;
        }
 
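The off-by-one fixed above matters because the allocator itself keeps one reference to the page in addition to the references it pre-charges for fragments. A userspace sketch of the corrected accounting; simplified, since the real code uses atomic page refcounts:

	#include <stdio.h>

	int main(void)
	{
		unsigned int size = 4;     /* fragments a page can serve */
		unsigned int refcount = 1; /* freshly allocated page     */
		unsigned int bias;

		refcount += size;          /* page_ref_add(page, size)   */
		bias = size + 1;           /* nc->pagecnt_bias           */

		/* Hand out every fragment: one bias unit per allocation. */
		for (unsigned int i = 0; i < size; i++)
			bias--;

		/* bias is still 1: the allocator keeps its own reference,
		 * so outstanding fragments can never drive the refcount
		 * to zero behind its back (the bug the patch fixes).
		 */
		printf("refcount=%u bias=%u\n", refcount, bias);
		return 0;
	}
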
index ae44f7a..8c78b8d 100644
@@ -398,10 +398,8 @@ void __init page_ext_init(void)
                         * We know some arch can have a nodes layout such as
                         * -------------pfn-------------->
                         * N0 | N1 | N2 | N0 | N1 | N2|....
-                        *
-                        * Take into account DEFERRED_STRUCT_PAGE_INIT.
                         */
-                       if (early_pfn_to_nid(pfn) != nid)
+                       if (pfn_to_nid(pfn) != nid)
                                continue;
                        if (init_section_page_ext(pfn, nid))
                                goto oom;
index a714c4f..e979705 100644
@@ -491,16 +491,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
                delta = freeable / 2;
        }
 
-       /*
-        * Make sure we apply some minimal pressure on default priority
-        * even on small cgroups. Stale objects are not only consuming memory
-        * by themselves, but can also hold a reference to a dying cgroup,
-        * preventing it from being reclaimed. A dying cgroup with all
-        * corresponding structures like per-cpu stats and kmem caches
-        * can be really big, so it may lead to a significant waste of memory.
-        */
-       delta = max_t(unsigned long long, delta, min(freeable, batch_size));
-
        total_scan += delta;
        if (total_scan < 0) {
                pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
index 0b0495a..d79221f 100644
@@ -134,7 +134,8 @@ static void vcc_seq_stop(struct seq_file *seq, void *v)
 static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        v = vcc_walk(seq, 1);
-       *pos += !!PTR_ERR(v);
+       if (v)
+               (*pos)++;
        return v;
 }
 
index 7b80f6f..a9b7919 100644
@@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
 
                ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
 
+               /* free the TID stats immediately */
+               cfg80211_sinfo_release_content(&sinfo);
+
                dev_put(real_netdev);
                if (ret == -ENOENT) {
                        /* Node is not associated anymore! It would be
index 899ab05..310a4f3 100644
@@ -1711,6 +1711,8 @@ static void batadv_dat_put_dhcp(struct batadv_priv *bat_priv, u8 *chaddr,
        batadv_dat_send_data(bat_priv, skb, yiaddr, vid, BATADV_P_DAT_DHT_PUT);
        batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
 
+       consume_skb(skb);
+
        batadv_dbg(BATADV_DBG_DAT, bat_priv,
                   "Snooped from outgoing DHCPACK (server address): %pI4, %pM (vid: %i)\n",
                   &ip_dst, hw_dst, batadv_print_vid(vid));
index 0a6b9b3..f5811f6 100644
@@ -47,7 +47,6 @@
 #include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
-#include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
 #include "netlink.h"
index 53d03ad..e064de4 100644
@@ -28,6 +28,7 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "gateway_client.h"
 #include "log.h"
index 89bae10..128467a 100644
 
 struct net_device;
 
-enum batadv_gw_modes {
-       BATADV_GW_MODE_OFF,
-       BATADV_GW_MODE_CLIENT,
-       BATADV_GW_MODE_SERVER,
-};
-
 /**
  * enum batadv_bandwidth_units - bandwidth unit types
  */
index 28c1fb8..96ef7c7 100644
@@ -20,7 +20,6 @@
 #include "main.h"
 
 #include <linux/atomic.h>
-#include <linux/bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/gfp.h>
@@ -179,8 +178,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
        parent_dev = __dev_get_by_index((struct net *)parent_net,
                                        dev_get_iflink(net_dev));
        /* if we got a NULL parent_dev there is something broken.. */
-       if (WARN(!parent_dev, "Cannot find parent device"))
+       if (!parent_dev) {
+               pr_err("Cannot find parent device\n");
                return false;
+       }
 
        if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))
                return false;
index 5fe833c..67a58da 100644
 #include "main.h"
 
 #include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/cache.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/genetlink.h>
 #include <linux/gfp.h>
 #include <linux/if_ether.h>
+#include <linux/if_vlan.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
+#include "gateway_common.h"
 #include "hard-interface.h"
+#include "log.h"
 #include "multicast.h"
+#include "network-coding.h"
 #include "originator.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
 #include "translation-table.h"
 
+struct net;
+
 struct genl_family batadv_netlink_family;
 
 /* multicast groups */
 enum batadv_netlink_multicast_groups {
+       BATADV_NL_MCGRP_CONFIG,
        BATADV_NL_MCGRP_TPMETER,
 };
 
+/**
+ * enum batadv_genl_ops_flags - flags for genl_ops's internal_flags
+ */
+enum batadv_genl_ops_flags {
+       /**
+        * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in
+        *  attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be
+        *  saved in info->user_ptr[0]
+        */
+       BATADV_FLAG_NEED_MESH = BIT(0),
+
+       /**
+        * @BATADV_FLAG_NEED_HARDIF: request requires valid hard interface in
+        *  attribute BATADV_ATTR_HARD_IFINDEX and expects a pointer to it to be
+        *  saved in info->user_ptr[1]
+        */
+       BATADV_FLAG_NEED_HARDIF = BIT(1),
+
+       /**
+        * @BATADV_FLAG_NEED_VLAN: request requires valid vlan in
+        *  attribute BATADV_ATTR_VLANID and expects a pointer to it to be
+        *  saved in info->user_ptr[1]
+        */
+       BATADV_FLAG_NEED_VLAN = BIT(2),
+};
+
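These internal_flags are consumed by batadv_pre_doit() further down, which resolves the requested objects into info->user_ptr[]. A hedged sketch of how a doit op would request them; the actual ops-table update belongs to the full patch and is outside this excerpt:

	/* Sketch only: the real ops table is updated elsewhere in the patch. */
	static const struct genl_ops batadv_netlink_ops[] = {
		{
			.cmd = BATADV_CMD_SET_MESH,
			.flags = GENL_ADMIN_PERM,
			.policy = batadv_netlink_policy,
			.doit = batadv_netlink_set_mesh,
			/* pre_doit fills info->user_ptr[0] with the bat_priv */
			.internal_flags = BATADV_FLAG_NEED_MESH,
		},
	};
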
 static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
+       [BATADV_NL_MCGRP_CONFIG] = { .name = BATADV_NL_MCAST_GROUP_CONFIG },
        [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
 };
 
@@ -104,6 +141,26 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
        [BATADV_ATTR_DAT_CACHE_VID]             = { .type = NLA_U16 },
        [BATADV_ATTR_MCAST_FLAGS]               = { .type = NLA_U32 },
        [BATADV_ATTR_MCAST_FLAGS_PRIV]          = { .type = NLA_U32 },
+       [BATADV_ATTR_VLANID]                    = { .type = NLA_U16 },
+       [BATADV_ATTR_AGGREGATED_OGMS_ENABLED]   = { .type = NLA_U8 },
+       [BATADV_ATTR_AP_ISOLATION_ENABLED]      = { .type = NLA_U8 },
+       [BATADV_ATTR_ISOLATION_MARK]            = { .type = NLA_U32 },
+       [BATADV_ATTR_ISOLATION_MASK]            = { .type = NLA_U32 },
+       [BATADV_ATTR_BONDING_ENABLED]           = { .type = NLA_U8 },
+       [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]     = { .type = NLA_U8 },
+       [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]     = { .type = NLA_U8 },
+       [BATADV_ATTR_FRAGMENTATION_ENABLED]     = { .type = NLA_U8 },
+       [BATADV_ATTR_GW_BANDWIDTH_DOWN]         = { .type = NLA_U32 },
+       [BATADV_ATTR_GW_BANDWIDTH_UP]           = { .type = NLA_U32 },
+       [BATADV_ATTR_GW_MODE]                   = { .type = NLA_U8 },
+       [BATADV_ATTR_GW_SEL_CLASS]              = { .type = NLA_U32 },
+       [BATADV_ATTR_HOP_PENALTY]               = { .type = NLA_U8 },
+       [BATADV_ATTR_LOG_LEVEL]                 = { .type = NLA_U32 },
+       [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]      = { .type = NLA_U8 },
+       [BATADV_ATTR_NETWORK_CODING_ENABLED]    = { .type = NLA_U8 },
+       [BATADV_ATTR_ORIG_INTERVAL]             = { .type = NLA_U32 },
+       [BATADV_ATTR_ELP_INTERVAL]              = { .type = NLA_U32 },
+       [BATADV_ATTR_THROUGHPUT_OVERRIDE]       = { .type = NLA_U32 },
 };
 
 /**
@@ -122,20 +179,75 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
 }
 
 /**
- * batadv_netlink_mesh_info_put() - fill in generic information about mesh
- *  interface
- * @msg: netlink message to be sent back
- * @soft_iface: interface for which the data should be taken
+ * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
  *
- * Return: 0 on success, < 0 on error
+ * Return: 0 on success or negative error number in case of failure
  */
-static int
-batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
+static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg,
+                                                struct batadv_priv *bat_priv)
+{
+       struct batadv_softif_vlan *vlan;
+       u8 ap_isolation;
+
+       vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+       if (!vlan)
+               return 0;
+
+       ap_isolation = atomic_read(&vlan->ap_isolation);
+       batadv_softif_vlan_put(vlan);
+
+       return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+                         !!ap_isolation);
+}
+
+/**
+ * batadv_netlink_set_mesh_ap_isolation() - Set ap_isolation from genl msg
+ * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr,
+                                               struct batadv_priv *bat_priv)
+{
+       struct batadv_softif_vlan *vlan;
+
+       vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+       if (!vlan)
+               return -ENOENT;
+
+       atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+       batadv_softif_vlan_put(vlan);
+
+       return 0;
+}
+
+/**
+ * batadv_netlink_mesh_fill() - Fill message with mesh attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_mesh_fill(struct sk_buff *msg,
+                                   struct batadv_priv *bat_priv,
+                                   enum batadv_nl_commands cmd,
+                                   u32 portid, u32 seq, int flags)
 {
-       struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+       struct net_device *soft_iface = bat_priv->soft_iface;
        struct batadv_hard_iface *primary_if = NULL;
        struct net_device *hard_iface;
-       int ret = -ENOBUFS;
+       void *hdr;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+       if (!hdr)
+               return -ENOBUFS;
 
        if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) ||
            nla_put_string(msg, BATADV_ATTR_ALGO_NAME,
@@ -146,16 +258,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
                    soft_iface->dev_addr) ||
            nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
                       (u8)atomic_read(&bat_priv->tt.vn)))
-               goto out;
+               goto nla_put_failure;
 
 #ifdef CONFIG_BATMAN_ADV_BLA
        if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
                        ntohs(bat_priv->bla.claim_dest.group)))
-               goto out;
+               goto nla_put_failure;
 #endif
 
        if (batadv_mcast_mesh_info_put(msg, bat_priv))
-               goto out;
+               goto nla_put_failure;
 
        primary_if = batadv_primary_if_get_selected(bat_priv);
        if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
@@ -167,77 +279,345 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
                                   hard_iface->name) ||
                    nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
                            hard_iface->dev_addr))
-                       goto out;
+                       goto nla_put_failure;
        }
 
-       ret = 0;
+       if (nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED,
+                      !!atomic_read(&bat_priv->aggregated_ogms)))
+               goto nla_put_failure;
+
+       if (batadv_netlink_mesh_fill_ap_isolation(msg, bat_priv))
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MARK,
+                       bat_priv->isolation_mark))
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MASK,
+                       bat_priv->isolation_mark_mask))
+               goto nla_put_failure;
+
+       if (nla_put_u8(msg, BATADV_ATTR_BONDING_ENABLED,
+                      !!atomic_read(&bat_priv->bonding)))
+               goto nla_put_failure;
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+       if (nla_put_u8(msg, BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED,
+                      !!atomic_read(&bat_priv->bridge_loop_avoidance)))
+               goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+       if (nla_put_u8(msg, BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED,
+                      !!atomic_read(&bat_priv->distributed_arp_table)))
+               goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+       if (nla_put_u8(msg, BATADV_ATTR_FRAGMENTATION_ENABLED,
+                      !!atomic_read(&bat_priv->fragmentation)))
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_DOWN,
+                       atomic_read(&bat_priv->gw.bandwidth_down)))
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_UP,
+                       atomic_read(&bat_priv->gw.bandwidth_up)))
+               goto nla_put_failure;
+
+       if (nla_put_u8(msg, BATADV_ATTR_GW_MODE,
+                      atomic_read(&bat_priv->gw.mode)))
+               goto nla_put_failure;
+
+       if (bat_priv->algo_ops->gw.get_best_gw_node &&
+           bat_priv->algo_ops->gw.is_eligible) {
+               /* GW selection class is not available if the routing algorithm
+                * in use does not implement the GW API
+                */
+               if (nla_put_u32(msg, BATADV_ATTR_GW_SEL_CLASS,
+                               atomic_read(&bat_priv->gw.sel_class)))
+                       goto nla_put_failure;
+       }
+
+       if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY,
+                      atomic_read(&bat_priv->hop_penalty)))
+               goto nla_put_failure;
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+       if (nla_put_u32(msg, BATADV_ATTR_LOG_LEVEL,
+                       atomic_read(&bat_priv->log_level)))
+               goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+       if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED,
+                      !atomic_read(&bat_priv->multicast_mode)))
+               goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
+#ifdef CONFIG_BATMAN_ADV_NC
+       if (nla_put_u8(msg, BATADV_ATTR_NETWORK_CODING_ENABLED,
+                      !!atomic_read(&bat_priv->network_coding)))
+               goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_NC */
+
+       if (nla_put_u32(msg, BATADV_ATTR_ORIG_INTERVAL,
+                       atomic_read(&bat_priv->orig_interval)))
+               goto nla_put_failure;
 
- out:
        if (primary_if)
                batadv_hardif_put(primary_if);
 
-       return ret;
+       genlmsg_end(msg, hdr);
+       return 0;
+
+nla_put_failure:
+       if (primary_if)
+               batadv_hardif_put(primary_if);
+
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
 }
 
 /**
- * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO
- *  netlink request
- * @skb: received netlink message
- * @info: receiver information
+ * batadv_netlink_notify_mesh() - send softif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
  *
  * Return: 0 on success, < 0 on error
  */
-static int
-batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
+int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
 {
-       struct net *net = genl_info_net(info);
-       struct net_device *soft_iface;
-       struct sk_buff *msg = NULL;
-       void *msg_head;
-       int ifindex;
+       struct sk_buff *msg;
        int ret;
 
-       if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-               return -EINVAL;
-
-       ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-       if (!ifindex)
-               return -EINVAL;
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
 
-       soft_iface = dev_get_by_index(net, ifindex);
-       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-               ret = -ENODEV;
-               goto out;
+       ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_SET_MESH,
+                                      0, 0, 0);
+       if (ret < 0) {
+               nlmsg_free(msg);
+               return ret;
        }
 
+       genlmsg_multicast_netns(&batadv_netlink_family,
+                               dev_net(bat_priv->soft_iface), msg, 0,
+                               BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+       return 0;
+}
+
+/**
+ * batadv_netlink_get_mesh() - Get softif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+       struct batadv_priv *bat_priv = info->user_ptr[0];
+       struct sk_buff *msg;
+       int ret;
+
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (!msg) {
-               ret = -ENOMEM;
-               goto out;
+       if (!msg)
+               return -ENOMEM;
+
+       ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_GET_MESH,
+                                      info->snd_portid, info->snd_seq, 0);
+       if (ret < 0) {
+               nlmsg_free(msg);
+               return ret;
        }
 
-       msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
-                              &batadv_netlink_family, 0,
-                              BATADV_CMD_GET_MESH_INFO);
-       if (!msg_head) {
-               ret = -ENOBUFS;
-               goto out;
+       ret = genlmsg_reply(msg, info);
+
+       return ret;
+}
+
+/**
+ * batadv_netlink_set_mesh() - Set softif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+       struct batadv_priv *bat_priv = info->user_ptr[0];
+       struct nlattr *attr;
+
+       if (info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]) {
+               attr = info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED];
+
+               atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr));
        }
 
-       ret = batadv_netlink_mesh_info_put(msg, soft_iface);
+       if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+               attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
 
- out:
-       if (soft_iface)
-               dev_put(soft_iface);
+               batadv_netlink_set_mesh_ap_isolation(attr, bat_priv);
+       }
 
-       if (ret) {
-               if (msg)
-                       nlmsg_free(msg);
-               return ret;
+       if (info->attrs[BATADV_ATTR_ISOLATION_MARK]) {
+               attr = info->attrs[BATADV_ATTR_ISOLATION_MARK];
+
+               bat_priv->isolation_mark = nla_get_u32(attr);
        }
 
-       genlmsg_end(msg, msg_head);
-       return genlmsg_reply(msg, info);
+       if (info->attrs[BATADV_ATTR_ISOLATION_MASK]) {
+               attr = info->attrs[BATADV_ATTR_ISOLATION_MASK];
+
+               bat_priv->isolation_mark_mask = nla_get_u32(attr);
+       }
+
+       if (info->attrs[BATADV_ATTR_BONDING_ENABLED]) {
+               attr = info->attrs[BATADV_ATTR_BONDING_ENABLED];
+
+               atomic_set(&bat_priv->bonding, !!nla_get_u8(attr));
+       }
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+       if (info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]) {
+               attr = info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED];
+
+               atomic_set(&bat_priv->bridge_loop_avoidance,
+                          !!nla_get_u8(attr));
+               batadv_bla_status_update(bat_priv->soft_iface);
+       }
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+       if (info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]) {
+               attr = info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED];
+
+               atomic_set(&bat_priv->distributed_arp_table,
+                          !!nla_get_u8(attr));
+               batadv_dat_status_update(bat_priv->soft_iface);
+       }
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+       if (info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]) {
+               attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
+
+               atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+               batadv_update_min_mtu(bat_priv->soft_iface);
+       }
+
+       if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
+               attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN];
+
+               atomic_set(&bat_priv->gw.bandwidth_down, nla_get_u32(attr));
+               batadv_gw_tvlv_container_update(bat_priv);
+       }
+
+       if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]) {
+               attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP];
+
+               atomic_set(&bat_priv->gw.bandwidth_up, nla_get_u32(attr));
+               batadv_gw_tvlv_container_update(bat_priv);
+       }
+
+       if (info->attrs[BATADV_ATTR_GW_MODE]) {
+               u8 gw_mode;
+
+               attr = info->attrs[BATADV_ATTR_GW_MODE];
+               gw_mode = nla_get_u8(attr);
+
+               if (gw_mode <= BATADV_GW_MODE_SERVER) {
+                       /* Invoking batadv_gw_reselect() is not enough to really
+                        * de-select the current GW. It will only instruct the
+                        * gateway client code to perform a re-election the next
+                        * time that this is needed.
+                        *
+                        * When gw client mode is being switched off the current
+                        * GW must be de-selected explicitly otherwise no GW_ADD
+                        * uevent is thrown on client mode re-activation. This
+                        * operation is performed in
+                        * batadv_gw_check_client_stop().
+                        */
+                       batadv_gw_reselect(bat_priv);
+
+                       /* always call batadv_gw_check_client_stop() before
+                        * changing the gateway state
+                        */
+                       batadv_gw_check_client_stop(bat_priv);
+                       atomic_set(&bat_priv->gw.mode, gw_mode);
+                       batadv_gw_tvlv_container_update(bat_priv);
+               }
+       }
+
+       if (info->attrs[BATADV_ATTR_GW_SEL_CLASS] &&
+           bat_priv->algo_ops->gw.get_best_gw_node &&
+           bat_priv->algo_ops->gw.is_eligible) {
+               /* setting the GW selection class is allowed only if the routing
+                * algorithm in use implements the GW API
+                */
+
+               u32 sel_class_max = 0xffffffffu;
+               u32 sel_class;
+
+               attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS];
+               sel_class = nla_get_u32(attr);
+
+               if (!bat_priv->algo_ops->gw.store_sel_class)
+                       sel_class_max = BATADV_TQ_MAX_VALUE;
+
+               if (sel_class >= 1 && sel_class <= sel_class_max) {
+                       atomic_set(&bat_priv->gw.sel_class, sel_class);
+                       batadv_gw_reselect(bat_priv);
+               }
+       }
+
+       if (info->attrs[BATADV_ATTR_HOP_PENALTY]) {
+               attr = info->attrs[BATADV_ATTR_HOP_PENALTY];
+
+               atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr));
+       }
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+       if (info->attrs[BATADV_ATTR_LOG_LEVEL]) {
+               attr = info->attrs[BATADV_ATTR_LOG_LEVEL];
+
+               atomic_set(&bat_priv->log_level,
+                          nla_get_u32(attr) & BATADV_DBG_ALL);
+       }
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+       if (info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]) {
+               attr = info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED];
+
+               atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr));
+       }
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
+#ifdef CONFIG_BATMAN_ADV_NC
+       if (info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]) {
+               attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED];
+
+               atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr));
+               batadv_nc_status_update(bat_priv->soft_iface);
+       }
+#endif /* CONFIG_BATMAN_ADV_NC */
+
+       if (info->attrs[BATADV_ATTR_ORIG_INTERVAL]) {
+               u32 orig_interval;
+
+               attr = info->attrs[BATADV_ATTR_ORIG_INTERVAL];
+               orig_interval = nla_get_u32(attr);
+
+               orig_interval = min_t(u32, orig_interval, INT_MAX);
+               orig_interval = max_t(u32, orig_interval, 2 * BATADV_JITTER);
+
+               atomic_set(&bat_priv->orig_interval, orig_interval);
+       }
+
+       batadv_netlink_notify_mesh(bat_priv);
+
+       return 0;
 }
 
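From userspace, the new get/set commands are ordinary generic-netlink requests against the "batadv" family. A hedged sketch with libnl-genl-3; error handling is trimmed, and the interface name "bat0" and the chosen attribute are illustrative only:

	#include <net/if.h>
	#include <netlink/netlink.h>
	#include <netlink/genl/ctrl.h>
	#include <netlink/genl/genl.h>
	#include <linux/batman_adv.h>

	int main(void)
	{
		struct nl_sock *sk = nl_socket_alloc();
		struct nl_msg *msg = nlmsg_alloc();
		int family;

		genl_connect(sk);
		family = genl_ctrl_resolve(sk, "batadv");

		/* Enable OGM aggregation on bat0 via BATADV_CMD_SET_MESH. */
		genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
			    BATADV_CMD_SET_MESH, 1);
		nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
			    if_nametoindex("bat0"));
		nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED, 1);

		nl_send_auto(sk, msg);
		nlmsg_free(msg);
		nl_socket_free(sk);
		return 0;
	}
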
 /**
@@ -329,40 +709,24 @@ err_genlmsg:
 static int
 batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
 {
-       struct net *net = genl_info_net(info);
-       struct net_device *soft_iface;
-       struct batadv_priv *bat_priv;
+       struct batadv_priv *bat_priv = info->user_ptr[0];
        struct sk_buff *msg = NULL;
        u32 test_length;
        void *msg_head;
-       int ifindex;
        u32 cookie;
        u8 *dst;
        int ret;
 
-       if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-               return -EINVAL;
-
        if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
                return -EINVAL;
 
        if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME])
                return -EINVAL;
 
-       ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-       if (!ifindex)
-               return -EINVAL;
-
        dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
 
        test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]);
 
-       soft_iface = dev_get_by_index(net, ifindex);
-       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-               ret = -ENODEV;
-               goto out;
-       }
-
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
@@ -377,15 +741,11 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
                goto out;
        }
 
-       bat_priv = netdev_priv(soft_iface);
        batadv_tp_start(bat_priv, dst, test_length, &cookie);
 
        ret = batadv_netlink_tp_meter_put(msg, cookie);
 
  out:
-       if (soft_iface)
-               dev_put(soft_iface);
-
        if (ret) {
                if (msg)
                        nlmsg_free(msg);
@@ -406,65 +766,53 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
 static int
 batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
 {
-       struct net *net = genl_info_net(info);
-       struct net_device *soft_iface;
-       struct batadv_priv *bat_priv;
-       int ifindex;
+       struct batadv_priv *bat_priv = info->user_ptr[0];
        u8 *dst;
        int ret = 0;
 
-       if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-               return -EINVAL;
-
        if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
                return -EINVAL;
 
-       ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-       if (!ifindex)
-               return -EINVAL;
-
        dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
 
-       soft_iface = dev_get_by_index(net, ifindex);
-       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-               ret = -ENODEV;
-               goto out;
-       }
-
-       bat_priv = netdev_priv(soft_iface);
        batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL);
 
-out:
-       if (soft_iface)
-               dev_put(soft_iface);
-
        return ret;
 }
 
 /**
- * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message
+ * batadv_netlink_hardif_fill() - Fill message with hardif attributes
  * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ * @cmd: type of message to generate
  * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
  * @cb: Control block containing additional options
- * @hard_iface: Hard interface to dump
  *
- * Return: error code, or 0 on success
+ * Return: 0 on success or negative error number in case of failure
  */
-static int
-batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
-                                struct netlink_callback *cb,
-                                struct batadv_hard_iface *hard_iface)
+static int batadv_netlink_hardif_fill(struct sk_buff *msg,
+                                     struct batadv_priv *bat_priv,
+                                     struct batadv_hard_iface *hard_iface,
+                                     enum batadv_nl_commands cmd,
+                                     u32 portid, u32 seq, int flags,
+                                     struct netlink_callback *cb)
 {
        struct net_device *net_dev = hard_iface->net_dev;
        void *hdr;
 
-       hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
-                         &batadv_netlink_family, NLM_F_MULTI,
-                         BATADV_CMD_GET_HARDIFS);
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
        if (!hdr)
-               return -EMSGSIZE;
+               return -ENOBUFS;
 
-       genl_dump_check_consistent(cb, hdr);
+       if (cb)
+               genl_dump_check_consistent(cb, hdr);
+
+       if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+                       bat_priv->soft_iface->ifindex))
+               goto nla_put_failure;
 
        if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
                        net_dev->ifindex) ||
@@ -479,27 +827,137 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
                        goto nla_put_failure;
        }
 
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+       if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL,
+                       atomic_read(&hard_iface->bat_v.elp_interval)))
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT_OVERRIDE,
+                       atomic_read(&hard_iface->bat_v.throughput_override)))
+               goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
        genlmsg_end(msg, hdr);
        return 0;
 
- nla_put_failure:
+nla_put_failure:
        genlmsg_cancel(msg, hdr);
        return -EMSGSIZE;
 }
 
 /**
- * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages
+ * batadv_netlink_notify_hardif() - send hardif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+                                struct batadv_hard_iface *hard_iface)
+{
+       struct sk_buff *msg;
+       int ret;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+                                        BATADV_CMD_SET_HARDIF, 0, 0, 0, NULL);
+       if (ret < 0) {
+               nlmsg_free(msg);
+               return ret;
+       }
+
+       genlmsg_multicast_netns(&batadv_netlink_family,
+                               dev_net(bat_priv->soft_iface), msg, 0,
+                               BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+       return 0;
+}
+
+/**
+ * batadv_netlink_get_hardif() - Get hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_hardif(struct sk_buff *skb,
+                                    struct genl_info *info)
+{
+       struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+       struct batadv_priv *bat_priv = info->user_ptr[0];
+       struct sk_buff *msg;
+       int ret;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+                                        BATADV_CMD_GET_HARDIF,
+                                        info->snd_portid, info->snd_seq, 0,
+                                        NULL);
+       if (ret < 0) {
+               nlmsg_free(msg);
+               return ret;
+       }
+
+       ret = genlmsg_reply(msg, info);
+
+       return ret;
+}
+
+/**
+ * batadv_netlink_set_hardif() - Set hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_hardif(struct sk_buff *skb,
+                                    struct genl_info *info)
+{
+       struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+       struct batadv_priv *bat_priv = info->user_ptr[0];
+
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+       struct nlattr *attr;
+
+       if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) {
+               attr = info->attrs[BATADV_ATTR_ELP_INTERVAL];
+
+               atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr));
+       }
+
+       if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) {
+               attr = info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE];
+
+               atomic_set(&hard_iface->bat_v.throughput_override,
+                          nla_get_u32(attr));
+       }
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
+       batadv_netlink_notify_hardif(bat_priv, hard_iface);
+
+       return 0;
+}
+
+/**
+ * batadv_netlink_dump_hardif() - Dump all hard interfaces into a message
  * @msg: Netlink message to dump into
  * @cb: Parameters from query
  *
  * Return: error code, or length of reply message on success
  */
 static int
-batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb)
 {
        struct net *net = sock_net(cb->skb->sk);
        struct net_device *soft_iface;
        struct batadv_hard_iface *hard_iface;
+       struct batadv_priv *bat_priv;
        int ifindex;
        int portid = NETLINK_CB(cb->skb).portid;
        int skip = cb->args[0];
@@ -519,6 +977,8 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
                return -ENODEV;
        }
 
+       bat_priv = netdev_priv(soft_iface);
+
        rtnl_lock();
        cb->seq = batadv_hardif_generation << 1 | 1;
 
@@ -529,8 +989,10 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
                if (i++ < skip)
                        continue;
 
-               if (batadv_netlink_dump_hardif_entry(msg, portid, cb,
-                                                    hard_iface)) {
+               if (batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+                                              BATADV_CMD_GET_HARDIF,
+                                              portid, cb->nlh->nlmsg_seq,
+                                              NLM_F_MULTI, cb)) {
                        i--;
                        break;
                }
@@ -545,24 +1007,361 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
        return msg->len;
 }
 
+/**
+ * batadv_netlink_vlan_fill() - Fill message with vlan attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_vlan_fill(struct sk_buff *msg,
+                                   struct batadv_priv *bat_priv,
+                                   struct batadv_softif_vlan *vlan,
+                                   enum batadv_nl_commands cmd,
+                                   u32 portid, u32 seq, int flags)
+{
+       void *hdr;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+       if (!hdr)
+               return -ENOBUFS;
+
+       if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+                       bat_priv->soft_iface->ifindex))
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK))
+               goto nla_put_failure;
+
+       if (nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+                      !!atomic_read(&vlan->ap_isolation)))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_notify_vlan() - send vlan attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+                              struct batadv_softif_vlan *vlan)
+{
+       struct sk_buff *msg;
+       int ret;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan,
+                                      BATADV_CMD_SET_VLAN, 0, 0, 0);
+       if (ret < 0) {
+               nlmsg_free(msg);
+               return ret;
+       }
+
+       genlmsg_multicast_netns(&batadv_netlink_family,
+                               dev_net(bat_priv->soft_iface), msg, 0,
+                               BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+       return 0;
+}
+
+/**
+ * batadv_netlink_get_vlan() - Get vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+       struct batadv_softif_vlan *vlan = info->user_ptr[1];
+       struct batadv_priv *bat_priv = info->user_ptr[0];
+       struct sk_buff *msg;
+       int ret;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_GET_VLAN,
+                                      info->snd_portid, info->snd_seq, 0);
+       if (ret < 0) {
+               nlmsg_free(msg);
+               return ret;
+       }
+
+       ret = genlmsg_reply(msg, info);
+
+       return ret;
+}
+
+/**
+ * batadv_netlink_set_vlan() - Set vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+       struct batadv_softif_vlan *vlan = info->user_ptr[1];
+       struct batadv_priv *bat_priv = info->user_ptr[0];
+       struct nlattr *attr;
+
+       if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+               attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
+
+               atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+       }
+
+       batadv_netlink_notify_vlan(bat_priv, vlan);
+
+       return 0;
+}
+
+/**
+ * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to soft interface (with increased refcnt) on success, error
+ *  pointer on error
+ */
+static struct net_device *
+batadv_get_softif_from_info(struct net *net, struct genl_info *info)
+{
+       struct net_device *soft_iface;
+       int ifindex;
+
+       if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+               return ERR_PTR(-EINVAL);
+
+       ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface)
+               return ERR_PTR(-ENODEV);
+
+       if (!batadv_softif_is_valid(soft_iface))
+               goto err_put_softif;
+
+       return soft_iface;
+
+err_put_softif:
+       dev_put(soft_iface);
+
+       return ERR_PTR(-EINVAL);
+}
+
+/**
+ * batadv_get_hardif_from_info() - Retrieve hardif from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to hard interface (with increased refcnt) on success, error
+ *  pointer on error
+ */
+static struct batadv_hard_iface *
+batadv_get_hardif_from_info(struct batadv_priv *bat_priv, struct net *net,
+                           struct genl_info *info)
+{
+       struct batadv_hard_iface *hard_iface;
+       struct net_device *hard_dev;
+       unsigned int hardif_index;
+
+       if (!info->attrs[BATADV_ATTR_HARD_IFINDEX])
+               return ERR_PTR(-EINVAL);
+
+       hardif_index = nla_get_u32(info->attrs[BATADV_ATTR_HARD_IFINDEX]);
+
+       hard_dev = dev_get_by_index(net, hardif_index);
+       if (!hard_dev)
+               return ERR_PTR(-ENODEV);
+
+       hard_iface = batadv_hardif_get_by_netdev(hard_dev);
+       if (!hard_iface)
+               goto err_put_harddev;
+
+       if (hard_iface->soft_iface != bat_priv->soft_iface)
+               goto err_put_hardif;
+
+       /* hard_dev is referenced by hard_iface and not needed here */
+       dev_put(hard_dev);
+
+       return hard_iface;
+
+err_put_hardif:
+       batadv_hardif_put(hard_iface);
+err_put_harddev:
+       dev_put(hard_dev);
+
+       return ERR_PTR(-EINVAL);
+}
+
+/**
+ * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to vlan on success (with increased refcnt), error pointer
+ *  on error
+ */
+static struct batadv_softif_vlan *
+batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net,
+                         struct genl_info *info)
+{
+       struct batadv_softif_vlan *vlan;
+       u16 vid;
+
+       if (!info->attrs[BATADV_ATTR_VLANID])
+               return ERR_PTR(-EINVAL);
+
+       vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]);
+
+       vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG);
+       if (!vlan)
+               return ERR_PTR(-ENOENT);
+
+       return vlan;
+}
+
+/**
+ * batadv_pre_doit() - Prepare batman-adv genl doit request
+ * @ops: requested netlink operation
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
+                          struct genl_info *info)
+{
+       struct net *net = genl_info_net(info);
+       struct batadv_hard_iface *hard_iface;
+       struct batadv_priv *bat_priv = NULL;
+       struct batadv_softif_vlan *vlan;
+       struct net_device *soft_iface;
+       u8 user_ptr1_flags;
+       u8 mesh_dep_flags;
+       int ret;
+
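+       /* hardif and vlan lookups both store their result in
+        * info->user_ptr[1], so an op may request at most one of the two,
+        * and either of them also needs the mesh interface resolved first
+        */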
+       user_ptr1_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
+       if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1))
+               return -EINVAL;
+
+       mesh_dep_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
+       if (WARN_ON((ops->internal_flags & mesh_dep_flags) &&
+                   (~ops->internal_flags & BATADV_FLAG_NEED_MESH)))
+               return -EINVAL;
+
+       if (ops->internal_flags & BATADV_FLAG_NEED_MESH) {
+               soft_iface = batadv_get_softif_from_info(net, info);
+               if (IS_ERR(soft_iface))
+                       return PTR_ERR(soft_iface);
+
+               bat_priv = netdev_priv(soft_iface);
+               info->user_ptr[0] = bat_priv;
+       }
+
+       if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF) {
+               hard_iface = batadv_get_hardif_from_info(bat_priv, net, info);
+               if (IS_ERR(hard_iface)) {
+                       ret = PTR_ERR(hard_iface);
+                       goto err_put_softif;
+               }
+
+               info->user_ptr[1] = hard_iface;
+       }
+
+       if (ops->internal_flags & BATADV_FLAG_NEED_VLAN) {
+               vlan = batadv_get_vlan_from_info(bat_priv, net, info);
+               if (IS_ERR(vlan)) {
+                       ret = PTR_ERR(vlan);
+                       goto err_put_softif;
+               }
+
+               info->user_ptr[1] = vlan;
+       }
+
+       return 0;
+
+err_put_softif:
+       if (bat_priv)
+               dev_put(bat_priv->soft_iface);
+
+       return ret;
+}
+
+/**
+ * batadv_post_doit() - End batman-adv genl doit request
+ * @ops: requested netlink operation
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ */
+static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
+                            struct genl_info *info)
+{
+       struct batadv_hard_iface *hard_iface;
+       struct batadv_softif_vlan *vlan;
+       struct batadv_priv *bat_priv;
+
+       if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF &&
+           info->user_ptr[1]) {
+               hard_iface = info->user_ptr[1];
+
+               batadv_hardif_put(hard_iface);
+       }
+
+       if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) {
+               vlan = info->user_ptr[1];
+               batadv_softif_vlan_put(vlan);
+       }
+
+       if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) {
+               bat_priv = info->user_ptr[0];
+               dev_put(bat_priv->soft_iface);
+       }
+}
+
 static const struct genl_ops batadv_netlink_ops[] = {
        {
-               .cmd = BATADV_CMD_GET_MESH_INFO,
-               .flags = GENL_ADMIN_PERM,
+               .cmd = BATADV_CMD_GET_MESH,
+               /* can be retrieved by unprivileged users */
                .policy = batadv_netlink_policy,
-               .doit = batadv_netlink_get_mesh_info,
+               .doit = batadv_netlink_get_mesh,
+               .internal_flags = BATADV_FLAG_NEED_MESH,
        },
        {
                .cmd = BATADV_CMD_TP_METER,
                .flags = GENL_ADMIN_PERM,
                .policy = batadv_netlink_policy,
                .doit = batadv_netlink_tp_meter_start,
+               .internal_flags = BATADV_FLAG_NEED_MESH,
        },
        {
                .cmd = BATADV_CMD_TP_METER_CANCEL,
                .flags = GENL_ADMIN_PERM,
                .policy = batadv_netlink_policy,
                .doit = batadv_netlink_tp_meter_cancel,
+               .internal_flags = BATADV_FLAG_NEED_MESH,
        },
        {
                .cmd = BATADV_CMD_GET_ROUTING_ALGOS,
@@ -571,10 +1370,13 @@ static const struct genl_ops batadv_netlink_ops[] = {
                .dumpit = batadv_algo_dump,
        },
        {
-               .cmd = BATADV_CMD_GET_HARDIFS,
-               .flags = GENL_ADMIN_PERM,
+               .cmd = BATADV_CMD_GET_HARDIF,
+               /* can be retrieved by unprivileged users */
                .policy = batadv_netlink_policy,
-               .dumpit = batadv_netlink_dump_hardifs,
+               .dumpit = batadv_netlink_dump_hardif,
+               .doit = batadv_netlink_get_hardif,
+               .internal_flags = BATADV_FLAG_NEED_MESH |
+                                 BATADV_FLAG_NEED_HARDIF,
        },
        {
                .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
@@ -630,7 +1432,37 @@ static const struct genl_ops batadv_netlink_ops[] = {
                .policy = batadv_netlink_policy,
                .dumpit = batadv_mcast_flags_dump,
        },
-
+       {
+               .cmd = BATADV_CMD_SET_MESH,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .doit = batadv_netlink_set_mesh,
+               .internal_flags = BATADV_FLAG_NEED_MESH,
+       },
+       {
+               .cmd = BATADV_CMD_SET_HARDIF,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .doit = batadv_netlink_set_hardif,
+               .internal_flags = BATADV_FLAG_NEED_MESH |
+                                 BATADV_FLAG_NEED_HARDIF,
+       },
+       {
+               .cmd = BATADV_CMD_GET_VLAN,
+               /* can be retrieved by unprivileged users */
+               .policy = batadv_netlink_policy,
+               .doit = batadv_netlink_get_vlan,
+               .internal_flags = BATADV_FLAG_NEED_MESH |
+                                 BATADV_FLAG_NEED_VLAN,
+       },
+       {
+               .cmd = BATADV_CMD_SET_VLAN,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .doit = batadv_netlink_set_vlan,
+               .internal_flags = BATADV_FLAG_NEED_MESH |
+                                 BATADV_FLAG_NEED_VLAN,
+       },
 };
 
 struct genl_family batadv_netlink_family __ro_after_init = {
@@ -639,6 +1471,8 @@ struct genl_family batadv_netlink_family __ro_after_init = {
        .version = 1,
        .maxattr = BATADV_ATTR_MAX,
        .netnsok = true,
+       .pre_doit = batadv_pre_doit,
+       .post_doit = batadv_post_doit,
        .module = THIS_MODULE,
        .ops = batadv_netlink_ops,
        .n_ops = ARRAY_SIZE(batadv_netlink_ops),
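
To exercise the new unprivileged doit handlers from userspace, a minimal libnl-genl sketch (illustrative only, not part of this patch; "bat0" and the trimmed error handling are assumptions, the command and attribute names come from <uapi/linux/batman_adv.h>) could look like:

	#include <net/if.h>
	#include <netlink/genl/ctrl.h>
	#include <netlink/genl/genl.h>
	#include <linux/batman_adv.h>

	int main(void)
	{
		struct nl_sock *sk = nl_socket_alloc();
		struct nl_msg *msg = nlmsg_alloc();
		int family;

		genl_connect(sk);
		family = genl_ctrl_resolve(sk, BATADV_NL_NAME);

		genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
			    BATADV_CMD_GET_MESH, 1);
		nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
			    if_nametoindex("bat0"));

		nl_send_auto(sk, msg);   /* kernel replies via genlmsg_reply() */
		nl_recvmsgs_default(sk); /* reply parsing callback omitted */

		nlmsg_free(msg);
		nl_socket_free(sk);
		return 0;
	}
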
index 216484b..7273368 100644
@@ -34,6 +34,12 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
                                  u8 result, u32 test_time, u64 total_bytes,
                                  u32 cookie);
 
+int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv);
+int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+                                struct batadv_hard_iface *hard_iface);
+int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+                              struct batadv_softif_vlan *vlan);
+
 extern struct genl_family batadv_netlink_family;
 
 #endif /* _NET_BATMAN_ADV_NETLINK_H_ */
index b14fb34..2e36723 100644
 #include <linux/string.h>
 #include <linux/types.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
 #include "debugfs.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
-#include "gateway_common.h"
 #include "hard-interface.h"
 #include "multicast.h"
 #include "network-coding.h"
@@ -222,12 +222,16 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
 
        netif_trans_update(soft_iface);
        vid = batadv_get_vid(skb, 0);
+
+       skb_reset_mac_header(skb);
        ethhdr = eth_hdr(skb);
 
        proto = ethhdr->h_proto;
 
        switch (ntohs(proto)) {
        case ETH_P_8021Q:
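+               /* the VLAN header may still sit in non-linear data; make
+                * sure it is in the linear area before vlan_eth_hdr()
+                * dereferences it
+                */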
+               if (!pskb_may_pull(skb, sizeof(*vhdr)))
+                       goto dropped;
                vhdr = vlan_eth_hdr(skb);
                proto = vhdr->h_vlan_encapsulated_proto;
 
index e1b8162..0b4b3fb 100644
@@ -40,6 +40,7 @@
 #include <linux/stringify.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
@@ -47,6 +48,7 @@
 #include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "soft-interface.h"
 
@@ -153,9 +155,14 @@ ssize_t batadv_store_##_name(struct kobject *kobj,                 \
 {                                                                      \
        struct net_device *net_dev = batadv_kobj_to_netdev(kobj);       \
        struct batadv_priv *bat_priv = netdev_priv(net_dev);            \
+       ssize_t length;                                                 \
+                                                                       \
+       length = __batadv_store_bool_attr(buff, count, _post_func, attr,\
+                                         &bat_priv->_name, net_dev);   \
                                                                        \
-       return __batadv_store_bool_attr(buff, count, _post_func, attr,  \
-                                       &bat_priv->_name, net_dev);     \
+       batadv_netlink_notify_mesh(bat_priv);                           \
+                                                                       \
+       return length;                                                  \
 }
 
 #define BATADV_ATTR_SIF_SHOW_BOOL(_name)                               \
@@ -185,11 +192,16 @@ ssize_t batadv_store_##_name(struct kobject *kobj,                        \
 {                                                                      \
        struct net_device *net_dev = batadv_kobj_to_netdev(kobj);       \
        struct batadv_priv *bat_priv = netdev_priv(net_dev);            \
+       ssize_t length;                                                 \
                                                                        \
-       return __batadv_store_uint_attr(buff, count, _min, _max,        \
-                                       _post_func, attr,               \
-                                       &bat_priv->_var, net_dev,       \
-                                       NULL);  \
+       length = __batadv_store_uint_attr(buff, count, _min, _max,      \
+                                         _post_func, attr,             \
+                                         &bat_priv->_var, net_dev,     \
+                                         NULL);                        \
+                                                                       \
+       batadv_netlink_notify_mesh(bat_priv);                           \
+                                                                       \
+       return length;                                                  \
 }
 
 #define BATADV_ATTR_SIF_SHOW_UINT(_name, _var)                         \
@@ -222,6 +234,11 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj,                    \
                                              attr, &vlan->_name,       \
                                              bat_priv->soft_iface);    \
                                                                        \
+       if (vlan->vid)                                                  \
+               batadv_netlink_notify_vlan(bat_priv, vlan);             \
+       else                                                            \
+               batadv_netlink_notify_mesh(bat_priv);                   \
+                                                                       \
        batadv_softif_vlan_put(vlan);                                   \
        return res;                                                     \
 }
@@ -255,6 +272,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj,                  \
 {                                                                      \
        struct net_device *net_dev = batadv_kobj_to_netdev(kobj);       \
        struct batadv_hard_iface *hard_iface;                           \
+       struct batadv_priv *bat_priv;                                   \
        ssize_t length;                                                 \
                                                                        \
        hard_iface = batadv_hardif_get_by_netdev(net_dev);              \
@@ -267,6 +285,11 @@ ssize_t batadv_store_##_name(struct kobject *kobj,                 \
                                          hard_iface->soft_iface,       \
                                          net_dev);                     \
                                                                        \
+       if (hard_iface->soft_iface) {                                   \
+               bat_priv = netdev_priv(hard_iface->soft_iface);         \
+               batadv_netlink_notify_hardif(bat_priv, hard_iface);     \
+       }                                                               \
+                                                                       \
        batadv_hardif_put(hard_iface);                          \
        return length;                                                  \
 }
@@ -536,6 +559,9 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
        batadv_gw_check_client_stop(bat_priv);
        atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp);
        batadv_gw_tvlv_container_update(bat_priv);
+
+       batadv_netlink_notify_mesh(bat_priv);
+
        return count;
 }
 
@@ -562,6 +588,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
                                         size_t count)
 {
        struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+       ssize_t length;
 
        /* setting the GW selection class is allowed only if the routing
         * algorithm in use implements the GW API
@@ -577,10 +604,14 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
                return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff,
                                                              count);
 
-       return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
-                                       batadv_post_gw_reselect, attr,
-                                       &bat_priv->gw.sel_class,
-                                       bat_priv->soft_iface, NULL);
+       length = __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
+                                         batadv_post_gw_reselect, attr,
+                                         &bat_priv->gw.sel_class,
+                                         bat_priv->soft_iface, NULL);
+
+       batadv_netlink_notify_mesh(bat_priv);
+
+       return length;
 }
 
 static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
@@ -600,12 +631,18 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj,
                                      struct attribute *attr, char *buff,
                                      size_t count)
 {
+       struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
        struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
+       ssize_t length;
 
        if (buff[count - 1] == '\n')
                buff[count - 1] = '\0';
 
-       return batadv_gw_bandwidth_set(net_dev, buff, count);
+       length = batadv_gw_bandwidth_set(net_dev, buff, count);
+
+       batadv_netlink_notify_mesh(bat_priv);
+
+       return length;
 }
 
 /**
@@ -673,6 +710,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
                    "New skb mark for extended isolation: %#.8x/%#.8x\n",
                    bat_priv->isolation_mark, bat_priv->isolation_mark_mask);
 
+       batadv_netlink_notify_mesh(bat_priv);
+
        return count;
 }
 
@@ -1077,6 +1116,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
                                                struct attribute *attr,
                                                char *buff, size_t count)
 {
+       struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
        struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
        struct batadv_hard_iface *hard_iface;
        u32 tp_override;
@@ -1107,6 +1147,8 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
 
        atomic_set(&hard_iface->bat_v.throughput_override, tp_override);
 
+       batadv_netlink_notify_hardif(bat_priv, hard_iface);
+
 out:
        batadv_hardif_put(hard_iface);
        return count;
index 1fb885a..4a048fd 100644
@@ -1018,8 +1018,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
                if (!nsrcs)
                        return -EINVAL;
 
-               grec_len = sizeof(*grec) +
-                          sizeof(struct in6_addr) * ntohs(*nsrcs);
+               grec_len = struct_size(grec, grec_src, ntohs(*nsrcs));
 
                if (!ipv6_mc_may_pull(skb, len + grec_len))
                        return -EINVAL;
index 7fbdba5..1d7502a 100644
@@ -116,6 +116,8 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
        busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
        devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
 
+       lockdep_assert_held(&devlink_mutex);
+
        list_for_each_entry(devlink, &devlink_list, list) {
                if (strcmp(devlink->dev->bus->name, busname) == 0 &&
                    strcmp(dev_name(devlink->dev), devname) == 0 &&
@@ -2701,11 +2703,6 @@ static const struct devlink_param devlink_param_generic[] = {
                .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
                .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
        },
-       {
-               .id = DEVLINK_PARAM_GENERIC_ID_WOL,
-               .name = DEVLINK_PARAM_GENERIC_WOL_NAME,
-               .type = DEVLINK_PARAM_GENERIC_WOL_TYPE,
-       },
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -2858,6 +2855,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
                                 u32 portid, u32 seq, int flags)
 {
        union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+       bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
        const struct devlink_param *param = param_item->param;
        struct devlink_param_gset_ctx ctx;
        struct nlattr *param_values_list;
@@ -2876,12 +2874,15 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
                                return -EOPNOTSUPP;
                        param_value[i] = param_item->driverinit_value;
                } else {
+                       if (!param_item->published)
+                               continue;
                        ctx.cmode = i;
                        err = devlink_param_get(devlink, param, &ctx);
                        if (err)
                                return err;
                        param_value[i] = ctx.val;
                }
+               param_value_set[i] = true;
        }
 
        hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -2916,7 +2917,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
                goto param_nest_cancel;
 
        for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
-               if (!devlink_param_cmode_is_supported(param, i))
+               if (!param_value_set[i])
                        continue;
                err = devlink_nl_param_value_fill_one(msg, param->type,
                                                      i, param_value[i]);
@@ -3625,44 +3626,56 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
                                             struct netlink_callback *cb)
 {
        u64 ret_offset, start_offset, end_offset = 0;
-       struct nlattr *attrs[DEVLINK_ATTR_MAX + 1];
        const struct genl_ops *ops = cb->data;
        struct devlink_region *region;
        struct nlattr *chunks_attr;
        const char *region_name;
        struct devlink *devlink;
+       struct nlattr **attrs;
        bool dump = true;
        void *hdr;
        int err;
 
        start_offset = *((u64 *)&cb->args[0]);
 
+       attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
+       if (!attrs)
+               return -ENOMEM;
+
        err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
                          attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack);
        if (err)
-               goto out;
+               goto out_free;
 
+       mutex_lock(&devlink_mutex);
        devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
-       if (IS_ERR(devlink))
-               goto out;
+       if (IS_ERR(devlink)) {
+               err = PTR_ERR(devlink);
+               goto out_dev;
+       }
 
-       mutex_lock(&devlink_mutex);
        mutex_lock(&devlink->lock);
 
        if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
-           !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+           !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) {
+               err = -EINVAL;
                goto out_unlock;
+       }
 
        region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
        region = devlink_region_get_by_name(devlink, region_name);
-       if (!region)
+       if (!region) {
+               err = -EINVAL;
                goto out_unlock;
+       }
 
        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
                          DEVLINK_CMD_REGION_READ);
-       if (!hdr)
+       if (!hdr) {
+               err = -EMSGSIZE;
                goto out_unlock;
+       }
 
        err = devlink_nl_put_handle(skb, devlink);
        if (err)
@@ -3673,8 +3686,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
                goto nla_put_failure;
 
        chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS);
-       if (!chunks_attr)
+       if (!chunks_attr) {
+               err = -EMSGSIZE;
                goto nla_put_failure;
+       }
 
        if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
            attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
@@ -3697,8 +3712,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
                goto nla_put_failure;
 
        /* Check if there was any progress done to prevent infinite loop */
-       if (ret_offset == start_offset)
+       if (ret_offset == start_offset) {
+               err = -EINVAL;
                goto nla_put_failure;
+       }
 
        *((u64 *)&cb->args[0]) = ret_offset;
 
@@ -3706,6 +3723,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
        genlmsg_end(skb, hdr);
        mutex_unlock(&devlink->lock);
        mutex_unlock(&devlink_mutex);
+       kfree(attrs);
 
        return skb->len;
 
@@ -3713,9 +3731,11 @@ nla_put_failure:
        genlmsg_cancel(skb, hdr);
 out_unlock:
        mutex_unlock(&devlink->lock);
+out_dev:
        mutex_unlock(&devlink_mutex);
-out:
-       return 0;
+out_free:
+       kfree(attrs);
+       return err;
 }
 
 struct devlink_info_req {
@@ -4351,11 +4371,8 @@ static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
                err = -EMSGSIZE;
                goto nla_put_failure;
        }
-       err = genlmsg_reply(skb, info);
-       if (err)
-               return err;
 
-       return 0;
+       return genlmsg_reply(skb, info);
 
 nla_put_failure:
        nlmsg_free(skb);
@@ -5237,6 +5254,14 @@ EXPORT_SYMBOL_GPL(devlink_unregister);
  */
 void devlink_free(struct devlink *devlink)
 {
+       WARN_ON(!list_empty(&devlink->reporter_list));
+       WARN_ON(!list_empty(&devlink->region_list));
+       WARN_ON(!list_empty(&devlink->param_list));
+       WARN_ON(!list_empty(&devlink->resource_list));
+       WARN_ON(!list_empty(&devlink->dpipe_table_list));
+       WARN_ON(!list_empty(&devlink->sb_list));
+       WARN_ON(!list_empty(&devlink->port_list));
+
        kfree(devlink);
 }
 EXPORT_SYMBOL_GPL(devlink_free);
@@ -5887,6 +5912,48 @@ void devlink_params_unregister(struct devlink *devlink,
 EXPORT_SYMBOL_GPL(devlink_params_unregister);
 
 /**
+ *     devlink_params_publish - publish configuration parameters
+ *
+ *     @devlink: devlink
+ *
+ *     Publish previously registered configuration parameters.
+ */
+void devlink_params_publish(struct devlink *devlink)
+{
+       struct devlink_param_item *param_item;
+
+       list_for_each_entry(param_item, &devlink->param_list, list) {
+               if (param_item->published)
+                       continue;
+               param_item->published = true;
+               devlink_param_notify(devlink, 0, param_item,
+                                    DEVLINK_CMD_PARAM_NEW);
+       }
+}
+EXPORT_SYMBOL_GPL(devlink_params_publish);
+
+/**
+ *     devlink_params_unpublish - unpublish configuration parameters
+ *
+ *     @devlink: devlink
+ *
+ *     Unpublish previously registered configuration parameters.
+ */
+void devlink_params_unpublish(struct devlink *devlink)
+{
+       struct devlink_param_item *param_item;
+
+       list_for_each_entry(param_item, &devlink->param_list, list) {
+               if (!param_item->published)
+                       continue;
+               param_item->published = false;
+               devlink_param_notify(devlink, 0, param_item,
+                                    DEVLINK_CMD_PARAM_DEL);
+       }
+}
+EXPORT_SYMBOL_GPL(devlink_params_unpublish);
+
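
A hedged sketch of the intended driver-side flow (not from this patch; my_params is a hypothetical parameter table): register the parameters while their driverinit values may still be patched up, then publish once they are safe to expose, so the DEVLINK_CMD_PARAM_NEW notifications fire only for meaningful values:

	int err;

	err = devlink_params_register(devlink, my_params,
				      ARRAY_SIZE(my_params));
	if (err)
		return err;

	/* ... set up driverinit defaults while still unpublished ... */

	devlink_params_publish(devlink);	/* one PARAM_NEW per param */
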
+/**
  *     devlink_port_params_register - register port configuration parameters
  *
  *     @devlink_port: devlink port
@@ -6339,7 +6406,7 @@ void devlink_compat_running_version(struct net_device *dev,
        list_for_each_entry(devlink, &devlink_list, list) {
                mutex_lock(&devlink->lock);
                list_for_each_entry(devlink_port, &devlink->port_list, list) {
-                       if (devlink_port->type == DEVLINK_PORT_TYPE_ETH ||
+                       if (devlink_port->type == DEVLINK_PORT_TYPE_ETH &&
                            devlink_port->type_dev == dev) {
                                __devlink_compat_running_version(devlink,
                                                                 buf, len);
index 0fbf392..d2c47cd 100644
@@ -3020,17 +3020,15 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
                const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
                const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
 
-               if (ext_m_spec->h_dest) {
-                       memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest,
-                              ETH_ALEN);
-                       memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest,
-                              ETH_ALEN);
-
-                       match->dissector.used_keys |=
-                               BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
-                       match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
-                               offsetof(struct ethtool_rx_flow_key, eth_addrs);
-               }
+               memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest,
+                      ETH_ALEN);
+               memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest,
+                      ETH_ALEN);
+
+               match->dissector.used_keys |=
+                       BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
+               match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
+                       offsetof(struct ethtool_rx_flow_key, eth_addrs);
        }
 
        act = &flow->rule->action.entries[0];
index a78deb2..b584cb4 100644
@@ -4127,10 +4127,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
                /* Only some socketops are supported */
                switch (optname) {
                case SO_RCVBUF:
+                       val = min_t(u32, val, sysctl_rmem_max);
                        sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                        sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
                        break;
                case SO_SNDBUF:
+                       val = min_t(u32, val, sysctl_wmem_max);
                        sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                        sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
                        break;
index 43a932c..5b2252c 100644
@@ -136,17 +136,19 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
        if (!(pool->p.flags & PP_FLAG_DMA_MAP))
                goto skip_dma_map;
 
-       /* Setup DMA mapping: use page->private for DMA-addr
+       /* Setup DMA mapping: use 'struct page' area for storing DMA-addr
+        * since dma_addr_t can be either 32 or 64 bits and does not always fit
+        * into page private data (i.e. 32-bit CPU with 64-bit DMA caps)
         * This mapping is kept for lifetime of page, until leaving pool.
         */
-       dma = dma_map_page(pool->p.dev, page, 0,
-                          (PAGE_SIZE << pool->p.order),
-                          pool->p.dma_dir);
+       dma = dma_map_page_attrs(pool->p.dev, page, 0,
+                                (PAGE_SIZE << pool->p.order),
+                                pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
        if (dma_mapping_error(pool->p.dev, dma)) {
                put_page(page);
                return NULL;
        }
-       set_page_private(page, dma); /* page->private = dma; */
+       page->dma_addr = dma;
 
 skip_dma_map:
 	/* When page just alloc'ed it should/must have refcnt 1. */
@@ -175,13 +177,17 @@ EXPORT_SYMBOL(page_pool_alloc_pages);
 static void __page_pool_clean_page(struct page_pool *pool,
                                   struct page *page)
 {
+       dma_addr_t dma;
+
        if (!(pool->p.flags & PP_FLAG_DMA_MAP))
                return;
 
+       dma = page->dma_addr;
        /* DMA unmap */
-       dma_unmap_page(pool->p.dev, page_private(page),
-                      PAGE_SIZE << pool->p.order, pool->p.dma_dir);
-       set_page_private(page, 0);
+       dma_unmap_page_attrs(pool->p.dev, dma,
+                            PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+                            DMA_ATTR_SKIP_CPU_SYNC);
+       page->dma_addr = 0;
 }
 
 /* Return a page to the page allocator, cleaning up our state */
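
A short illustration of why page->private was dropped as DMA-address storage (sizes are the usual assumption for a 32-bit CPU with 64-bit DMA addressing, not taken from this patch):

	/* sizeof(unsigned long) == 4 -> page->private would truncate
	 * sizeof(dma_addr_t)    == 8 -> needs the dedicated dma_addr field
	 */
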
index e76ed8d..ae6f06e 100644
@@ -554,8 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
        struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
 
        /* No sk_callback_lock since already detached. */
-       if (psock->parser.enabled)
-               strp_done(&psock->parser.strp);
+       strp_done(&psock->parser.strp);
 
        cancel_work_sync(&psock->work);
 
index 71ded4d..f4b8b78 100644
@@ -785,6 +785,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                 */
                val = min_t(u32, val, sysctl_wmem_max);
 set_sndbuf:
+               /* Ensure val * 2 fits into an int, to prevent max_t()
+                * from treating it as a negative value.
+                */
+               val = min_t(int, val, INT_MAX / 2);
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
                /* Wake up sending tasks if we upped the value. */
@@ -796,6 +800,12 @@ set_sndbuf:
                        ret = -EPERM;
                        break;
                }
+
+               /* No negative values (to prevent underflow, as val will be
+                * multiplied by 2).
+                */
+               if (val < 0)
+                       val = 0;
                goto set_sndbuf;
 
        case SO_RCVBUF:
@@ -806,6 +816,10 @@ set_sndbuf:
                 */
                val = min_t(u32, val, sysctl_rmem_max);
 set_rcvbuf:
+               /* Ensure val * 2 fits into an int, to prevent max_t()
+                * from treating it as a negative value.
+                */
+               val = min_t(int, val, INT_MAX / 2);
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                /*
                 * We double it on the way in to account for
@@ -830,6 +844,12 @@ set_rcvbuf:
                        ret = -EPERM;
                        break;
                }
+
+               /* No negative values (to prevent underflow, as val will be
+                * multiplied by 2).
+                */
+               if (val < 0)
+                       val = 0;
                goto set_rcvbuf;
 
        case SO_KEEPALIVE:
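
A worked example of the INT_MAX / 2 clamp added above (illustrative, not part of the patch):

	int val = INT_MAX;		/* user-supplied buffer size     */
	/* without the clamp, val * 2 overflows a signed int and wraps
	 * negative, so max_t(int, val * 2, SOCK_MIN_SNDBUF) silently
	 * falls back to SOCK_MIN_SNDBUF instead of a large buffer
	 */
	val = min_t(int, val, INT_MAX / 2);	/* 1073741823           */
	/* val * 2 == 2147483646 == INT_MAX - 1: no wrap-around        */
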
@@ -2475,7 +2495,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
        }
 
        if (sk_has_memory_pressure(sk)) {
-               int alloc;
+               u64 alloc;
 
                if (!sk_under_memory_pressure(sk))
                        return 1;
index 6eb837a..baaaeb2 100644
@@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
 static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
                                           u8 pkt, u8 opt, u8 *val, u8 len)
 {
-       if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
+       if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options)
                return 0;
        return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
 }
@@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
 static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
                                           u8 pkt, u8 opt, u8 *val, u8 len)
 {
-       if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
+       if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options)
                return 0;
        return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
 }
index a191702..8c431e0 100644
@@ -767,11 +767,10 @@ static int dsa_switch_probe(struct dsa_switch *ds)
 
 struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
 {
-       size_t size = sizeof(struct dsa_switch) + n * sizeof(struct dsa_port);
        struct dsa_switch *ds;
        int i;
 
-       ds = devm_kzalloc(dev, size, GFP_KERNEL);
+       ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL);
        if (!ds)
                return NULL;
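
struct_size() (from <linux/overflow.h>) replaces the open-coded size sum above; a sketch of the idiom with a hypothetical flexible-array struct:

	struct example {
		int n;
		u64 items[];
	} *p;

	/* sizeof(*p) + 16 * sizeof(u64), saturating to SIZE_MAX instead
	 * of wrapping if the multiplication overflows, so the allocator
	 * fails cleanly rather than returning an undersized buffer
	 */
	size_t bytes = struct_size(p, items, 16);
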
 
index 79e97d2..c58f339 100644
@@ -248,6 +248,8 @@ static void dsa_master_reset_mtu(struct net_device *dev)
        rtnl_unlock();
 }
 
+static struct lock_class_key dsa_master_addr_list_lock_key;
+
 int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 {
        int ret;
@@ -261,6 +263,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
        wmb();
 
        dev->dsa_ptr = cpu_dp;
+       lockdep_set_class(&dev->addr_list_lock,
+                         &dsa_master_addr_list_lock_key);
 
        ret = dsa_master_ethtool_setup(dev);
        if (ret)
index 70395a0..2e5e7c0 100644
@@ -140,11 +140,14 @@ static int dsa_slave_close(struct net_device *dev)
 static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
 {
        struct net_device *master = dsa_slave_to_master(dev);
-
-       if (change & IFF_ALLMULTI)
-               dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
-       if (change & IFF_PROMISC)
-               dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1);
+       if (dev->flags & IFF_UP) {
+               if (change & IFF_ALLMULTI)
+                       dev_set_allmulti(master,
+                                        dev->flags & IFF_ALLMULTI ? 1 : -1);
+               if (change & IFF_PROMISC)
+                       dev_set_promiscuity(master,
+                                           dev->flags & IFF_PROMISC ? 1 : -1);
+       }
 }
 
 static void dsa_slave_set_rx_mode(struct net_device *dev)
@@ -644,7 +647,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
        int ret;
 
        /* Port's PHY and MAC both need to be EEE capable */
-       if (!dev->phydev && !dp->pl)
+       if (!dev->phydev || !dp->pl)
                return -ENODEV;
 
        if (!ds->ops->set_mac_eee)
@@ -664,7 +667,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
        int ret;
 
        /* Port's PHY and MAC both need to be EEE capable */
-       if (!dev->phydev && !dp->pl)
+       if (!dev->phydev || !dp->pl)
                return -ENODEV;
 
        if (!ds->ops->get_mac_eee)
index da71b9e..927e9c8 100644
@@ -67,6 +67,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
 
        pskb_trim_rcsum(skb, skb->len - len);
 
+       skb->offload_fwd_mark = true;
+
        return skb;
 }
 
index 1a4e9ff..5731670 100644
@@ -108,6 +108,7 @@ static size_t inet_sk_attr_size(struct sock *sk,
                + nla_total_size(1) /* INET_DIAG_TOS */
                + nla_total_size(1) /* INET_DIAG_TCLASS */
                + nla_total_size(4) /* INET_DIAG_MARK */
+               + nla_total_size(4) /* INET_DIAG_CLASS_ID */
                + nla_total_size(sizeof(struct inet_diag_meminfo))
                + nla_total_size(sizeof(struct inet_diag_msg))
                + nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
@@ -287,12 +288,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
                        goto errout;
        }
 
-       if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) {
+       if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
+           ext & (1 << (INET_DIAG_TCLASS - 1))) {
                u32 classid = 0;
 
 #ifdef CONFIG_SOCK_CGROUP_DATA
                classid = sock_cgroup_classid(&sk->sk_cgrp_data);
 #endif
+               /* Fallback to socket priority if class id isn't set.
+                * Classful qdiscs use it as direct reference to class.
+                * For cgroup2 classid is always zero.
+                */
+               if (!classid)
+                       classid = sk->sk_priority;
 
                if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
                        goto errout;
index d757b96..be77859 100644
@@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
                        atomic_set(&p->rid, 0);
                        p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
                        p->rate_tokens = 0;
+                       p->n_redirects = 0;
                        /* 60*HZ is arbitrary, but chosen enough high so that the first
                         * calculation of tokens is at its maximum.
                         */
index d1cef66..ccee941 100644
@@ -1381,12 +1381,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_parm *p = &t->parms;
+       __be16 o_flags = p->o_flags;
+
+       if ((t->erspan_ver == 1 || t->erspan_ver == 2) &&
+           !t->collect_md)
+               o_flags |= TUNNEL_KEY;
 
        if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
            nla_put_be16(skb, IFLA_GRE_IFLAGS,
                         gre_tnl_flags_to_gre_flags(p->i_flags)) ||
            nla_put_be16(skb, IFLA_GRE_OFLAGS,
-                        gre_tnl_flags_to_gre_flags(p->o_flags)) ||
+                        gre_tnl_flags_to_gre_flags(o_flags)) ||
            nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
            nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
            nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
index e26165a..4b07eb8 100644
@@ -215,6 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
 
        /* Change outer to look like the reply to an incoming packet */
        nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+       target.dst.protonum = IPPROTO_ICMP;
        if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
                return 0;
 
index a0aa13b..0a8a60c 100644
@@ -105,6 +105,8 @@ static void fast_csum(struct snmp_ctx *ctx, unsigned char offset)
 int snmp_version(void *context, size_t hdrlen, unsigned char tag,
                 const void *data, size_t datalen)
 {
+       if (datalen != 1)
+               return -EINVAL;
        if (*(unsigned char *)data > 1)
                return -ENOTSUPP;
        return 1;
@@ -114,8 +116,11 @@ int snmp_helper(void *context, size_t hdrlen, unsigned char tag,
                const void *data, size_t datalen)
 {
        struct snmp_ctx *ctx = (struct snmp_ctx *)context;
-       __be32 *pdata = (__be32 *)data;
+       __be32 *pdata;
 
+       if (datalen != 4)
+               return -EINVAL;
+       pdata = (__be32 *)data;
        if (*pdata == ctx->from) {
                pr_debug("%s: %pI4 to %pI4\n", __func__,
                         (void *)&ctx->from, (void *)&ctx->to);
index 16259ea..ecc12a7 100644
@@ -887,13 +887,15 @@ void ip_rt_send_redirect(struct sk_buff *skb)
        /* No redirected packets during ip_rt_redirect_silence;
         * reset the algorithm.
         */
-       if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+       if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
                peer->rate_tokens = 0;
+               peer->n_redirects = 0;
+       }
 
        /* Too many ignored redirects; do not send anything
         * set dst.rate_last to the last seen redirected packet.
         */
-       if (peer->rate_tokens >= ip_rt_redirect_number) {
+       if (peer->n_redirects >= ip_rt_redirect_number) {
                peer->rate_last = jiffies;
                goto out_put_peer;
        }
@@ -910,6 +912,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
                peer->rate_last = jiffies;
                ++peer->rate_tokens;
+               ++peer->n_redirects;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
                if (log_martians &&
                    peer->rate_tokens == ip_rt_redirect_number)
index dcb1d43..da5a210 100644
@@ -1200,7 +1200,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
        list_for_each_entry(ifa, &idev->addr_list, if_list) {
                if (ifa == ifp)
                        continue;
-               if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+               if (ifa->prefix_len != ifp->prefix_len ||
+                   !ipv6_prefix_equal(&ifa->addr, &ifp->addr,
                                       ifp->prefix_len))
                        continue;
                if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE))
index e081e69..65a4f96 100644
@@ -2098,12 +2098,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
        struct ip6_tnl *t = netdev_priv(dev);
        struct __ip6_tnl_parm *p = &t->parms;
+       __be16 o_flags = p->o_flags;
+
+       if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
+           !p->collect_md)
+               o_flags |= TUNNEL_KEY;
 
        if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
            nla_put_be16(skb, IFLA_GRE_IFLAGS,
                         gre_tnl_flags_to_gre_flags(p->i_flags)) ||
            nla_put_be16(skb, IFLA_GRE_OFLAGS,
-                        gre_tnl_flags_to_gre_flags(p->o_flags)) ||
+                        gre_tnl_flags_to_gre_flags(o_flags)) ||
            nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
            nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
            nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
index 8b075f0..6d0b1f3 100644
@@ -23,9 +23,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
        struct sock *sk = sk_to_full_sk(skb->sk);
        unsigned int hh_len;
        struct dst_entry *dst;
+       int strict = (ipv6_addr_type(&iph->daddr) &
+                     (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
        struct flowi6 fl6 = {
                .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
-                       rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
+                       strict ? skb_dst(skb)->dev->ifindex : 0,
                .flowi6_mark = skb->mark,
                .flowi6_uid = sock_net_uid(net, sk),
                .daddr = iph->daddr,
index 9c914db..f0ec319 100644
@@ -226,6 +226,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
        }
 
        nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+       target.dst.protonum = IPPROTO_ICMPV6;
        if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
                return 0;
 
index dc066fd..87a0561 100644
@@ -2277,14 +2277,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
 
 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 {
-       bool from_set;
-
-       rcu_read_lock();
-       from_set = !!rcu_dereference(rt->from);
-       rcu_read_unlock();
-
        return !(rt->rt6i_flags & RTF_CACHE) &&
-               (rt->rt6i_flags & RTF_PCPU || from_set);
+               (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
 }
 
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
index 8d0ba75..9b2f272 100644
@@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
        rcu_read_unlock();
 
        genlmsg_end(msg, hdr);
-       genlmsg_reply(msg, info);
-
-       return 0;
+       return genlmsg_reply(msg, info);
 
 nla_put_failure:
        rcu_read_unlock();
index 8181ee7..ee5403c 100644
@@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
        } else {
                ip6_flow_hdr(hdr, 0, flowlabel);
                hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
+               memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
        }
 
        hdr->nexthdr = NEXTHDR_ROUTING;
index 1e03305..e8a1dab 100644
@@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
        }
 
        err = 0;
-       if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
+       if (__in6_dev_get(skb->dev) &&
+           !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
                goto out;
 
        if (t->parms.iph.daddr == 0)
index 26f1d43..fed6bec 100644
@@ -83,8 +83,7 @@
 #define L2TP_SLFLAG_S     0x40000000
 #define L2TP_SL_SEQ_MASK   0x00ffffff
 
-#define L2TP_HDR_SIZE_SEQ              10
-#define L2TP_HDR_SIZE_NOSEQ            6
+#define L2TP_HDR_SIZE_MAX              14
 
 /* Default trace flags */
 #define L2TP_DEFAULT_DEBUG_FLAGS       0
@@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
        __skb_pull(skb, sizeof(struct udphdr));
 
        /* Short packet? */
-       if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
+       if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
                l2tp_info(tunnel, L2TP_MSG_DATA,
                          "%s: recv short packet (len=%d)\n",
                          tunnel->name, skb->len);
@@ -884,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
                goto error;
        }
 
+       if (tunnel->version == L2TP_HDR_VER_3 &&
+           l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+               goto error;
+
        l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
        l2tp_session_dec_refcount(session);
 
index 9c9afe9..b2ce902 100644
@@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel)
 }
 #endif
 
+static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb,
+                                              unsigned char **ptr, unsigned char **optr)
+{
+       int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session);
+
+       if (opt_len > 0) {
+               int off = *ptr - *optr;
+
+               if (!pskb_may_pull(skb, off + opt_len))
+                       return -1;
+
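+               /* pskb_may_pull() may have reallocated the linear buffer,
+                * so re-anchor the cached cursors on the new skb->data
+                */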
+               if (skb->data != *optr) {
+                       *optr = skb->data;
+                       *ptr = skb->data + off;
+               }
+       }
+
+       return 0;
+}
+
 #define l2tp_printk(ptr, type, func, fmt, ...)                         \
 do {                                                                   \
        if (((ptr)->debug) & (type))                                    \
index 35f6f86..d4c6052 100644
@@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
                print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
        }
 
+       if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+               goto discard_sess;
+
        l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
        l2tp_session_dec_refcount(session);
 
index 237f1a4..0ae6899 100644
@@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
                print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
        }
 
+       if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+               goto discard_sess;
+
        l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
        l2tp_session_dec_refcount(session);
 
index e94b1a0..2c4cd41 100644
@@ -8,7 +8,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -366,6 +366,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 
        set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state);
 
+       ieee80211_agg_stop_txq(sta, tid);
+
        spin_unlock_bh(&sta->lock);
 
        ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n",
index 61c7ea9..8a49a74 100644
@@ -1945,9 +1945,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
                                int head_need, bool may_encrypt)
 {
        struct ieee80211_local *local = sdata->local;
+       struct ieee80211_hdr *hdr;
+       bool enc_tailroom;
        int tail_need = 0;
 
-       if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) {
+       hdr = (struct ieee80211_hdr *) skb->data;
+       enc_tailroom = may_encrypt &&
+                      (sdata->crypto_tx_tailroom_needed_cnt ||
+                       ieee80211_is_mgmt(hdr->frame_control));
+
+       if (enc_tailroom) {
                tail_need = IEEE80211_ENCRYPT_TAILROOM;
                tail_need -= skb_tailroom(skb);
                tail_need = max_t(int, tail_need, 0);
@@ -1955,8 +1962,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
 
        if (skb_cloned(skb) &&
            (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) ||
-            !skb_clone_writable(skb, ETH_HLEN) ||
-            (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt)))
+            !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom))
                I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
        else if (head_need || tail_need)
                I802_DEBUG_INC(local->tx_expand_skb_head);
index d0eb38b..ba950ae 100644
@@ -5,7 +5,7 @@
  * Copyright 2007      Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017     Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2146,6 +2146,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
                case NL80211_IFTYPE_AP_VLAN:
                case NL80211_IFTYPE_MONITOR:
                        break;
+               case NL80211_IFTYPE_ADHOC:
+                       if (sdata->vif.bss_conf.ibss_joined)
+                               WARN_ON(drv_join_ibss(local, sdata));
+                       /* fall through */
                default:
                        ieee80211_reconfig_stations(sdata);
                        /* fall through */
index 94f53a9..dda8930 100644
@@ -183,8 +183,8 @@ static int mpls_build_state(struct nlattr *nla,
                           &n_labels, NULL, extack))
                return -EINVAL;
 
-       newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
-                                    n_labels * sizeof(u32));
+       newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label,
+                                                n_labels));
        if (!newts)
                return -ENOMEM;
 
index cad48d0..8401cef 100644
@@ -29,6 +29,7 @@ config        IP_VS_IPV6
        bool "IPv6 support for IPVS"
        depends on IPV6 = y || IP_VS = IPV6
        select IP6_NF_IPTABLES
+       select NF_DEFRAG_IPV6
        ---help---
          Add IPv6 support to IPVS.
 
index e969dad..43bbaa3 100644
@@ -1566,14 +1566,12 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
                /* sorry, all this trouble for a no-hit :) */
                IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
                              "ip_vs_in: packet continues traversal as normal");
-               if (iph->fragoffs) {
-                       /* Fragment that couldn't be mapped to a conn entry
-                        * is missing module nf_defrag_ipv6
-                        */
-                       IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
+
+               /* Fragment couldn't be mapped to a conn entry */
+               if (iph->fragoffs)
                        IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
                                      "unhandled fragment");
-               }
+
                *verdict = NF_ACCEPT;
                return 0;
        }
index 7d63186..86afacb 100644
@@ -43,6 +43,7 @@
 #ifdef CONFIG_IP_VS_IPV6
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #endif
 #include <net/route.h>
 #include <net/sock.h>
@@ -895,6 +896,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 {
        struct ip_vs_dest *dest;
        unsigned int atype, i;
+       int ret = 0;
 
        EnterFunction(2);
 
@@ -905,6 +907,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
                        atype & IPV6_ADDR_LINKLOCAL) &&
                        !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
                        return -EINVAL;
+
+               ret = nf_defrag_ipv6_enable(svc->ipvs->net);
+               if (ret)
+                       return ret;
        } else
 #endif
        {
@@ -1228,6 +1234,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
                        ret = -EINVAL;
                        goto out_err;
                }
+
+               ret = nf_defrag_ipv6_enable(ipvs->net);
+               if (ret)
+                       goto out_err;
        }
 #endif
 
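
Together with the Kconfig hunk selecting NF_DEFRAG_IPV6 above, these changes make IPVS enable IPv6 defragmentation lazily: nf_defrag_ipv6_enable() is called the first time an IPv6 service or destination is configured, and any error is propagated to the configuration call instead of fragments going unhandled at packet time. A small sketch of that lazy-enable shape; the function names here are hypothetical stand-ins for the netns-aware kernel calls:

#include <stdbool.h>
#include <stdio.h>

static bool defrag_v6_enabled;          /* per-netns state in the kernel */

static int defrag_v6_enable(void)
{
        /* may fail, e.g. -ENOMEM while registering hooks */
        defrag_v6_enabled = true;
        return 0;
}

static int add_ipv6_dest(void)
{
        int ret = defrag_v6_enable();

        if (ret)
                return ret;             /* fail the config call itself */
        printf("dest added, defrag=%d\n", defrag_v6_enabled);
        return 0;
}

int main(void)
{
        return add_ipv6_dest();
}
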
index 815956a..08ee034 100644 (file)
@@ -1042,6 +1042,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
                }
 
                if (nf_ct_key_equal(h, tuple, zone, net)) {
+                       /* Tuple is taken already, so caller will need to find
+                        * a new source port to use.
+                        *
+                        * Only exception:
+                        * If the *original tuples* are identical, then both
+                        * conntracks refer to the same flow.
+                        * This is a rare situation; it can occur, e.g., when
+                        * more than one UDP packet is sent from the same socket
+                        * in different threads.
+                        *
+                        * Let nf_ct_resolve_clash() deal with this later.
+                        */
+                       if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                             &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
+                               continue;
+
                        NF_CT_STAT_INC_ATOMIC(net, found);
                        rcu_read_unlock();
                        return 1;
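
The added check prevents a false "port in use" answer during NAT port selection: if the colliding reply tuple belongs to a conntrack whose original-direction tuple is identical, both entries describe the same flow (e.g. the same UDP socket sending from two threads), and the collision is deferred to nf_ct_resolve_clash() at confirmation time. A userspace sketch of that skip, with toy tuple/conn types rather than the conntrack structures:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct tuple { unsigned int src, dst, sport, dport; };
struct conn  { struct tuple orig, reply; };

static bool tuple_eq(const struct tuple *a, const struct tuple *b)
{
        return memcmp(a, b, sizeof(*a)) == 0;
}

static bool tuple_taken(const struct conn *table, size_t n,
                        const struct conn *candidate)
{
        for (size_t i = 0; i < n; i++) {
                if (!tuple_eq(&table[i].reply, &candidate->reply))
                        continue;
                /* same flow inserted twice: not a real collision,
                 * leave it to clash resolution */
                if (tuple_eq(&table[i].orig, &candidate->orig))
                        continue;
                return true;
        }
        return false;
}

int main(void)
{
        struct conn table[] = { { { 1, 2, 1000, 53 }, { 2, 1, 53, 1000 } } };
        struct conn same    =   { { 1, 2, 1000, 53 }, { 2, 1, 53, 1000 } };

        printf("taken=%d\n", tuple_taken(table, 1, &same));     /* 0 */
        return 0;
}
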
index e92bedd..5ca5ec8 100644 (file)
@@ -131,6 +131,23 @@ static void nft_trans_destroy(struct nft_trans *trans)
        kfree(trans);
 }
 
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+       struct net *net = ctx->net;
+       struct nft_trans *trans;
+
+       if (!nft_set_is_anonymous(set))
+               return;
+
+       list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
+               if (trans->msg_type == NFT_MSG_NEWSET &&
+                   nft_trans_set(trans) == set) {
+                       nft_trans_set_bound(trans) = true;
+                       break;
+               }
+       }
+}
+
 static int nf_tables_register_hook(struct net *net,
                                   const struct nft_table *table,
                                   struct nft_chain *chain)
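
nft_set_trans_bind() walks the current transaction log newest-first and flags the NEWSET entry of an anonymous set once a rule binds it; the abort-path hunks later in this patch then skip destroying or unlinking bound sets, whose teardown is owned by the rule from that point on. A compact sketch of the marking plus the abort-side check, using toy transaction records rather than the nf_tables structures:

#include <stdbool.h>
#include <stdio.h>

enum msg { NEWSET, NEWRULE };
struct trans { enum msg type; int set_id; bool bound; };

static void set_trans_bind(struct trans *log, int n, int set_id)
{
        for (int i = n - 1; i >= 0; i--)        /* newest first */
                if (log[i].type == NEWSET && log[i].set_id == set_id) {
                        log[i].bound = true;
                        break;
                }
}

static void abort_all(const struct trans *log, int n)
{
        for (int i = n - 1; i >= 0; i--)
                if (log[i].type == NEWSET && !log[i].bound)
                        printf("destroy set %d\n", log[i].set_id);
}

int main(void)
{
        struct trans log[] = { { NEWSET, 7, false }, { NEWRULE, 7, false } };

        set_trans_bind(log, 2, 7);      /* a rule now references set 7 */
        abort_all(log, 2);              /* prints nothing: the set survives */
        return 0;
}
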
@@ -226,18 +243,6 @@ static int nft_delchain(struct nft_ctx *ctx)
        return err;
 }
 
-/* either expr ops provide both activate/deactivate, or neither */
-static bool nft_expr_check_ops(const struct nft_expr_ops *ops)
-{
-       if (!ops)
-               return true;
-
-       if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate)))
-               return false;
-
-       return true;
-}
-
 static void nft_rule_expr_activate(const struct nft_ctx *ctx,
                                   struct nft_rule *rule)
 {
@@ -253,14 +258,15 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
 }
 
 static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
-                                    struct nft_rule *rule)
+                                    struct nft_rule *rule,
+                                    enum nft_trans_phase phase)
 {
        struct nft_expr *expr;
 
        expr = nft_expr_first(rule);
        while (expr != nft_expr_last(rule) && expr->ops) {
                if (expr->ops->deactivate)
-                       expr->ops->deactivate(ctx, expr);
+                       expr->ops->deactivate(ctx, expr, phase);
 
                expr = nft_expr_next(expr);
        }
@@ -311,7 +317,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
                nft_trans_destroy(trans);
                return err;
        }
-       nft_rule_expr_deactivate(ctx, rule);
+       nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);
 
        return 0;
 }
@@ -1972,9 +1978,6 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
  */
 int nft_register_expr(struct nft_expr_type *type)
 {
-       if (!nft_expr_check_ops(type->ops))
-               return -EINVAL;
-
        nfnl_lock(NFNL_SUBSYS_NFTABLES);
        if (type->family == NFPROTO_UNSPEC)
                list_add_tail_rcu(&type->list, &nf_tables_expressions);
@@ -2122,10 +2125,6 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
                        err = PTR_ERR(ops);
                        goto err1;
                }
-               if (!nft_expr_check_ops(ops)) {
-                       err = -EINVAL;
-                       goto err1;
-               }
        } else
                ops = type->ops;
 
@@ -2554,7 +2553,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 static void nf_tables_rule_release(const struct nft_ctx *ctx,
                                   struct nft_rule *rule)
 {
-       nft_rule_expr_deactivate(ctx, rule);
+       nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
        nf_tables_rule_destroy(ctx, rule);
 }
 
@@ -3760,39 +3759,30 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 bind:
        binding->chain = ctx->chain;
        list_add_tail_rcu(&binding->list, &set->bindings);
+       nft_set_trans_bind(ctx, set);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(nf_tables_bind_set);
 
-void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
-                         struct nft_set_binding *binding)
-{
-       if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
-           nft_is_active(ctx->net, set))
-               list_add_tail_rcu(&set->list, &ctx->table->sets);
-
-       list_add_tail_rcu(&binding->list, &set->bindings);
-}
-EXPORT_SYMBOL_GPL(nf_tables_rebind_set);
-
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
-                         struct nft_set_binding *binding)
+                         struct nft_set_binding *binding, bool event)
 {
        list_del_rcu(&binding->list);
 
-       if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
-           nft_is_active(ctx->net, set))
+       if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
                list_del_rcu(&set->list);
+               if (event)
+                       nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
+                                            GFP_KERNEL);
+       }
 }
 EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
 
 void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
 {
-       if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
-           nft_is_active(ctx->net, set)) {
-               nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
+       if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
                nft_set_destroy(set);
-       }
 }
 EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
 
@@ -6621,6 +6611,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                        nf_tables_rule_notify(&trans->ctx,
                                              nft_trans_rule(trans),
                                              NFT_MSG_DELRULE);
+                       nft_rule_expr_deactivate(&trans->ctx,
+                                                nft_trans_rule(trans),
+                                                NFT_TRANS_COMMIT);
                        break;
                case NFT_MSG_NEWSET:
                        nft_clear(net, nft_trans_set(trans));
@@ -6707,7 +6700,8 @@ static void nf_tables_abort_release(struct nft_trans *trans)
                nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
                break;
        case NFT_MSG_NEWSET:
-               nft_set_destroy(nft_trans_set(trans));
+               if (!nft_trans_set_bound(trans))
+                       nft_set_destroy(nft_trans_set(trans));
                break;
        case NFT_MSG_NEWSETELEM:
                nft_set_elem_destroy(nft_trans_elem_set(trans),
@@ -6768,7 +6762,9 @@ static int __nf_tables_abort(struct net *net)
                case NFT_MSG_NEWRULE:
                        trans->ctx.chain->use--;
                        list_del_rcu(&nft_trans_rule(trans)->list);
-                       nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans));
+                       nft_rule_expr_deactivate(&trans->ctx,
+                                                nft_trans_rule(trans),
+                                                NFT_TRANS_ABORT);
                        break;
                case NFT_MSG_DELRULE:
                        trans->ctx.chain->use++;
@@ -6778,7 +6774,8 @@ static int __nf_tables_abort(struct net *net)
                        break;
                case NFT_MSG_NEWSET:
                        trans->ctx.table->use--;
-                       list_del_rcu(&nft_trans_set(trans)->list);
+                       if (!nft_trans_set_bound(trans))
+                               list_del_rcu(&nft_trans_set(trans)->list);
                        break;
                case NFT_MSG_DELSET:
                        trans->ctx.table->use++;
index 5eb2694..0a4bad5 100644 (file)
@@ -61,6 +61,21 @@ static struct nft_compat_net *nft_compat_pernet(struct net *net)
        return net_generic(net, nft_compat_net_id);
 }
 
+static void nft_xt_get(struct nft_xt *xt)
+{
+       /* refcount_inc() warns on 0 -> 1 transition, but we can't
+        * init the reference count to 1 in .select_ops -- we can't
+        * undo such an increase when another expression inside the same
+        * rule fails afterwards.
+        */
+       if (xt->listcnt == 0)
+               refcount_set(&xt->refcnt, 1);
+       else
+               refcount_inc(&xt->refcnt);
+
+       xt->listcnt++;
+}
+
 static bool nft_xt_put(struct nft_xt *xt)
 {
        if (refcount_dec_and_test(&xt->refcnt)) {
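
As the new comment explains, refcount_inc()-style counters refuse to resurrect an object from zero, so the first reference has to be installed with a plain set. A userspace sketch of that 0 -> 1 pattern using C11 atomics; the struct and helper names are illustrative, not the kernel API:

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

struct xt_like {
        atomic_uint refcnt;
        unsigned int listcnt;
};

static void ref_inc(struct xt_like *xt)
{
        unsigned int old = atomic_fetch_add(&xt->refcnt, 1u);

        assert(old != 0);       /* refcount_inc() WARNs on a 0 -> 1 transition */
}

static void xt_get(struct xt_like *xt)
{
        if (xt->listcnt == 0)
                atomic_store(&xt->refcnt, 1u);  /* install first reference */
        else
                ref_inc(xt);
        xt->listcnt++;
}

int main(void)
{
        struct xt_like xt = { 0, 0 };

        xt_get(&xt);
        xt_get(&xt);
        printf("refcnt=%u listcnt=%u\n", atomic_load(&xt.refcnt), xt.listcnt);
        return 0;
}
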
@@ -291,7 +306,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
                return -EINVAL;
 
        nft_xt = container_of(expr->ops, struct nft_xt, ops);
-       refcount_inc(&nft_xt->refcnt);
+       nft_xt_get(nft_xt);
        return 0;
 }
 
@@ -300,6 +315,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 {
        struct xt_target *target = expr->ops->data;
        void *info = nft_expr_priv(expr);
+       struct module *me = target->me;
        struct xt_tgdtor_param par;
 
        par.net = ctx->net;
@@ -310,7 +326,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
                par.target->destroy(&par);
 
        if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
-               module_put(target->me);
+               module_put(me);
 }
 
 static int nft_extension_dump_info(struct sk_buff *skb, int attr,
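
Caching target->me before nft_xt_put() fixes a use-after-free: the put may free the nft_xt that expr->ops points into, so target (reached through ops->data) must not be dereferenced afterwards. The general rule, sketched below in plain C: copy out anything you still need before dropping what may be the last reference.

#include <stdio.h>
#include <stdlib.h>

struct module { const char *name; };
struct target { struct module *me; };

static int put_last_ref(struct target *t)
{
        free(t);                /* drops the last reference: object is gone */
        return 1;               /* tell the caller it was the last one */
}

static void destroy(struct target *t)
{
        struct module *me = t->me;      /* cache before the put */

        if (put_last_ref(t))
                printf("module_put(%s)\n", me->name);   /* safe: uses the copy */
}

int main(void)
{
        static struct module m = { "xt_dummy" };
        struct target *t = malloc(sizeof(*t));

        t->me = &m;
        destroy(t);
        return 0;
}
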
@@ -504,7 +520,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
                return ret;
 
        nft_xt = container_of(expr->ops, struct nft_xt, ops);
-       refcount_inc(&nft_xt->refcnt);
+       nft_xt_get(nft_xt);
        return 0;
 }
 
@@ -558,41 +574,16 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
        __nft_match_destroy(ctx, expr, nft_expr_priv(expr));
 }
 
-static void nft_compat_activate(const struct nft_ctx *ctx,
-                               const struct nft_expr *expr,
-                               struct list_head *h)
-{
-       struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
-
-       if (xt->listcnt == 0)
-               list_add(&xt->head, h);
-
-       xt->listcnt++;
-}
-
-static void nft_compat_activate_mt(const struct nft_ctx *ctx,
-                                  const struct nft_expr *expr)
-{
-       struct nft_compat_net *cn = nft_compat_pernet(ctx->net);
-
-       nft_compat_activate(ctx, expr, &cn->nft_match_list);
-}
-
-static void nft_compat_activate_tg(const struct nft_ctx *ctx,
-                                  const struct nft_expr *expr)
-{
-       struct nft_compat_net *cn = nft_compat_pernet(ctx->net);
-
-       nft_compat_activate(ctx, expr, &cn->nft_target_list);
-}
-
 static void nft_compat_deactivate(const struct nft_ctx *ctx,
-                                 const struct nft_expr *expr)
+                                 const struct nft_expr *expr,
+                                 enum nft_trans_phase phase)
 {
        struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
 
-       if (--xt->listcnt == 0)
-               list_del_init(&xt->head);
+       if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) {
+               if (--xt->listcnt == 0)
+                       list_del_init(&xt->head);
+       }
 }
 
 static void
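
The deactivate callback now receives the transaction phase, and nft_compat only drops its list linkage when the transaction actually ends (abort or commit), not during prepare. A small sketch of phase-dispatched teardown; the enum mirrors the new nft_trans_phase in spirit only:

#include <stdio.h>

enum trans_phase { TRANS_PREPARE, TRANS_ABORT, TRANS_COMMIT, TRANS_RELEASE };

struct expr_state { int listcnt; };

static void deactivate(struct expr_state *st, enum trans_phase phase)
{
        switch (phase) {
        case TRANS_ABORT:
        case TRANS_COMMIT:
                if (--st->listcnt == 0)
                        printf("unlinked from list\n");
                break;
        default:
                /* PREPARE/RELEASE: nothing to undo at this point */
                break;
        }
}

int main(void)
{
        struct expr_state st = { 1 };

        deactivate(&st, TRANS_PREPARE);         /* no-op */
        deactivate(&st, TRANS_COMMIT);          /* unlinks */
        return 0;
}
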
@@ -848,7 +839,6 @@ nft_match_select_ops(const struct nft_ctx *ctx,
        nft_match->ops.eval = nft_match_eval;
        nft_match->ops.init = nft_match_init;
        nft_match->ops.destroy = nft_match_destroy;
-       nft_match->ops.activate = nft_compat_activate_mt;
        nft_match->ops.deactivate = nft_compat_deactivate;
        nft_match->ops.dump = nft_match_dump;
        nft_match->ops.validate = nft_match_validate;
@@ -866,7 +856,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 
        nft_match->ops.size = matchsize;
 
-       nft_match->listcnt = 1;
+       nft_match->listcnt = 0;
        list_add(&nft_match->head, &cn->nft_match_list);
 
        return &nft_match->ops;
@@ -953,7 +943,6 @@ nft_target_select_ops(const struct nft_ctx *ctx,
        nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
        nft_target->ops.init = nft_target_init;
        nft_target->ops.destroy = nft_target_destroy;
-       nft_target->ops.activate = nft_compat_activate_tg;
        nft_target->ops.deactivate = nft_compat_deactivate;
        nft_target->ops.dump = nft_target_dump;
        nft_target->ops.validate = nft_target_validate;
@@ -964,7 +953,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
        else
                nft_target->ops.eval = nft_target_eval_xt;
 
-       nft_target->listcnt = 1;
+       nft_target->listcnt = 0;
        list_add(&nft_target->head, &cn->nft_target_list);
 
        return &nft_target->ops;
index 9658493..a8a74a1 100644 (file)
@@ -234,20 +234,17 @@ err1:
        return err;
 }
 
-static void nft_dynset_activate(const struct nft_ctx *ctx,
-                               const struct nft_expr *expr)
-{
-       struct nft_dynset *priv = nft_expr_priv(expr);
-
-       nf_tables_rebind_set(ctx, priv->set, &priv->binding);
-}
-
 static void nft_dynset_deactivate(const struct nft_ctx *ctx,
-                                 const struct nft_expr *expr)
+                                 const struct nft_expr *expr,
+                                 enum nft_trans_phase phase)
 {
        struct nft_dynset *priv = nft_expr_priv(expr);
 
-       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+       if (phase == NFT_TRANS_PREPARE)
+               return;
+
+       nf_tables_unbind_set(ctx, priv->set, &priv->binding,
+                            phase == NFT_TRANS_COMMIT);
 }
 
 static void nft_dynset_destroy(const struct nft_ctx *ctx,
@@ -295,7 +292,6 @@ static const struct nft_expr_ops nft_dynset_ops = {
        .eval           = nft_dynset_eval,
        .init           = nft_dynset_init,
        .destroy        = nft_dynset_destroy,
-       .activate       = nft_dynset_activate,
        .deactivate     = nft_dynset_deactivate,
        .dump           = nft_dynset_dump,
 };
index 3e5ed78..5ec4312 100644 (file)
@@ -72,10 +72,14 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
 }
 
 static void nft_immediate_deactivate(const struct nft_ctx *ctx,
-                                    const struct nft_expr *expr)
+                                    const struct nft_expr *expr,
+                                    enum nft_trans_phase phase)
 {
        const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 
+       if (phase == NFT_TRANS_COMMIT)
+               return;
+
        return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
 }
 
index 227b2b1..14496da 100644 (file)
@@ -121,20 +121,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
        return 0;
 }
 
-static void nft_lookup_activate(const struct nft_ctx *ctx,
-                               const struct nft_expr *expr)
-{
-       struct nft_lookup *priv = nft_expr_priv(expr);
-
-       nf_tables_rebind_set(ctx, priv->set, &priv->binding);
-}
-
 static void nft_lookup_deactivate(const struct nft_ctx *ctx,
-                                 const struct nft_expr *expr)
+                                 const struct nft_expr *expr,
+                                 enum nft_trans_phase phase)
 {
        struct nft_lookup *priv = nft_expr_priv(expr);
 
-       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+       if (phase == NFT_TRANS_PREPARE)
+               return;
+
+       nf_tables_unbind_set(ctx, priv->set, &priv->binding,
+                            phase == NFT_TRANS_COMMIT);
 }
 
 static void nft_lookup_destroy(const struct nft_ctx *ctx,
@@ -225,7 +222,6 @@ static const struct nft_expr_ops nft_lookup_ops = {
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
        .eval           = nft_lookup_eval,
        .init           = nft_lookup_init,
-       .activate       = nft_lookup_activate,
        .deactivate     = nft_lookup_deactivate,
        .destroy        = nft_lookup_destroy,
        .dump           = nft_lookup_dump,
index c1f2adf..79ef074 100644 (file)
@@ -156,20 +156,17 @@ nla_put_failure:
        return -1;
 }
 
-static void nft_objref_map_activate(const struct nft_ctx *ctx,
-                                   const struct nft_expr *expr)
-{
-       struct nft_objref_map *priv = nft_expr_priv(expr);
-
-       nf_tables_rebind_set(ctx, priv->set, &priv->binding);
-}
-
 static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
-                                     const struct nft_expr *expr)
+                                     const struct nft_expr *expr,
+                                     enum nft_trans_phase phase)
 {
        struct nft_objref_map *priv = nft_expr_priv(expr);
 
-       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+       if (phase == NFT_TRANS_PREPARE)
+               return;
+
+       nf_tables_unbind_set(ctx, priv->set, &priv->binding,
+                            phase == NFT_TRANS_COMMIT);
 }
 
 static void nft_objref_map_destroy(const struct nft_ctx *ctx,
@@ -186,7 +183,6 @@ static const struct nft_expr_ops nft_objref_map_ops = {
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
        .eval           = nft_objref_map_eval,
        .init           = nft_objref_map_init,
-       .activate       = nft_objref_map_activate,
        .deactivate     = nft_objref_map_deactivate,
        .destroy        = nft_objref_map_destroy,
        .dump           = nft_objref_map_dump,
index aecadd4..13e1ac3 100644 (file)
@@ -1899,7 +1899,7 @@ static int __init xt_init(void)
                seqcount_init(&per_cpu(xt_recseq, i));
        }
 
-       xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
+       xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
        if (!xt)
                return -ENOMEM;
 
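
kcalloc() is kmalloc_array() plus zero-initialization; both reject n * size overflow, but here the xt_af table is expected to start zeroed rather than holding stale heap contents. The userspace analogue is calloc(); the struct below is a stand-in, not the real xt_af:

#include <stdio.h>
#include <stdlib.h>

struct af_entry { void *target; unsigned long hits; };

int main(void)
{
        size_t n = 13;          /* stand-in for the NFPROTO_NUMPROTO-sized table */
        struct af_entry *xt = calloc(n, sizeof(*xt));   /* zeroed, overflow-checked */

        if (!xt)
                return 1;
        printf("xt[5].hits = %lu (guaranteed zero)\n", xt[5].hits);
        free(xt);
        return 0;
}
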
index 3b1a789..1cd1d83 100644 (file)
@@ -4292,7 +4292,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
                rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
                if (unlikely(rb->frames_per_block == 0))
                        goto out;
-               if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
+               if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
                        goto out;
                if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
                                        req->tp_frame_nr))
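
The corrected guard bounds the product that is actually computed next: frames_per_block * tp_block_nr is compared against tp_frame_nr, so it is that multiplication, not tp_block_size * tp_block_nr, which must be proven not to wrap a u32. A distilled sketch of the check:

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

static bool ring_sizes_ok(unsigned int frames_per_block,
                          unsigned int block_nr, unsigned int frame_nr)
{
        if (frames_per_block == 0 || block_nr == 0)
                return false;
        if (frames_per_block > UINT_MAX / block_nr)     /* product would wrap */
                return false;
        return frames_per_block * block_nr == frame_nr;
}

int main(void)
{
        printf("%d\n", ring_sizes_ok(16, 256, 4096));   /* 1: consistent ring */
        printf("%d\n", ring_sizes_ok(1u << 31, 4, 0));  /* 0: would overflow */
        return 0;
}
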
index 762d2c6..17c9d9f 100644 (file)
@@ -78,10 +78,10 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
        __rds_create_bind_key(key, addr, port, scope_id);
        rcu_read_lock();
        rs = rhashtable_lookup(&bind_hash_table, key, ht_parms);
-       if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
-               rds_sock_addref(rs);
-       else
+       if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) ||
+                  !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt)))
                rs = NULL;
+
        rcu_read_unlock();
 
        rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
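
The bind-hash lookup runs under RCU, where a socket whose refcount already hit zero may still be visible; taking a reference unconditionally would resurrect a dying object. refcount_inc_not_zero() only succeeds while the count is non-zero. A C11-atomics sketch of that primitive, with a toy socket type:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct sock_like {
        atomic_uint refcnt;
        bool dead;
};

static struct sock_like *lookup_get(struct sock_like *rs)
{
        unsigned int c;

        if (!rs || rs->dead)
                return NULL;

        c = atomic_load(&rs->refcnt);
        do {
                if (c == 0)
                        return NULL;    /* already being freed */
        } while (!atomic_compare_exchange_weak(&rs->refcnt, &c, c + 1));

        return rs;
}

int main(void)
{
        struct sock_like live = { 2, false }, dying = { 0, false };

        printf("live:  %s\n", lookup_get(&live)  ? "got ref" : "NULL");
        printf("dying: %s\n", lookup_get(&dying) ? "got ref" : "NULL");
        return 0;
}
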
index eaf19eb..3f7bb11 100644 (file)
@@ -596,6 +596,7 @@ error_requeue_call:
        }
 error_no_call:
        release_sock(&rx->sk);
+error_trace:
        trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
        return ret;
 
@@ -604,7 +605,7 @@ wait_interrupted:
 wait_error:
        finish_wait(sk_sleep(&rx->sk), &wait);
        call = NULL;
-       goto error_no_call;
+       goto error_trace;
 }
 
 /**
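
The new error_trace label lets the wait_error path skip release_sock(): that path never locked the socket, yet it still shares the common trace-and-return tail. A sketch of entering an error ladder at the correct rung; the function and error value are illustrative:

#include <stdio.h>

static int recv_op(int wait_failed)
{
        int ret = -11;                  /* simplified fixed error */

        if (wait_failed)
                goto error_trace;       /* lock was never taken: skip unlock */

        printf("release_sock()\n");     /* only on paths that locked */
error_trace:
        printf("trace ret=%d\n", ret);  /* common tracepoint for all exits */
        return ret;
}

int main(void)
{
        recv_op(0);
        recv_op(1);
        return 0;
}
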
index d4b8355..aecf1bf 100644 (file)
@@ -543,7 +543,7 @@ int tcf_register_action(struct tc_action_ops *act,
 
        write_lock(&act_mod_lock);
        list_for_each_entry(a, &act_base, head) {
-               if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+               if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) {
                        write_unlock(&act_mod_lock);
                        unregister_pernet_subsys(ops);
                        return -EEXIST;
index c763384..aa5c38d 100644 (file)
@@ -396,7 +396,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_bpf_ops __read_mostly = {
        .kind           =       "bpf",
-       .type           =       TCA_ACT_BPF,
+       .id             =       TCA_ID_BPF,
        .owner          =       THIS_MODULE,
        .act            =       tcf_bpf_act,
        .dump           =       tcf_bpf_dump,
index 8475913..5d24993 100644 (file)
@@ -204,7 +204,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_connmark_ops = {
        .kind           =       "connmark",
-       .type           =       TCA_ACT_CONNMARK,
+       .id             =       TCA_ID_CONNMARK,
        .owner          =       THIS_MODULE,
        .act            =       tcf_connmark_act,
        .dump           =       tcf_connmark_dump,
index 3dc25b7..945fb34 100644 (file)
@@ -660,7 +660,7 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
 
 static struct tc_action_ops act_csum_ops = {
        .kind           = "csum",
-       .type           = TCA_ACT_CSUM,
+       .id             = TCA_ID_CSUM,
        .owner          = THIS_MODULE,
        .act            = tcf_csum_act,
        .dump           = tcf_csum_dump,
index b61c20e..93da000 100644 (file)
@@ -253,7 +253,7 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
 
 static struct tc_action_ops act_gact_ops = {
        .kind           =       "gact",
-       .type           =       TCA_ACT_GACT,
+       .id             =       TCA_ID_GACT,
        .owner          =       THIS_MODULE,
        .act            =       tcf_gact_act,
        .stats_update   =       tcf_gact_stats_update,
index 30b63fa..9b1f2b3 100644 (file)
@@ -864,7 +864,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_ife_ops = {
        .kind = "ife",
-       .type = TCA_ACT_IFE,
+       .id = TCA_ID_IFE,
        .owner = THIS_MODULE,
        .act = tcf_ife_act,
        .dump = tcf_ife_dump,
index 8af6c11..1bad190 100644 (file)
@@ -338,7 +338,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_ipt_ops = {
        .kind           =       "ipt",
-       .type           =       TCA_ACT_IPT,
+       .id             =       TCA_ID_IPT,
        .owner          =       THIS_MODULE,
        .act            =       tcf_ipt_act,
        .dump           =       tcf_ipt_dump,
@@ -387,7 +387,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_xt_ops = {
        .kind           =       "xt",
-       .type           =       TCA_ACT_XT,
+       .id             =       TCA_ID_XT,
        .owner          =       THIS_MODULE,
        .act            =       tcf_ipt_act,
        .dump           =       tcf_ipt_dump,
index c8cf4d1..6692fd0 100644 (file)
@@ -400,7 +400,7 @@ static void tcf_mirred_put_dev(struct net_device *dev)
 
 static struct tc_action_ops act_mirred_ops = {
        .kind           =       "mirred",
-       .type           =       TCA_ACT_MIRRED,
+       .id             =       TCA_ID_MIRRED,
        .owner          =       THIS_MODULE,
        .act            =       tcf_mirred_act,
        .stats_update   =       tcf_stats_update,
index c5c1e23..543eab9 100644 (file)
@@ -304,7 +304,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_nat_ops = {
        .kind           =       "nat",
-       .type           =       TCA_ACT_NAT,
+       .id             =       TCA_ID_NAT,
        .owner          =       THIS_MODULE,
        .act            =       tcf_nat_act,
        .dump           =       tcf_nat_dump,
index 2b372a0..a803738 100644 (file)
@@ -406,7 +406,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
        struct tcf_t t;
        int s;
 
-       s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
+       s = struct_size(opt, keys, p->tcfp_nkeys);
 
        /* netlink spinlocks held above us - must use ATOMIC */
        opt = kzalloc(s, GFP_ATOMIC);
@@ -470,7 +470,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_pedit_ops = {
        .kind           =       "pedit",
-       .type           =       TCA_ACT_PEDIT,
+       .id             =       TCA_ID_PEDIT,
        .owner          =       THIS_MODULE,
        .act            =       tcf_pedit_act,
        .dump           =       tcf_pedit_dump,
index ec8ec55..8271a62 100644 (file)
@@ -366,7 +366,7 @@ MODULE_LICENSE("GPL");
 
 static struct tc_action_ops act_police_ops = {
        .kind           =       "police",
-       .type           =       TCA_ID_POLICE,
+       .id             =       TCA_ID_POLICE,
        .owner          =       THIS_MODULE,
        .act            =       tcf_police_act,
        .dump           =       tcf_police_dump,
index 1a0c682..203e399 100644 (file)
@@ -233,7 +233,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_sample_ops = {
        .kind     = "sample",
-       .type     = TCA_ACT_SAMPLE,
+       .id       = TCA_ID_SAMPLE,
        .owner    = THIS_MODULE,
        .act      = tcf_sample_act,
        .dump     = tcf_sample_dump,
index 902957b..d54cb60 100644 (file)
@@ -19,8 +19,6 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
-#define TCA_ACT_SIMP 22
-
 #include <linux/tc_act/tc_defact.h>
 #include <net/tc_act/tc_defact.h>
 
@@ -197,7 +195,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_simp_ops = {
        .kind           =       "simple",
-       .type           =       TCA_ACT_SIMP,
+       .id             =       TCA_ID_SIMP,
        .owner          =       THIS_MODULE,
        .act            =       tcf_simp_act,
        .dump           =       tcf_simp_dump,
index 64dba37..39f8a67 100644 (file)
@@ -305,7 +305,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_skbedit_ops = {
        .kind           =       "skbedit",
-       .type           =       TCA_ACT_SKBEDIT,
+       .id             =       TCA_ID_SKBEDIT,
        .owner          =       THIS_MODULE,
        .act            =       tcf_skbedit_act,
        .dump           =       tcf_skbedit_dump,
index 59710a1..7bac1d7 100644 (file)
@@ -260,7 +260,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_skbmod_ops = {
        .kind           =       "skbmod",
-       .type           =       TCA_ACT_SKBMOD,
+       .id             =       TCA_ID_SKBMOD,
        .owner          =       THIS_MODULE,
        .act            =       tcf_skbmod_act,
        .dump           =       tcf_skbmod_dump,
index 8b43fe0..9104b8e 100644 (file)
@@ -563,7 +563,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_tunnel_key_ops = {
        .kind           =       "tunnel_key",
-       .type           =       TCA_ACT_TUNNEL_KEY,
+       .id             =       TCA_ID_TUNNEL_KEY,
        .owner          =       THIS_MODULE,
        .act            =       tunnel_key_act,
        .dump           =       tunnel_key_dump,
index 93fdaf7..ac00615 100644 (file)
@@ -297,7 +297,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_vlan_ops = {
        .kind           =       "vlan",
-       .type           =       TCA_ACT_VLAN,
+       .id             =       TCA_ID_VLAN,
        .owner          =       THIS_MODULE,
        .act            =       tcf_vlan_act,
        .dump           =       tcf_vlan_dump,
index 02cf6d2..9ad5389 100644 (file)
@@ -38,7 +38,6 @@
 #include <net/tc_act/tc_csum.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_skbedit.h>
-#include <net/tc_act/tc_mirred.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
 
@@ -69,7 +68,8 @@ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
 }
 
 static const struct tcf_proto_ops *
-tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
+                    struct netlink_ext_ack *extack)
 {
        const struct tcf_proto_ops *ops;
 
@@ -77,9 +77,11 @@ tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
        if (ops)
                return ops;
 #ifdef CONFIG_MODULES
-       rtnl_unlock();
+       if (rtnl_held)
+               rtnl_unlock();
        request_module("cls_%s", kind);
-       rtnl_lock();
+       if (rtnl_held)
+               rtnl_lock();
        ops = __tcf_proto_lookup_ops(kind);
        /* We dropped the RTNL semaphore in order to perform
         * the module load. So, even if we succeeded in loading
@@ -160,8 +162,26 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
        return TC_H_MAJ(first);
 }
 
+static bool tcf_proto_is_unlocked(const char *kind)
+{
+       const struct tcf_proto_ops *ops;
+       bool ret;
+
+       ops = tcf_proto_lookup_ops(kind, false, NULL);
+       /* On error, return false so that the rtnl lock is taken. Proto
+        * lookup/create functions will perform the lookup again and handle
+        * errors properly.
+        */
+       if (IS_ERR(ops))
+               return false;
+
+       ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
+       module_put(ops->owner);
+       return ret;
+}
+
 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
                                          u32 prio, struct tcf_chain *chain,
+                                         bool rtnl_held,
                                          struct netlink_ext_ack *extack)
 {
        struct tcf_proto *tp;
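
tcf_proto_is_unlocked() is a capability probe used to decide up front whether rtnl is needed: look the ops up (which also pins the owning module in the kernel), test a flag, and drop the reference; on any lookup failure, fall back to the safe answer and take the lock. A simplified sketch with a static toy registry instead of module loading:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define OPS_DOIT_UNLOCKED 0x1u

struct proto_ops { const char *kind; unsigned int flags; };

static const struct proto_ops registry[] = {
        { "flower", OPS_DOIT_UNLOCKED },
        { "u32",    0 },
};

static const struct proto_ops *lookup_ops(const char *kind)
{
        for (size_t i = 0; i < sizeof(registry) / sizeof(registry[0]); i++)
                if (!strcmp(registry[i].kind, kind))
                        return &registry[i];
        return NULL;    /* kernel: a later module_put() balances the lookup */
}

static bool proto_is_unlocked(const char *kind)
{
        const struct proto_ops *ops = lookup_ops(kind);

        if (!ops)
                return false;   /* unknown classifier: keep rtnl held */
        return ops->flags & OPS_DOIT_UNLOCKED;
}

int main(void)
{
        printf("flower unlocked=%d\n", proto_is_unlocked("flower"));
        printf("u32 unlocked=%d\n", proto_is_unlocked("u32"));
        return 0;
}
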
@@ -171,7 +191,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
        if (!tp)
                return ERR_PTR(-ENOBUFS);
 
-       tp->ops = tcf_proto_lookup_ops(kind, extack);
+       tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
        if (IS_ERR(tp->ops)) {
                err = PTR_ERR(tp->ops);
                goto errout;
@@ -180,6 +200,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
        tp->protocol = protocol;
        tp->prio = prio;
        tp->chain = chain;
+       spin_lock_init(&tp->lock);
+       refcount_set(&tp->refcnt, 1);
 
        err = tp->ops->init(tp);
        if (err) {
@@ -193,14 +215,75 @@ errout:
        return ERR_PTR(err);
 }
 
-static void tcf_proto_destroy(struct tcf_proto *tp,
+static void tcf_proto_get(struct tcf_proto *tp)
+{
+       refcount_inc(&tp->refcnt);
+}
+
+static void tcf_chain_put(struct tcf_chain *chain);
+
+static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
                              struct netlink_ext_ack *extack)
 {
-       tp->ops->destroy(tp, extack);
+       tp->ops->destroy(tp, rtnl_held, extack);
+       tcf_chain_put(tp->chain);
        module_put(tp->ops->owner);
        kfree_rcu(tp, rcu);
 }
 
+static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
+                         struct netlink_ext_ack *extack)
+{
+       if (refcount_dec_and_test(&tp->refcnt))
+               tcf_proto_destroy(tp, rtnl_held, extack);
+}
+
+static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg)
+{
+       return -1;
+}
+
+static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
+{
+       struct tcf_walker walker = { .fn = walker_noop, };
+
+       if (tp->ops->walk) {
+               tp->ops->walk(tp, &walker, rtnl_held);
+               return !walker.stop;
+       }
+       return true;
+}
+
+static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
+{
+       spin_lock(&tp->lock);
+       if (tcf_proto_is_empty(tp, rtnl_held))
+               tp->deleting = true;
+       spin_unlock(&tp->lock);
+       return tp->deleting;
+}
+
+static void tcf_proto_mark_delete(struct tcf_proto *tp)
+{
+       spin_lock(&tp->lock);
+       tp->deleting = true;
+       spin_unlock(&tp->lock);
+}
+
+static bool tcf_proto_is_deleting(struct tcf_proto *tp)
+{
+       bool deleting;
+
+       spin_lock(&tp->lock);
+       deleting = tp->deleting;
+       spin_unlock(&tp->lock);
+
+       return deleting;
+}
+
+#define ASSERT_BLOCK_LOCKED(block)                                     \
+       lockdep_assert_held(&(block)->lock)
+
 struct tcf_filter_chain_list_item {
        struct list_head list;
        tcf_chain_head_change_t *chain_head_change;
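
tcf_proto_is_empty() above reuses the existing walk infrastructure as an emptiness test: walker_noop() returns -1 so the walk stops at the very first filter, and a stopped walk means "not empty". A distilled sketch of the trick over a plain array:

#include <stdbool.h>
#include <stdio.h>

struct walker { int (*fn)(int item, struct walker *w); bool stop; };

static int walker_noop(int item, struct walker *w)
{
        (void)item; (void)w;
        return -1;              /* stop immediately: we only probe existence */
}

static void walk(const int *items, size_t n, struct walker *w)
{
        for (size_t i = 0; i < n; i++)
                if (w->fn(items[i], w) < 0) {
                        w->stop = true;
                        return;
                }
}

static bool is_empty(const int *items, size_t n)
{
        struct walker w = { walker_noop, false };

        walk(items, n, &w);
        return !w.stop;         /* never stopped -> nothing was there */
}

int main(void)
{
        int one[] = { 42 };

        printf("empty([])   = %d\n", is_empty(NULL, 0));
        printf("empty([42]) = %d\n", is_empty(one, 1));
        return 0;
}
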
@@ -212,10 +295,13 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
 {
        struct tcf_chain *chain;
 
+       ASSERT_BLOCK_LOCKED(block);
+
        chain = kzalloc(sizeof(*chain), GFP_KERNEL);
        if (!chain)
                return NULL;
        list_add_tail(&chain->list, &block->chain_list);
+       mutex_init(&chain->filter_chain_lock);
        chain->block = block;
        chain->index = chain_index;
        chain->refcnt = 1;
@@ -239,29 +325,59 @@ static void tcf_chain0_head_change(struct tcf_chain *chain,
 
        if (chain->index)
                return;
+
+       mutex_lock(&block->lock);
        list_for_each_entry(item, &block->chain0.filter_chain_list, list)
                tcf_chain_head_change_item(item, tp_head);
+       mutex_unlock(&block->lock);
 }
 
-static void tcf_chain_destroy(struct tcf_chain *chain)
+/* Returns true if block can be safely freed. */
+
+static bool tcf_chain_detach(struct tcf_chain *chain)
 {
        struct tcf_block *block = chain->block;
 
+       ASSERT_BLOCK_LOCKED(block);
+
        list_del(&chain->list);
        if (!chain->index)
                block->chain0.chain = NULL;
+
+       if (list_empty(&block->chain_list) &&
+           refcount_read(&block->refcnt) == 0)
+               return true;
+
+       return false;
+}
+
+static void tcf_block_destroy(struct tcf_block *block)
+{
+       mutex_destroy(&block->lock);
+       kfree_rcu(block, rcu);
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
+{
+       struct tcf_block *block = chain->block;
+
+       mutex_destroy(&chain->filter_chain_lock);
        kfree(chain);
-       if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
-               kfree_rcu(block, rcu);
+       if (free_block)
+               tcf_block_destroy(block);
 }
 
 static void tcf_chain_hold(struct tcf_chain *chain)
 {
+       ASSERT_BLOCK_LOCKED(chain->block);
+
        ++chain->refcnt;
 }
 
 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
 {
+       ASSERT_BLOCK_LOCKED(chain->block);
+
        /* In case all the references are action references, this
         * chain should not be shown to the user.
         */
@@ -273,6 +389,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
 {
        struct tcf_chain *chain;
 
+       ASSERT_BLOCK_LOCKED(block);
+
        list_for_each_entry(chain, &block->chain_list, list) {
                if (chain->index == chain_index)
                        return chain;
@@ -287,31 +405,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
                                         u32 chain_index, bool create,
                                         bool by_act)
 {
-       struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+       struct tcf_chain *chain = NULL;
+       bool is_first_reference;
 
+       mutex_lock(&block->lock);
+       chain = tcf_chain_lookup(block, chain_index);
        if (chain) {
                tcf_chain_hold(chain);
        } else {
                if (!create)
-                       return NULL;
+                       goto errout;
                chain = tcf_chain_create(block, chain_index);
                if (!chain)
-                       return NULL;
+                       goto errout;
        }
 
        if (by_act)
                ++chain->action_refcnt;
+       is_first_reference = chain->refcnt - chain->action_refcnt == 1;
+       mutex_unlock(&block->lock);
 
        /* Send notification only in case we got the first
         * non-action reference. Until then, the chain acts only as
         * a placeholder for actions pointing to it and user ought
         * not know about them.
         */
-       if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+       if (is_first_reference && !by_act)
                tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
                                RTM_NEWCHAIN, false);
 
        return chain;
+
+errout:
+       mutex_unlock(&block->lock);
+       return chain;
 }
 
 static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -326,51 +453,94 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
 }
 EXPORT_SYMBOL(tcf_chain_get_by_act);
 
-static void tc_chain_tmplt_del(struct tcf_chain *chain);
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+                              void *tmplt_priv);
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+                                 void *tmplt_priv, u32 chain_index,
+                                 struct tcf_block *block, struct sk_buff *oskb,
+                                 u32 seq, u16 flags, bool unicast);
 
-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+                           bool explicitly_created)
 {
+       struct tcf_block *block = chain->block;
+       const struct tcf_proto_ops *tmplt_ops;
+       bool is_last, free_block = false;
+       unsigned int refcnt;
+       void *tmplt_priv;
+       u32 chain_index;
+
+       mutex_lock(&block->lock);
+       if (explicitly_created) {
+               if (!chain->explicitly_created) {
+                       mutex_unlock(&block->lock);
+                       return;
+               }
+               chain->explicitly_created = false;
+       }
+
        if (by_act)
                chain->action_refcnt--;
-       chain->refcnt--;
+
+       /* tc_chain_notify_delete can't be called while holding the block lock.
+        * However, once the block is unlocked the chain can be changed
+        * concurrently, so save these values to temporary variables.
+        */
+       refcnt = --chain->refcnt;
+       is_last = refcnt - chain->action_refcnt == 0;
+       tmplt_ops = chain->tmplt_ops;
+       tmplt_priv = chain->tmplt_priv;
+       chain_index = chain->index;
+
+       if (refcnt == 0)
+               free_block = tcf_chain_detach(chain);
+       mutex_unlock(&block->lock);
 
        /* The last dropped non-action reference will trigger notification. */
-       if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
-               tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+       if (is_last && !by_act) {
+               tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index,
+                                      block, NULL, 0, 0, false);
+               /* Last reference to chain, no need to lock. */
+               chain->flushing = false;
+       }
 
-       if (chain->refcnt == 0) {
-               tc_chain_tmplt_del(chain);
-               tcf_chain_destroy(chain);
+       if (refcnt == 0) {
+               tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
+               tcf_chain_destroy(chain, free_block);
        }
 }
 
 static void tcf_chain_put(struct tcf_chain *chain)
 {
-       __tcf_chain_put(chain, false);
+       __tcf_chain_put(chain, false, false);
 }
 
 void tcf_chain_put_by_act(struct tcf_chain *chain)
 {
-       __tcf_chain_put(chain, true);
+       __tcf_chain_put(chain, true, false);
 }
 EXPORT_SYMBOL(tcf_chain_put_by_act);
 
 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 {
-       if (chain->explicitly_created)
-               tcf_chain_put(chain);
+       __tcf_chain_put(chain, false, true);
 }
 
-static void tcf_chain_flush(struct tcf_chain *chain)
+static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
 {
-       struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+       struct tcf_proto *tp, *tp_next;
 
+       mutex_lock(&chain->filter_chain_lock);
+       tp = tcf_chain_dereference(chain->filter_chain, chain);
+       RCU_INIT_POINTER(chain->filter_chain, NULL);
        tcf_chain0_head_change(chain, NULL);
+       chain->flushing = true;
+       mutex_unlock(&chain->filter_chain_lock);
+
        while (tp) {
-               RCU_INIT_POINTER(chain->filter_chain, tp->next);
-               tcf_proto_destroy(tp, NULL);
-               tp = rtnl_dereference(chain->filter_chain);
-               tcf_chain_put(chain);
+               tp_next = rcu_dereference_protected(tp->next, 1);
+               tcf_proto_put(tp, rtnl_held, NULL);
+               tp = tp_next;
        }
 }
 
@@ -692,8 +862,8 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block,
                              struct tcf_block_ext_info *ei,
                              struct netlink_ext_ack *extack)
 {
-       struct tcf_chain *chain0 = block->chain0.chain;
        struct tcf_filter_chain_list_item *item;
+       struct tcf_chain *chain0;
 
        item = kmalloc(sizeof(*item), GFP_KERNEL);
        if (!item) {
@@ -702,9 +872,32 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block,
        }
        item->chain_head_change = ei->chain_head_change;
        item->chain_head_change_priv = ei->chain_head_change_priv;
-       if (chain0 && chain0->filter_chain)
-               tcf_chain_head_change_item(item, chain0->filter_chain);
-       list_add(&item->list, &block->chain0.filter_chain_list);
+
+       mutex_lock(&block->lock);
+       chain0 = block->chain0.chain;
+       if (chain0)
+               tcf_chain_hold(chain0);
+       else
+               list_add(&item->list, &block->chain0.filter_chain_list);
+       mutex_unlock(&block->lock);
+
+       if (chain0) {
+               struct tcf_proto *tp_head;
+
+               mutex_lock(&chain0->filter_chain_lock);
+
+               tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
+               if (tp_head)
+                       tcf_chain_head_change_item(item, tp_head);
+
+               mutex_lock(&block->lock);
+               list_add(&item->list, &block->chain0.filter_chain_list);
+               mutex_unlock(&block->lock);
+
+               mutex_unlock(&chain0->filter_chain_lock);
+               tcf_chain_put(chain0);
+       }
+
        return 0;
 }
 
@@ -712,20 +905,23 @@ static void
 tcf_chain0_head_change_cb_del(struct tcf_block *block,
                              struct tcf_block_ext_info *ei)
 {
-       struct tcf_chain *chain0 = block->chain0.chain;
        struct tcf_filter_chain_list_item *item;
 
+       mutex_lock(&block->lock);
        list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
                if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
                    (item->chain_head_change == ei->chain_head_change &&
                     item->chain_head_change_priv == ei->chain_head_change_priv)) {
-                       if (chain0)
+                       if (block->chain0.chain)
                                tcf_chain_head_change_item(item, NULL);
                        list_del(&item->list);
+                       mutex_unlock(&block->lock);
+
                        kfree(item);
                        return;
                }
        }
+       mutex_unlock(&block->lock);
        WARN_ON(1);
 }
 
@@ -772,6 +968,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
                NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
                return ERR_PTR(-ENOMEM);
        }
+       mutex_init(&block->lock);
        INIT_LIST_HEAD(&block->chain_list);
        INIT_LIST_HEAD(&block->cb_list);
        INIT_LIST_HEAD(&block->owner_list);
@@ -807,157 +1004,241 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
        return block;
 }
 
-static void tcf_block_flush_all_chains(struct tcf_block *block)
+static struct tcf_chain *
+__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
 {
-       struct tcf_chain *chain;
+       mutex_lock(&block->lock);
+       if (chain)
+               chain = list_is_last(&chain->list, &block->chain_list) ?
+                       NULL : list_next_entry(chain, list);
+       else
+               chain = list_first_entry_or_null(&block->chain_list,
+                                                struct tcf_chain, list);
 
-       /* Hold a refcnt for all chains, so that they don't disappear
-        * while we are iterating.
-        */
-       list_for_each_entry(chain, &block->chain_list, list)
+       /* skip all action-only chains */
+       while (chain && tcf_chain_held_by_acts_only(chain))
+               chain = list_is_last(&chain->list, &block->chain_list) ?
+                       NULL : list_next_entry(chain, list);
+
+       if (chain)
                tcf_chain_hold(chain);
+       mutex_unlock(&block->lock);
 
-       list_for_each_entry(chain, &block->chain_list, list)
-               tcf_chain_flush(chain);
+       return chain;
 }
 
-static void tcf_block_put_all_chains(struct tcf_block *block)
+/* Function to be used by all clients that want to iterate over all chains in
+ * a block. It properly obtains block->lock and takes a reference to the chain
+ * before returning it. Users of this function must be tolerant to concurrent
+ * chain insertion/deletion, or must ensure that no concurrent chain
+ * modification is possible. Note that netlink dump callbacks cannot guarantee
+ * a consistent dump because the rtnl lock is released each time the skb is
+ * filled with data and sent to user-space.
+ */
+
+struct tcf_chain *
+tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
 {
-       struct tcf_chain *chain, *tmp;
+       struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
 
-       /* At this point, all the chains should have refcnt >= 1. */
-       list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
-               tcf_chain_put_explicitly_created(chain);
+       if (chain)
                tcf_chain_put(chain);
-       }
+
+       return chain_next;
 }
+EXPORT_SYMBOL(tcf_get_next_chain);
 
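
tcf_get_next_chain() implements a reference-holding iterator: each call pins the element it returns under block->lock and releases the one it was handed, so iteration stays safe against concurrent deletion without holding the lock across the loop body. A generic sketch of the pattern over a singly linked list; locking is elided and marked where it would go:

#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; int refs; int val; };

static void node_put(struct node *n)
{
        if (n && --n->refs == 0)
                free(n);
}

static struct node *get_next(struct node *head, struct node *cur)
{
        struct node *next = cur ? cur->next : head;

        /* lock(list) would bracket the lookup + ref in the real code */
        if (next)
                next->refs++;           /* pin the element we return */
        node_put(cur);                  /* release the one we were given */
        return next;
}

int main(void)
{
        struct node *b = malloc(sizeof(*b)), *a = malloc(sizeof(*a));

        *b = (struct node){ NULL, 1, 2 };
        *a = (struct node){ b, 1, 1 };

        for (struct node *n = get_next(a, NULL); n; n = get_next(a, n))
                printf("%d\n", n->val);

        node_put(a);
        node_put(b);
        return 0;
}
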
-static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
-                           struct tcf_block_ext_info *ei)
+static struct tcf_proto *
+__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
 {
-       if (refcount_dec_and_test(&block->refcnt)) {
-               /* Flushing/putting all chains will cause the block to be
-                * deallocated when last chain is freed. However, if chain_list
-                * is empty, block has to be manually deallocated. After block
-                * reference counter reached 0, it is no longer possible to
-                * increment it or add new chains to block.
-                */
-               bool free_block = list_empty(&block->chain_list);
+       u32 prio = 0;
 
-               if (tcf_block_shared(block))
-                       tcf_block_remove(block, block->net);
-               if (!free_block)
-                       tcf_block_flush_all_chains(block);
+       ASSERT_RTNL();
+       mutex_lock(&chain->filter_chain_lock);
 
-               if (q)
-                       tcf_block_offload_unbind(block, q, ei);
+       if (!tp) {
+               tp = tcf_chain_dereference(chain->filter_chain, chain);
+       } else if (tcf_proto_is_deleting(tp)) {
+               /* The 'deleting' flag is set and chain->filter_chain_lock was
+                * unlocked, which means the next pointer could be invalid.
+                * Restart the search.
+                */
+               prio = tp->prio + 1;
+               tp = tcf_chain_dereference(chain->filter_chain, chain);
 
-               if (free_block)
-                       kfree_rcu(block, rcu);
-               else
-                       tcf_block_put_all_chains(block);
-       } else if (q) {
-               tcf_block_offload_unbind(block, q, ei);
+               for (; tp; tp = tcf_chain_dereference(tp->next, chain))
+                       if (!tp->deleting && tp->prio >= prio)
+                               break;
+       } else {
+               tp = tcf_chain_dereference(tp->next, chain);
        }
+
+       if (tp)
+               tcf_proto_get(tp);
+
+       mutex_unlock(&chain->filter_chain_lock);
+
+       return tp;
+}
+
+/* Function to be used by all clients that want to iterate over all tp's on a
+ * chain. Users of this function must be tolerant to concurrent tp
+ * insertion/deletion, or must ensure that no concurrent chain modification is
+ * possible. Note that netlink dump callbacks cannot guarantee a consistent
+ * dump because the rtnl lock is released each time the skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_proto *
+tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
+                  bool rtnl_held)
+{
+       struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
+
+       if (tp)
+               tcf_proto_put(tp, rtnl_held, NULL);
+
+       return tp_next;
 }
+EXPORT_SYMBOL(tcf_get_next_proto);
 
-static void tcf_block_refcnt_put(struct tcf_block *block)
+static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
 {
-       __tcf_block_put(block, NULL, NULL);
+       struct tcf_chain *chain;
+
+       /* Last reference to block. At this point chains cannot be added or
+        * removed concurrently.
+        */
+       for (chain = tcf_get_next_chain(block, NULL);
+            chain;
+            chain = tcf_get_next_chain(block, chain)) {
+               tcf_chain_put_explicitly_created(chain);
+               tcf_chain_flush(chain, rtnl_held);
+       }
 }
 
-/* Find tcf block.
- * Set q, parent, cl when appropriate.
+/* Look up the Qdisc and increment its reference counter.
+ * Set parent, if necessary.
  */
 
-static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
-                                       u32 *parent, unsigned long *cl,
-                                       int ifindex, u32 block_index,
-                                       struct netlink_ext_ack *extack)
+static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
+                           u32 *parent, int ifindex, bool rtnl_held,
+                           struct netlink_ext_ack *extack)
 {
-       struct tcf_block *block;
+       const struct Qdisc_class_ops *cops;
+       struct net_device *dev;
        int err = 0;
 
-       if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
-               block = tcf_block_refcnt_get(net, block_index);
-               if (!block) {
-                       NL_SET_ERR_MSG(extack, "Block of given index was not found");
-                       return ERR_PTR(-EINVAL);
-               }
-       } else {
-               const struct Qdisc_class_ops *cops;
-               struct net_device *dev;
-
-               rcu_read_lock();
+       if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+               return 0;
 
-               /* Find link */
-               dev = dev_get_by_index_rcu(net, ifindex);
-               if (!dev) {
-                       rcu_read_unlock();
-                       return ERR_PTR(-ENODEV);
-               }
+       rcu_read_lock();
 
-               /* Find qdisc */
-               if (!*parent) {
-                       *q = dev->qdisc;
-                       *parent = (*q)->handle;
-               } else {
-                       *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
-                       if (!*q) {
-                               NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
-                               err = -EINVAL;
-                               goto errout_rcu;
-                       }
-               }
+       /* Find link */
+       dev = dev_get_by_index_rcu(net, ifindex);
+       if (!dev) {
+               rcu_read_unlock();
+               return -ENODEV;
+       }
 
-               *q = qdisc_refcount_inc_nz(*q);
+       /* Find qdisc */
+       if (!*parent) {
+               *q = dev->qdisc;
+               *parent = (*q)->handle;
+       } else {
+               *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
                if (!*q) {
                        NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
                        err = -EINVAL;
                        goto errout_rcu;
                }
+       }
 
-               /* Is it classful? */
-               cops = (*q)->ops->cl_ops;
-               if (!cops) {
-                       NL_SET_ERR_MSG(extack, "Qdisc not classful");
-                       err = -EINVAL;
-                       goto errout_rcu;
-               }
+       *q = qdisc_refcount_inc_nz(*q);
+       if (!*q) {
+               NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
+               err = -EINVAL;
+               goto errout_rcu;
+       }
 
-               if (!cops->tcf_block) {
-                       NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
-                       err = -EOPNOTSUPP;
-                       goto errout_rcu;
-               }
+       /* Is it classful? */
+       cops = (*q)->ops->cl_ops;
+       if (!cops) {
+               NL_SET_ERR_MSG(extack, "Qdisc not classful");
+               err = -EINVAL;
+               goto errout_qdisc;
+       }
 
-               /* At this point we know that qdisc is not noop_qdisc,
-                * which means that qdisc holds a reference to net_device
-                * and we hold a reference to qdisc, so it is safe to release
-                * rcu read lock.
-                */
-               rcu_read_unlock();
+       if (!cops->tcf_block) {
+               NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
+               err = -EOPNOTSUPP;
+               goto errout_qdisc;
+       }
 
-               /* Do we search for filter, attached to class? */
-               if (TC_H_MIN(*parent)) {
-                       *cl = cops->find(*q, *parent);
-                       if (*cl == 0) {
-                               NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
-                               err = -ENOENT;
-                               goto errout_qdisc;
-                       }
+errout_rcu:
+       /* At this point we know that qdisc is not noop_qdisc,
+        * which means that qdisc holds a reference to net_device
+        * and we hold a reference to qdisc, so it is safe to release
+        * rcu read lock.
+        */
+       rcu_read_unlock();
+       return err;
+
+errout_qdisc:
+       rcu_read_unlock();
+
+       if (rtnl_held)
+               qdisc_put(*q);
+       else
+               qdisc_put_unlocked(*q);
+       *q = NULL;
+
+       return err;
+}
+
+static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
+                              int ifindex, struct netlink_ext_ack *extack)
+{
+       if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+               return 0;
+
+       /* Do we search for a filter attached to a class? */
+       if (TC_H_MIN(parent)) {
+               const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+               *cl = cops->find(q, parent);
+               if (*cl == 0) {
+                       NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
+                       return -ENOENT;
                }
+       }
+
+       return 0;
+}
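
__tcf_qdisc_cl_find() keys off the minor part of the parent handle: the 32-bit handle packs the qdisc major ID in the upper 16 bits and the class minor ID in the lower 16. The standalone example below mirrors the TC_H_MAJ()/TC_H_MIN() masks from include/uapi/linux/pkt_sched.h:

#include <stdio.h>

#define TC_H_MAJ(h) ((h) & 0xFFFF0000U)
#define TC_H_MIN(h) ((h) & 0x0000FFFFU)

int main(void)
{
	unsigned int parent = 0x00010002;	/* qdisc 1:, class :2 */

	/* Prints: major 0x10000 minor 0x2 */
	printf("major %#x minor %#x\n", TC_H_MAJ(parent), TC_H_MIN(parent));
	return 0;
}
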
 
-               /* And the last stroke */
-               block = cops->tcf_block(*q, *cl, extack);
+static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
+                                         unsigned long cl, int ifindex,
+                                         u32 block_index,
+                                         struct netlink_ext_ack *extack)
+{
+       struct tcf_block *block;
+
+       if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+               block = tcf_block_refcnt_get(net, block_index);
                if (!block) {
-                       err = -EINVAL;
-                       goto errout_qdisc;
+                       NL_SET_ERR_MSG(extack, "Block of given index was not found");
+                       return ERR_PTR(-EINVAL);
                }
+       } else {
+               const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+               block = cops->tcf_block(q, cl, extack);
+               if (!block)
+                       return ERR_PTR(-EINVAL);
+
                if (tcf_block_shared(block)) {
                        NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
-                       err = -EOPNOTSUPP;
-                       goto errout_qdisc;
+                       return ERR_PTR(-EOPNOTSUPP);
                }
 
                /* Always take reference to block in order to support execution
@@ -970,24 +1251,89 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
        }
 
        return block;
+}
+
+static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
+                           struct tcf_block_ext_info *ei, bool rtnl_held)
+{
+       if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
+               /* Flushing/putting all chains will cause the block to be
+                * deallocated when the last chain is freed. However, if
+                * chain_list is empty, the block has to be deallocated
+                * manually. Once the block reference counter has reached 0,
+                * it is no longer possible to increment it or to add new
+                * chains to the block.
+                */
+               bool free_block = list_empty(&block->chain_list);
+
+               mutex_unlock(&block->lock);
+               if (tcf_block_shared(block))
+                       tcf_block_remove(block, block->net);
+
+               if (q)
+                       tcf_block_offload_unbind(block, q, ei);
+
+               if (free_block)
+                       tcf_block_destroy(block);
+               else
+                       tcf_block_flush_all_chains(block, rtnl_held);
+       } else if (q) {
+               tcf_block_offload_unbind(block, q, ei);
+       }
+}
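
__tcf_block_put() leans on refcount_dec_and_mutex_lock(): the reference is dropped and, only when it was the last one, the function returns with block->lock held so teardown is serialized against concurrent lookups. Below is a compact userspace model of that primitive, assuming C11 atomics and pthreads; it is a sketch of the semantics, not the kernel implementation:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Drop a reference; if it was the last one, return true with the
 * mutex held so the caller can tear the object down.
 */
static bool refcount_dec_and_mutex_lock(atomic_int *r, pthread_mutex_t *m)
{
	int old = atomic_load(r);

	/* Fast path: clearly not the last reference. */
	while (old > 1)
		if (atomic_compare_exchange_weak(r, &old, old - 1))
			return false;

	/* Possibly the last reference: lock first, then decrement. */
	pthread_mutex_lock(m);
	if (atomic_fetch_sub(r, 1) == 1)
		return true;	/* caller owns the lock and must free */
	pthread_mutex_unlock(m);
	return false;
}
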
+
+static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
+{
+       __tcf_block_put(block, NULL, NULL, rtnl_held);
+}
+
+/* Find tcf block.
+ * Set q, parent, cl when appropriate.
+ */
+
+static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
+                                       u32 *parent, unsigned long *cl,
+                                       int ifindex, u32 block_index,
+                                       struct netlink_ext_ack *extack)
+{
+       struct tcf_block *block;
+       int err = 0;
+
+       ASSERT_RTNL();
+
+       err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
+       if (err)
+               goto errout;
+
+       err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
+       if (err)
+               goto errout_qdisc;
+
+       block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
+       if (IS_ERR(block))
+               goto errout_qdisc;
+
+       return block;
 
-errout_rcu:
-       rcu_read_unlock();
 errout_qdisc:
-       if (*q) {
+       if (*q)
                qdisc_put(*q);
-               *q = NULL;
-       }
+errout:
+       *q = NULL;
        return ERR_PTR(err);
 }
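
tcf_block_find() now just composes the three helpers above and unwinds with the usual goto ladder: each acquisition gets a label, and a failure jumps to the deepest label whose resources are already held. The shape, reduced to a self-contained sketch with hypothetical resources:

#include <stdlib.h>

struct res { int payload; };

static struct res *acquire(void) { return calloc(1, sizeof(struct res)); }
static void release(struct res *r) { free(r); }

int do_op(void)
{
	struct res *a, *b;
	int err = 0;

	a = acquire();
	if (!a)
		return -1;

	b = acquire();
	if (!b) {
		err = -1;
		goto errout_a;
	}

	/* ... work with a and b ... */

	release(b);
errout_a:
	release(a);
	return err;
}
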
 
-static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
+                             bool rtnl_held)
 {
        if (!IS_ERR_OR_NULL(block))
-               tcf_block_refcnt_put(block);
+               tcf_block_refcnt_put(block, rtnl_held);
 
-       if (q)
-               qdisc_put(q);
+       if (q) {
+               if (rtnl_held)
+                       qdisc_put(q);
+               else
+                       qdisc_put_unlocked(q);
+       }
 }
 
 struct tcf_block_owner_item {
@@ -1095,7 +1441,7 @@ err_chain0_head_change_cb_add:
        tcf_block_owner_del(block, q, ei->binder_type);
 err_block_owner_add:
 err_block_insert:
-       tcf_block_refcnt_put(block);
+       tcf_block_refcnt_put(block, true);
        return err;
 }
 EXPORT_SYMBOL(tcf_block_get_ext);
@@ -1132,7 +1478,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
        tcf_chain0_head_change_cb_del(block, ei);
        tcf_block_owner_del(block, q, ei->binder_type);
 
-       __tcf_block_put(block, q, ei);
+       __tcf_block_put(block, q, ei, true);
 }
 EXPORT_SYMBOL(tcf_block_put_ext);
 
@@ -1189,13 +1535,19 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
                            void *cb_priv, bool add, bool offload_in_use,
                            struct netlink_ext_ack *extack)
 {
-       struct tcf_chain *chain;
-       struct tcf_proto *tp;
+       struct tcf_chain *chain, *chain_prev;
+       struct tcf_proto *tp, *tp_prev;
        int err;
 
-       list_for_each_entry(chain, &block->chain_list, list) {
-               for (tp = rtnl_dereference(chain->filter_chain); tp;
-                    tp = rtnl_dereference(tp->next)) {
+       for (chain = __tcf_get_next_chain(block, NULL);
+            chain;
+            chain_prev = chain,
+                    chain = __tcf_get_next_chain(block, chain),
+                    tcf_chain_put(chain_prev)) {
+               for (tp = __tcf_get_next_proto(chain, NULL); tp;
+                    tp_prev = tp,
+                            tp = __tcf_get_next_proto(chain, tp),
+                            tcf_proto_put(tp_prev, true, NULL)) {
                        if (tp->ops->reoffload) {
                                err = tp->ops->reoffload(tp, add, cb, cb_priv,
                                                         extack);
@@ -1212,6 +1564,8 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
        return 0;
 
 err_playback_remove:
+       tcf_proto_put(tp, true, NULL);
+       tcf_chain_put(chain);
        tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
                                    extack);
        return err;
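
tcf_block_playback_offloads() switches from plain list_for_each_entry() to the reference-taking iterators: __tcf_get_next_chain()/__tcf_get_next_proto() return the next element with a reference already held, and the loop header drops the previous one, so elements may be unlinked concurrently without breaking the walk. A stripped-down model of that get-next/put-prev discipline follows; concurrent removal itself is elided for brevity (the kernel iterators re-validate membership under the appropriate lock), and all names are hypothetical:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct node {
	atomic_int refcnt;
	struct node *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

static void node_put(struct node *n)
{
	if (n && atomic_fetch_sub(&n->refcnt, 1) == 1)
		free(n);
}

/* Return the successor of @cur (or the head) with a reference held. */
static struct node *get_next(struct node *cur)
{
	struct node *next;

	pthread_mutex_lock(&list_lock);
	next = cur ? cur->next : head;
	if (next)
		atomic_fetch_add(&next->refcnt, 1);
	pthread_mutex_unlock(&list_lock);
	return next;
}

static void walk_all(void (*fn)(struct node *))
{
	struct node *n, *prev;

	for (n = get_next(NULL); n;
	     prev = n, n = get_next(n), node_put(prev))
		fn(n);
}
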
@@ -1337,32 +1691,116 @@ struct tcf_chain_info {
        struct tcf_proto __rcu *next;
 };
 
-static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
+                                          struct tcf_chain_info *chain_info)
 {
-       return rtnl_dereference(*chain_info->pprev);
+       return tcf_chain_dereference(*chain_info->pprev, chain);
 }
 
-static void tcf_chain_tp_insert(struct tcf_chain *chain,
-                               struct tcf_chain_info *chain_info,
-                               struct tcf_proto *tp)
+static int tcf_chain_tp_insert(struct tcf_chain *chain,
+                              struct tcf_chain_info *chain_info,
+                              struct tcf_proto *tp)
 {
+       if (chain->flushing)
+               return -EAGAIN;
+
        if (*chain_info->pprev == chain->filter_chain)
                tcf_chain0_head_change(chain, tp);
-       RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+       tcf_proto_get(tp);
+       RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
        rcu_assign_pointer(*chain_info->pprev, tp);
-       tcf_chain_hold(chain);
+
+       return 0;
 }
 
 static void tcf_chain_tp_remove(struct tcf_chain *chain,
                                struct tcf_chain_info *chain_info,
                                struct tcf_proto *tp)
 {
-       struct tcf_proto *next = rtnl_dereference(chain_info->next);
+       struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
 
+       tcf_proto_mark_delete(tp);
        if (tp == chain->filter_chain)
                tcf_chain0_head_change(chain, next);
        RCU_INIT_POINTER(*chain_info->pprev, next);
-       tcf_chain_put(chain);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+                                          struct tcf_chain_info *chain_info,
+                                          u32 protocol, u32 prio,
+                                          bool prio_allocate);
+
+/* Try to insert new proto.
+ * If proto with specified priority already exists, free new proto
+ * and return existing one.
+ */
+
+static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
+                                                   struct tcf_proto *tp_new,
+                                                   u32 protocol, u32 prio,
+                                                   bool rtnl_held)
+{
+       struct tcf_chain_info chain_info;
+       struct tcf_proto *tp;
+       int err = 0;
+
+       mutex_lock(&chain->filter_chain_lock);
+
+       tp = tcf_chain_tp_find(chain, &chain_info,
+                              protocol, prio, false);
+       if (!tp)
+               err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
+       mutex_unlock(&chain->filter_chain_lock);
+
+       if (tp) {
+               tcf_proto_destroy(tp_new, rtnl_held, NULL);
+               tp_new = tp;
+       } else if (err) {
+               tcf_proto_destroy(tp_new, rtnl_held, NULL);
+               tp_new = ERR_PTR(err);
+       }
+
+       return tp_new;
+}
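
tcf_chain_tp_insert_unique() implements the classic optimistic-allocation pattern: the new proto is created outside the lock, and under filter_chain_lock the chain is re-checked; if a concurrent writer already installed a proto with the same protocol/priority, the fresh copy is destroyed and the winner is returned instead. Reduced to a userspace sketch (priority ordering elided, names hypothetical):

#include <pthread.h>
#include <stdlib.h>

struct entry {
	int prio;
	struct entry *next;
};

static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *chain;

/* Returns the entry that ended up in the chain for @tp_new->prio. */
struct entry *insert_unique(struct entry *tp_new)
{
	struct entry *e;

	pthread_mutex_lock(&chain_lock);
	for (e = chain; e; e = e->next)
		if (e->prio == tp_new->prio)
			break;
	if (!e) {
		tp_new->next = chain;
		chain = tp_new;
	}
	pthread_mutex_unlock(&chain_lock);

	if (e) {
		free(tp_new);	/* lost the race; reuse the winner */
		return e;
	}
	return tp_new;
}
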
+
+static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
+                                     struct tcf_proto *tp, bool rtnl_held,
+                                     struct netlink_ext_ack *extack)
+{
+       struct tcf_chain_info chain_info;
+       struct tcf_proto *tp_iter;
+       struct tcf_proto **pprev;
+       struct tcf_proto *next;
+
+       mutex_lock(&chain->filter_chain_lock);
+
+       /* Atomically find and remove tp from chain. */
+       for (pprev = &chain->filter_chain;
+            (tp_iter = tcf_chain_dereference(*pprev, chain));
+            pprev = &tp_iter->next) {
+               if (tp_iter == tp) {
+                       chain_info.pprev = pprev;
+                       chain_info.next = tp_iter->next;
+                       WARN_ON(tp_iter->deleting);
+                       break;
+               }
+       }
+       /* Verify that tp still exists and no new filters were inserted
+        * concurrently.
+        * Mark tp for deletion if it is empty.
+        */
+       if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
+               mutex_unlock(&chain->filter_chain_lock);
+               return;
+       }
+
+       next = tcf_chain_dereference(chain_info.next, chain);
+       if (tp == chain->filter_chain)
+               tcf_chain0_head_change(chain, next);
+       RCU_INIT_POINTER(*chain_info.pprev, next);
+       mutex_unlock(&chain->filter_chain_lock);
+
+       tcf_proto_put(tp, rtnl_held, extack);
 }
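
tcf_chain_tp_delete_empty() is a check-then-act sequence done atomically under filter_chain_lock: re-find the proto on the chain, confirm it is still empty, and only then mark and unlink it, so a filter inserted in the meantime keeps the proto alive. The same shape as a self-contained sketch, with hypothetical structures:

#include <pthread.h>
#include <stdbool.h>

struct tp {
	struct tp *next;
	int nfilters;
	bool deleting;	/* mirrors tcf_proto_mark_delete() */
};

static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;
static struct tp *filter_chain;

static bool tp_delete_if_empty(struct tp *tp)
{
	struct tp **pprev, *iter;

	pthread_mutex_lock(&chain_lock);
	for (pprev = &filter_chain; (iter = *pprev); pprev = &iter->next)
		if (iter == tp)
			break;
	/* Gone already, or refilled concurrently: the other side wins. */
	if (!iter || tp->nfilters > 0) {
		pthread_mutex_unlock(&chain_lock);
		return false;
	}
	tp->deleting = true;
	*pprev = tp->next;	/* unlink */
	pthread_mutex_unlock(&chain_lock);
	return true;		/* caller drops the final reference */
}
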
 
 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -1375,7 +1813,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
 
        /* Check the chain for existence of proto-tcf with this priority */
        for (pprev = &chain->filter_chain;
-            (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+            (tp = tcf_chain_dereference(*pprev, chain));
+            pprev = &tp->next) {
                if (tp->prio >= prio) {
                        if (tp->prio == prio) {
                                if (prio_allocate ||
@@ -1388,14 +1827,20 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
                }
        }
        chain_info->pprev = pprev;
-       chain_info->next = tp ? tp->next : NULL;
+       if (tp) {
+               chain_info->next = tp->next;
+               tcf_proto_get(tp);
+       } else {
+               chain_info->next = NULL;
+       }
        return tp;
 }
 
 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
                         struct tcf_proto *tp, struct tcf_block *block,
                         struct Qdisc *q, u32 parent, void *fh,
-                        u32 portid, u32 seq, u16 flags, int event)
+                        u32 portid, u32 seq, u16 flags, int event,
+                        bool rtnl_held)
 {
        struct tcmsg *tcm;
        struct nlmsghdr  *nlh;
@@ -1423,7 +1868,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
        if (!fh) {
                tcm->tcm_handle = 0;
        } else {
-               if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+               if (tp->ops->dump &&
+                   tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
                        goto nla_put_failure;
        }
        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -1438,7 +1884,8 @@ nla_put_failure:
 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
                          struct nlmsghdr *n, struct tcf_proto *tp,
                          struct tcf_block *block, struct Qdisc *q,
-                         u32 parent, void *fh, int event, bool unicast)
+                         u32 parent, void *fh, int event, bool unicast,
+                         bool rtnl_held)
 {
        struct sk_buff *skb;
        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1448,7 +1895,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
                return -ENOBUFS;
 
        if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
-                         n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
+                         n->nlmsg_seq, n->nlmsg_flags, event,
+                         rtnl_held) <= 0) {
                kfree_skb(skb);
                return -EINVAL;
        }
@@ -1464,7 +1912,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
                              struct nlmsghdr *n, struct tcf_proto *tp,
                              struct tcf_block *block, struct Qdisc *q,
                              u32 parent, void *fh, bool unicast, bool *last,
-                             struct netlink_ext_ack *extack)
+                             bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct sk_buff *skb;
        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1475,13 +1923,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
                return -ENOBUFS;
 
        if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
-                         n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+                         n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
+                         rtnl_held) <= 0) {
                NL_SET_ERR_MSG(extack, "Failed to build del event notification");
                kfree_skb(skb);
                return -EINVAL;
        }
 
-       err = tp->ops->delete(tp, fh, last, extack);
+       err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
        if (err) {
                kfree_skb(skb);
                return err;
@@ -1500,14 +1949,21 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
                                 struct tcf_block *block, struct Qdisc *q,
                                 u32 parent, struct nlmsghdr *n,
-                                struct tcf_chain *chain, int event)
+                                struct tcf_chain *chain, int event,
+                                bool rtnl_held)
 {
        struct tcf_proto *tp;
 
-       for (tp = rtnl_dereference(chain->filter_chain);
-            tp; tp = rtnl_dereference(tp->next))
+       for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
+            tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
                tfilter_notify(net, oskb, n, tp, block,
-                              q, parent, NULL, event, false);
+                              q, parent, NULL, event, false, rtnl_held);
+}
+
+static void tfilter_put(struct tcf_proto *tp, void *fh)
+{
+       if (tp->ops->put && fh)
+               tp->ops->put(tp, fh);
 }
 
 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1530,6 +1986,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        void *fh;
        int err;
        int tp_created;
+       bool rtnl_held = false;
 
        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
                return -EPERM;
@@ -1546,7 +2003,9 @@ replay:
        prio = TC_H_MAJ(t->tcm_info);
        prio_allocate = false;
        parent = t->tcm_parent;
+       tp = NULL;
        cl = 0;
+       block = NULL;
 
        if (prio == 0) {
                /* If no priority is provided by the user,
@@ -1563,8 +2022,27 @@ replay:
 
        /* Find head of filter chain. */
 
-       block = tcf_block_find(net, &q, &parent, &cl,
-                              t->tcm_ifindex, t->tcm_block_index, extack);
+       err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+       if (err)
+               return err;
+
+       /* Take rtnl mutex if rtnl_held was set to true on the previous
+        * iteration, if block is shared (no qdisc found), if qdisc is not
+        * unlocked, if classifier type is not specified, or if classifier is
+        * not unlocked.
+        */
+       if (rtnl_held ||
+           (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+           !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+               rtnl_held = true;
+               rtnl_lock();
+       }
+
+       err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+       if (err)
+               goto errout;
+
+       block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+                                extack);
        if (IS_ERR(block)) {
                err = PTR_ERR(block);
                goto errout;
@@ -1583,40 +2061,62 @@ replay:
                goto errout;
        }
 
+       mutex_lock(&chain->filter_chain_lock);
        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
                               prio, prio_allocate);
        if (IS_ERR(tp)) {
                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
                err = PTR_ERR(tp);
-               goto errout;
+               goto errout_locked;
        }
 
        if (tp == NULL) {
+               struct tcf_proto *tp_new = NULL;
+
+               if (chain->flushing) {
+                       err = -EAGAIN;
+                       goto errout_locked;
+               }
+
                /* Proto-tcf does not exist, create new one */
 
                if (tca[TCA_KIND] == NULL || !protocol) {
                        NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
                        err = -EINVAL;
-                       goto errout;
+                       goto errout_locked;
                }
 
                if (!(n->nlmsg_flags & NLM_F_CREATE)) {
                        NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
                        err = -ENOENT;
-                       goto errout;
+                       goto errout_locked;
                }
 
                if (prio_allocate)
-                       prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
+                       prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
+                                                              &chain_info));
 
-               tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
-                                     protocol, prio, chain, extack);
+               mutex_unlock(&chain->filter_chain_lock);
+               tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
+                                         protocol, prio, chain, rtnl_held,
+                                         extack);
+               if (IS_ERR(tp_new)) {
+                       err = PTR_ERR(tp_new);
+                       goto errout_tp;
+               }
+
+               tp_created = 1;
+               tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
+                                               rtnl_held);
                if (IS_ERR(tp)) {
                        err = PTR_ERR(tp);
-                       goto errout;
+                       goto errout_tp;
                }
-               tp_created = 1;
-       } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+       } else {
+               mutex_unlock(&chain->filter_chain_lock);
+       }
+
+       if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
                NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
                err = -EINVAL;
                goto errout;
@@ -1631,6 +2131,7 @@ replay:
                        goto errout;
                }
        } else if (n->nlmsg_flags & NLM_F_EXCL) {
+               tfilter_put(tp, fh);
                NL_SET_ERR_MSG(extack, "Filter already exists");
                err = -EEXIST;
                goto errout;
@@ -1644,25 +2145,41 @@ replay:
 
        err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
                              n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
-                             extack);
+                             rtnl_held, extack);
        if (err == 0) {
-               if (tp_created)
-                       tcf_chain_tp_insert(chain, &chain_info, tp);
                tfilter_notify(net, skb, n, tp, block, q, parent, fh,
-                              RTM_NEWTFILTER, false);
-       } else {
-               if (tp_created)
-                       tcf_proto_destroy(tp, NULL);
+                              RTM_NEWTFILTER, false, rtnl_held);
+               tfilter_put(tp, fh);
        }
 
 errout:
-       if (chain)
-               tcf_chain_put(chain);
-       tcf_block_release(q, block);
-       if (err == -EAGAIN)
+       if (err && tp_created)
+               tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
+errout_tp:
+       if (chain) {
+               if (tp && !IS_ERR(tp))
+                       tcf_proto_put(tp, rtnl_held, NULL);
+               if (!tp_created)
+                       tcf_chain_put(chain);
+       }
+       tcf_block_release(q, block, rtnl_held);
+
+       if (rtnl_held)
+               rtnl_unlock();
+
+       if (err == -EAGAIN) {
+               /* Take rtnl lock in case EAGAIN is caused by concurrent flush
+                * of target chain.
+                */
+               rtnl_held = true;
                /* Replay the request. */
                goto replay;
+       }
        return err;
+
+errout_locked:
+       mutex_unlock(&chain->filter_chain_lock);
+       goto errout;
 }
 
 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
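
The rule tc_new_tfilter() applies before deciding whether to take the RTNL mutex, which tc_del_tfilter() and tc_get_tfilter() repeat below with small variations, can be read as a single predicate. The helper below is hypothetical and exists only to make the condition from the code above explicit:

#include <stdbool.h>

static bool need_rtnl(bool retrying_after_eagain, bool have_qdisc,
		      bool qdisc_is_unlocked, bool kind_given,
		      bool classifier_is_unlocked)
{
	if (retrying_after_eagain)		/* EAGAIN replay path */
		return true;
	if (have_qdisc && !qdisc_is_unlocked)	/* locked qdisc class ops */
		return true;
	if (!kind_given)			/* classifier unknown */
		return true;
	return !classifier_is_unlocked;		/* locked classifier */
}
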
@@ -1678,11 +2195,12 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        struct Qdisc *q = NULL;
        struct tcf_chain_info chain_info;
        struct tcf_chain *chain = NULL;
-       struct tcf_block *block;
+       struct tcf_block *block = NULL;
        struct tcf_proto *tp = NULL;
        unsigned long cl = 0;
        void *fh = NULL;
        int err;
+       bool rtnl_held = false;
 
        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
                return -EPERM;
@@ -1703,8 +2221,27 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 
        /* Find head of filter chain. */
 
-       block = tcf_block_find(net, &q, &parent, &cl,
-                              t->tcm_ifindex, t->tcm_block_index, extack);
+       err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+       if (err)
+               return err;
+
+       /* Take rtnl mutex if flushing the whole chain, if block is shared
+        * (no qdisc found), if qdisc is not unlocked, if classifier type is
+        * not specified, or if classifier is not unlocked.
+        */
+       if (!prio ||
+           (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+           !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+               rtnl_held = true;
+               rtnl_lock();
+       }
+
+       err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+       if (err)
+               goto errout;
+
+       block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+                                extack);
        if (IS_ERR(block)) {
                err = PTR_ERR(block);
                goto errout;
@@ -1732,56 +2269,69 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 
        if (prio == 0) {
                tfilter_notify_chain(net, skb, block, q, parent, n,
-                                    chain, RTM_DELTFILTER);
-               tcf_chain_flush(chain);
+                                    chain, RTM_DELTFILTER, rtnl_held);
+               tcf_chain_flush(chain, rtnl_held);
                err = 0;
                goto errout;
        }
 
+       mutex_lock(&chain->filter_chain_lock);
        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
                               prio, false);
        if (!tp || IS_ERR(tp)) {
                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
                err = tp ? PTR_ERR(tp) : -ENOENT;
-               goto errout;
+               goto errout_locked;
        } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
                NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
                err = -EINVAL;
+               goto errout_locked;
+       } else if (t->tcm_handle == 0) {
+               tcf_chain_tp_remove(chain, &chain_info, tp);
+               mutex_unlock(&chain->filter_chain_lock);
+
+               tcf_proto_put(tp, rtnl_held, NULL);
+               tfilter_notify(net, skb, n, tp, block, q, parent, fh,
+                              RTM_DELTFILTER, false, rtnl_held);
+               err = 0;
                goto errout;
        }
+       mutex_unlock(&chain->filter_chain_lock);
 
        fh = tp->ops->get(tp, t->tcm_handle);
 
        if (!fh) {
-               if (t->tcm_handle == 0) {
-                       tcf_chain_tp_remove(chain, &chain_info, tp);
-                       tfilter_notify(net, skb, n, tp, block, q, parent, fh,
-                                      RTM_DELTFILTER, false);
-                       tcf_proto_destroy(tp, extack);
-                       err = 0;
-               } else {
-                       NL_SET_ERR_MSG(extack, "Specified filter handle not found");
-                       err = -ENOENT;
-               }
+               NL_SET_ERR_MSG(extack, "Specified filter handle not found");
+               err = -ENOENT;
        } else {
                bool last;
 
                err = tfilter_del_notify(net, skb, n, tp, block,
                                         q, parent, fh, false, &last,
-                                        extack);
+                                        rtnl_held, extack);
+
                if (err)
                        goto errout;
-               if (last) {
-                       tcf_chain_tp_remove(chain, &chain_info, tp);
-                       tcf_proto_destroy(tp, extack);
-               }
+               if (last)
+                       tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
        }
 
 errout:
-       if (chain)
+       if (chain) {
+               if (tp && !IS_ERR(tp))
+                       tcf_proto_put(tp, rtnl_held, NULL);
                tcf_chain_put(chain);
-       tcf_block_release(q, block);
+       }
+       tcf_block_release(q, block, rtnl_held);
+
+       if (rtnl_held)
+               rtnl_unlock();
+
        return err;
+
+errout_locked:
+       mutex_unlock(&chain->filter_chain_lock);
+       goto errout;
 }
 
 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1797,11 +2347,12 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        struct Qdisc *q = NULL;
        struct tcf_chain_info chain_info;
        struct tcf_chain *chain = NULL;
-       struct tcf_block *block;
+       struct tcf_block *block = NULL;
        struct tcf_proto *tp = NULL;
        unsigned long cl = 0;
        void *fh = NULL;
        int err;
+       bool rtnl_held = false;
 
        err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
        if (err < 0)
@@ -1819,8 +2370,26 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 
        /* Find head of filter chain. */
 
-       block = tcf_block_find(net, &q, &parent, &cl,
-                              t->tcm_ifindex, t->tcm_block_index, extack);
+       err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+       if (err)
+               return err;
+
+       /* Take rtnl mutex if block is shared (no qdisc found), if qdisc is
+        * not unlocked, if classifier type is not specified, or if classifier
+        * is not unlocked.
+        */
+       if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+           !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+               rtnl_held = true;
+               rtnl_lock();
+       }
+
+       err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+       if (err)
+               goto errout;
+
+       block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+                                extack);
        if (IS_ERR(block)) {
                err = PTR_ERR(block);
                goto errout;
@@ -1839,8 +2408,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
                goto errout;
        }
 
+       mutex_lock(&chain->filter_chain_lock);
        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
                               prio, false);
+       mutex_unlock(&chain->filter_chain_lock);
        if (!tp || IS_ERR(tp)) {
                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
                err = tp ? PTR_ERR(tp) : -ENOENT;
@@ -1858,15 +2429,23 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
                err = -ENOENT;
        } else {
                err = tfilter_notify(net, skb, n, tp, block, q, parent,
-                                    fh, RTM_NEWTFILTER, true);
+                                    fh, RTM_NEWTFILTER, true, rtnl_held);
                if (err < 0)
                        NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
        }
 
+       tfilter_put(tp, fh);
 errout:
-       if (chain)
+       if (chain) {
+               if (tp && !IS_ERR(tp))
+                       tcf_proto_put(tp, rtnl_held, NULL);
                tcf_chain_put(chain);
-       tcf_block_release(q, block);
+       }
+       tcf_block_release(q, block, rtnl_held);
+
+       if (rtnl_held)
+               rtnl_unlock();
+
        return err;
 }
 
@@ -1887,7 +2466,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
        return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
                             n, NETLINK_CB(a->cb->skb).portid,
                             a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
-                            RTM_NEWTFILTER);
+                            RTM_NEWTFILTER, true);
 }
 
 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
@@ -1897,11 +2476,15 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
        struct net *net = sock_net(skb->sk);
        struct tcf_block *block = chain->block;
        struct tcmsg *tcm = nlmsg_data(cb->nlh);
+       struct tcf_proto *tp, *tp_prev;
        struct tcf_dump_args arg;
-       struct tcf_proto *tp;
 
-       for (tp = rtnl_dereference(chain->filter_chain);
-            tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
+       for (tp = __tcf_get_next_proto(chain, NULL);
+            tp;
+            tp_prev = tp,
+                    tp = __tcf_get_next_proto(chain, tp),
+                    tcf_proto_put(tp_prev, true, NULL),
+                    (*p_index)++) {
                if (*p_index < index_start)
                        continue;
                if (TC_H_MAJ(tcm->tcm_info) &&
@@ -1917,9 +2500,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
                        if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
                                          NETLINK_CB(cb->skb).portid,
                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
-                                         RTM_NEWTFILTER) <= 0)
-                               return false;
-
+                                         RTM_NEWTFILTER, true) <= 0)
+                               goto errout;
                        cb->args[1] = 1;
                }
                if (!tp->ops->walk)
@@ -1934,23 +2516,27 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
                arg.w.skip = cb->args[1] - 1;
                arg.w.count = 0;
                arg.w.cookie = cb->args[2];
-               tp->ops->walk(tp, &arg.w);
+               tp->ops->walk(tp, &arg.w, true);
                cb->args[2] = arg.w.cookie;
                cb->args[1] = arg.w.count + 1;
                if (arg.w.stop)
-                       return false;
+                       goto errout;
        }
        return true;
+
+errout:
+       tcf_proto_put(tp, true, NULL);
+       return false;
 }
 
 /* called with RTNL */
 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 {
+       struct tcf_chain *chain, *chain_prev;
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_MAX + 1];
        struct Qdisc *q = NULL;
        struct tcf_block *block;
-       struct tcf_chain *chain;
        struct tcmsg *tcm = nlmsg_data(cb->nlh);
        long index_start;
        long index;
@@ -2014,19 +2600,24 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
        index_start = cb->args[0];
        index = 0;
 
-       list_for_each_entry(chain, &block->chain_list, list) {
+       for (chain = __tcf_get_next_chain(block, NULL);
+            chain;
+            chain_prev = chain,
+                    chain = __tcf_get_next_chain(block, chain),
+                    tcf_chain_put(chain_prev)) {
                if (tca[TCA_CHAIN] &&
                    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
                        continue;
                if (!tcf_chain_dump(chain, q, parent, skb, cb,
                                    index_start, &index)) {
+                       tcf_chain_put(chain);
                        err = -EMSGSIZE;
                        break;
                }
        }
 
        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
-               tcf_block_refcnt_put(block);
+               tcf_block_refcnt_put(block, true);
        cb->args[0] = index;
 
 out:
@@ -2036,8 +2627,10 @@ out:
        return skb->len;
 }
 
-static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
-                             struct sk_buff *skb, struct tcf_block *block,
+static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
+                             void *tmplt_priv, u32 chain_index,
+                             struct net *net, struct sk_buff *skb,
+                             struct tcf_block *block,
                              u32 portid, u32 seq, u16 flags, int event)
 {
        unsigned char *b = skb_tail_pointer(skb);
@@ -2046,8 +2639,8 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
        struct tcmsg *tcm;
        void *priv;
 
-       ops = chain->tmplt_ops;
-       priv = chain->tmplt_priv;
+       ops = tmplt_ops;
+       priv = tmplt_priv;
 
        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
        if (!nlh)
@@ -2065,7 +2658,7 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
                tcm->tcm_block_index = block->index;
        }
 
-       if (nla_put_u32(skb, TCA_CHAIN, chain->index))
+       if (nla_put_u32(skb, TCA_CHAIN, chain_index))
                goto nla_put_failure;
 
        if (ops) {
@@ -2096,7 +2689,8 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
        if (!skb)
                return -ENOBUFS;
 
-       if (tc_chain_fill_node(chain, net, skb, block, portid,
+       if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+                              chain->index, net, skb, block, portid,
                               seq, flags, event) <= 0) {
                kfree_skb(skb);
                return -EINVAL;
@@ -2108,6 +2702,31 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
        return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
 }
 
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+                                 void *tmplt_priv, u32 chain_index,
+                                 struct tcf_block *block, struct sk_buff *oskb,
+                                 u32 seq, u16 flags, bool unicast)
+{
+       u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+       struct net *net = block->net;
+       struct sk_buff *skb;
+
+       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!skb)
+               return -ENOBUFS;
+
+       if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
+                              block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
+               kfree_skb(skb);
+               return -EINVAL;
+       }
+
+       if (unicast)
+               return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+       return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
+}
+
 static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
                              struct nlattr **tca,
                              struct netlink_ext_ack *extack)
@@ -2119,7 +2738,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
        if (!tca[TCA_KIND])
                return 0;
 
-       ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+       ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
        if (IS_ERR(ops))
                return PTR_ERR(ops);
        if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
@@ -2137,16 +2756,15 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
        return 0;
 }
 
-static void tc_chain_tmplt_del(struct tcf_chain *chain)
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+                              void *tmplt_priv)
 {
-       const struct tcf_proto_ops *ops = chain->tmplt_ops;
-
        /* If template ops are not set, no work to do for us. */
-       if (!ops)
+       if (!tmplt_ops)
                return;
 
-       ops->tmplt_destroy(chain->tmplt_priv);
-       module_put(ops->owner);
+       tmplt_ops->tmplt_destroy(tmplt_priv);
+       module_put(tmplt_ops->owner);
 }
 
 /* Add/delete/get a chain */
@@ -2189,6 +2807,8 @@ replay:
                err = -EINVAL;
                goto errout_block;
        }
+
+       mutex_lock(&block->lock);
        chain = tcf_chain_lookup(block, chain_index);
        if (n->nlmsg_type == RTM_NEWCHAIN) {
                if (chain) {
@@ -2200,54 +2820,61 @@ replay:
                        } else {
                                NL_SET_ERR_MSG(extack, "Filter chain already exists");
                                err = -EEXIST;
-                               goto errout_block;
+                               goto errout_block_locked;
                        }
                } else {
                        if (!(n->nlmsg_flags & NLM_F_CREATE)) {
                                NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
                                err = -ENOENT;
-                               goto errout_block;
+                               goto errout_block_locked;
                        }
                        chain = tcf_chain_create(block, chain_index);
                        if (!chain) {
                                NL_SET_ERR_MSG(extack, "Failed to create filter chain");
                                err = -ENOMEM;
-                               goto errout_block;
+                               goto errout_block_locked;
                        }
                }
        } else {
                if (!chain || tcf_chain_held_by_acts_only(chain)) {
                        NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
                        err = -EINVAL;
-                       goto errout_block;
+                       goto errout_block_locked;
                }
                tcf_chain_hold(chain);
        }
 
+       if (n->nlmsg_type == RTM_NEWCHAIN) {
+               /* Modifying chain requires holding parent block lock. In case
+                * the chain was successfully added, take a reference to the
+                * chain. This ensures that an empty chain does not disappear at
+                * the end of this function.
+                */
+               tcf_chain_hold(chain);
+               chain->explicitly_created = true;
+       }
+       mutex_unlock(&block->lock);
+
        switch (n->nlmsg_type) {
        case RTM_NEWCHAIN:
                err = tc_chain_tmplt_add(chain, net, tca, extack);
-               if (err)
+               if (err) {
+                       tcf_chain_put_explicitly_created(chain);
                        goto errout;
-               /* In case the chain was successfully added, take a reference
-                * to the chain. This ensures that an empty chain
-                * does not disappear at the end of this function.
-                */
-               tcf_chain_hold(chain);
-               chain->explicitly_created = true;
+               }
+
                tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
                                RTM_NEWCHAIN, false);
                break;
        case RTM_DELCHAIN:
                tfilter_notify_chain(net, skb, block, q, parent, n,
-                                    chain, RTM_DELTFILTER);
+                                    chain, RTM_DELTFILTER, true);
                /* Flush the chain first as the user requested chain removal. */
-               tcf_chain_flush(chain);
+               tcf_chain_flush(chain, true);
                /* In case the chain was successfully deleted, put a reference
                 * to the chain previously taken during addition.
                 */
                tcf_chain_put_explicitly_created(chain);
-               chain->explicitly_created = false;
                break;
        case RTM_GETCHAIN:
                err = tc_chain_notify(chain, skb, n->nlmsg_seq,
@@ -2264,21 +2891,25 @@ replay:
 errout:
        tcf_chain_put(chain);
 errout_block:
-       tcf_block_release(q, block);
+       tcf_block_release(q, block, true);
        if (err == -EAGAIN)
                /* Replay the request. */
                goto replay;
        return err;
+
+errout_block_locked:
+       mutex_unlock(&block->lock);
+       goto errout_block;
 }
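
tc_ctl_chain() now performs its lookup-or-create entirely under block->lock and takes an extra hold before dropping the lock, so the chain cannot disappear while the (potentially sleeping) template setup runs outside it. The same shape in a self-contained sketch, hypothetical structures throughout:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct chain {
	atomic_int refcnt;
	int index;
	struct chain *next;
};

struct block {
	pthread_mutex_t lock;
	struct chain *chains;
};

/* Look up or create a chain and return it with the caller's hold
 * taken, all under the block lock; configure it after unlocking.
 */
static struct chain *chain_get_or_create(struct block *b, int index)
{
	struct chain *c;

	pthread_mutex_lock(&b->lock);
	for (c = b->chains; c; c = c->next)
		if (c->index == index)
			break;
	if (!c) {
		c = calloc(1, sizeof(*c));
		if (!c) {
			pthread_mutex_unlock(&b->lock);
			return NULL;
		}
		c->index = index;
		atomic_store(&c->refcnt, 1);	/* list reference */
		c->next = b->chains;
		b->chains = c;
	}
	atomic_fetch_add(&c->refcnt, 1);	/* caller's hold */
	pthread_mutex_unlock(&b->lock);
	return c;
}
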
 
 /* called with RTNL */
 static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 {
+       struct tcf_chain *chain, *chain_prev;
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_MAX + 1];
        struct Qdisc *q = NULL;
        struct tcf_block *block;
-       struct tcf_chain *chain;
        struct tcmsg *tcm = nlmsg_data(cb->nlh);
        long index_start;
        long index;
@@ -2342,7 +2973,11 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
        index_start = cb->args[0];
        index = 0;
 
-       list_for_each_entry(chain, &block->chain_list, list) {
+       for (chain = __tcf_get_next_chain(block, NULL);
+            chain;
+            chain_prev = chain,
+                    chain = __tcf_get_next_chain(block, chain),
+                    tcf_chain_put(chain_prev)) {
                if ((tca[TCA_CHAIN] &&
                     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
                        continue;
@@ -2350,19 +2985,20 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
                        index++;
                        continue;
                }
-               if (tcf_chain_held_by_acts_only(chain))
-                       continue;
-               err = tc_chain_fill_node(chain, net, skb, block,
+               err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+                                        chain->index, net, skb, block,
                                         NETLINK_CB(cb->skb).portid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         RTM_NEWCHAIN);
-               if (err <= 0)
+               if (err <= 0) {
+                       tcf_chain_put(chain);
                        break;
+               }
                index++;
        }
 
        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
-               tcf_block_refcnt_put(block);
+               tcf_block_refcnt_put(block, true);
        cb->args[0] = index;
 
 out:
@@ -2384,7 +3020,7 @@ EXPORT_SYMBOL(tcf_exts_destroy);
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
-                     struct netlink_ext_ack *extack)
+                     bool rtnl_held, struct netlink_ext_ack *extack)
 {
 #ifdef CONFIG_NET_CLS_ACT
        {
@@ -2394,7 +3030,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                if (exts->police && tb[exts->police]) {
                        act = tcf_action_init_1(net, tp, tb[exts->police],
                                                rate_tlv, "police", ovr,
-                                               TCA_ACT_BIND, true, extack);
+                                               TCA_ACT_BIND, rtnl_held,
+                                               extack);
                        if (IS_ERR(act))
                                return PTR_ERR(act);
 
@@ -2406,8 +3043,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
                        err = tcf_action_init(net, tp, tb[exts->action],
                                              rate_tlv, NULL, ovr, TCA_ACT_BIND,
-                                             exts->actions, &attr_size, true,
-                                             extack);
+                                             exts->actions, &attr_size,
+                                             rtnl_held, extack);
                        if (err < 0)
                                return err;
                        exts->nr_actions = err;
@@ -2671,10 +3308,12 @@ static int __init tc_filter_init(void)
        if (err)
                goto err_rhash_setup_block_ht;
 
-       rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
-       rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
+       rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+                     RTNL_FLAG_DOIT_UNLOCKED);
+       rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
+                     RTNL_FLAG_DOIT_UNLOCKED);
        rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
-                     tc_dump_tfilter, 0);
+                     tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
        rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
        rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
        rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
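
With the three filter doit handlers registered with RTNL_FLAG_DOIT_UNLOCKED, the rtnetlink core stops taking the RTNL mutex on their behalf, and the handlers take it themselves only when the conditions discussed above demand it. A minimal model of what such a flag buys in a message dispatcher; the dispatcher here is hypothetical, while the real logic lives in net/core/rtnetlink.c:

#include <pthread.h>

#define FLAG_DOIT_UNLOCKED 0x1

struct handler {
	int (*doit)(void *msg);
	unsigned int flags;
};

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static int dispatch(const struct handler *h, void *msg)
{
	int err;

	if (h->flags & FLAG_DOIT_UNLOCKED)
		return h->doit(msg);	/* handler does its own locking */

	pthread_mutex_lock(&big_lock);
	err = h->doit(msg);
	pthread_mutex_unlock(&big_lock);
	return err;
}
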
index 4a57fec..2383f44 100644
@@ -107,7 +107,8 @@ static void basic_delete_filter_work(struct work_struct *work)
        rtnl_unlock();
 }
 
-static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void basic_destroy(struct tcf_proto *tp, bool rtnl_held,
+                         struct netlink_ext_ack *extack)
 {
        struct basic_head *head = rtnl_dereference(tp->root);
        struct basic_filter *f, *n;
@@ -126,7 +127,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
-                       struct netlink_ext_ack *extack)
+                       bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct basic_head *head = rtnl_dereference(tp->root);
        struct basic_filter *f = arg;
@@ -153,7 +154,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
        if (err < 0)
                return err;
 
@@ -173,7 +174,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 static int basic_change(struct net *net, struct sk_buff *in_skb,
                        struct tcf_proto *tp, unsigned long base, u32 handle,
                        struct nlattr **tca, void **arg, bool ovr,
-                       struct netlink_ext_ack *extack)
+                       bool rtnl_held, struct netlink_ext_ack *extack)
 {
        int err;
        struct basic_head *head = rtnl_dereference(tp->root);
@@ -247,7 +248,8 @@ errout:
        return err;
 }
 
-static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                      bool rtnl_held)
 {
        struct basic_head *head = rtnl_dereference(tp->root);
        struct basic_filter *f;
@@ -274,7 +276,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
 }
 
 static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                     struct sk_buff *skb, struct tcmsg *t)
+                     struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct tc_basic_pcnt gpf = {};
        struct basic_filter *f = fh;
index a95cb24..062350c 100644
@@ -298,7 +298,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 }
 
 static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
-                         struct netlink_ext_ack *extack)
+                         bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
 
@@ -307,7 +307,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
        return 0;
 }
 
-static void cls_bpf_destroy(struct tcf_proto *tp,
+static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held,
                            struct netlink_ext_ack *extack)
 {
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -417,7 +417,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
        if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
                return -EINVAL;
 
-       ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack);
+       ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+                               extack);
        if (ret < 0)
                return ret;
 
@@ -455,7 +456,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct tcf_proto *tp, unsigned long base,
                          u32 handle, struct nlattr **tca,
-                         void **arg, bool ovr, struct netlink_ext_ack *extack)
+                         void **arg, bool ovr, bool rtnl_held,
+                         struct netlink_ext_ack *extack)
 {
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *oldprog = *arg;
@@ -575,7 +577,7 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
 }
 
 static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                       struct sk_buff *skb, struct tcmsg *tm)
+                       struct sk_buff *skb, struct tcmsg *tm, bool rtnl_held)
 {
        struct cls_bpf_prog *prog = fh;
        struct nlattr *nest;
@@ -635,7 +637,8 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
                prog->res.class = cl;
 }
 
-static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                        bool rtnl_held)
 {
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
        struct cls_bpf_prog *prog;
index 3bc01bd..1cef3b4 100644
@@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
                             struct tcf_proto *tp, unsigned long base,
                             u32 handle, struct nlattr **tca,
-                            void **arg, bool ovr,
+                            void **arg, bool ovr, bool rtnl_held,
                             struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -110,7 +110,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
                goto errout;
 
        err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
-                               extack);
+                               true, extack);
        if (err < 0)
                goto errout;
 
@@ -130,7 +130,7 @@ errout:
        return err;
 }
 
-static void cls_cgroup_destroy(struct tcf_proto *tp,
+static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held,
                               struct netlink_ext_ack *extack)
 {
        struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -145,12 +145,13 @@ static void cls_cgroup_destroy(struct tcf_proto *tp,
 }
 
 static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last,
-                            struct netlink_ext_ack *extack)
+                            bool rtnl_held, struct netlink_ext_ack *extack)
 {
        return -EOPNOTSUPP;
 }
 
-static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                           bool rtnl_held)
 {
        struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 
@@ -166,7 +167,7 @@ skip:
 }
 
 static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                          struct sk_buff *skb, struct tcmsg *t)
+                          struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct cls_cgroup_head *head = rtnl_dereference(tp->root);
        struct nlattr *nest;
index 2bb043c..204e2ed 100644
@@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work)
 static int flow_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
-                      void **arg, bool ovr, struct netlink_ext_ack *extack)
+                      void **arg, bool ovr, bool rtnl_held,
+                      struct netlink_ext_ack *extack)
 {
        struct flow_head *head = rtnl_dereference(tp->root);
        struct flow_filter *fold, *fnew;
@@ -445,7 +446,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
                goto err2;
 
        err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
-                               extack);
+                               true, extack);
        if (err < 0)
                goto err2;
 
@@ -566,7 +567,7 @@ err1:
 }
 
 static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
-                      struct netlink_ext_ack *extack)
+                      bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct flow_head *head = rtnl_dereference(tp->root);
        struct flow_filter *f = arg;
@@ -590,7 +591,8 @@ static int flow_init(struct tcf_proto *tp)
        return 0;
 }
 
-static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void flow_destroy(struct tcf_proto *tp, bool rtnl_held,
+                        struct netlink_ext_ack *extack)
 {
        struct flow_head *head = rtnl_dereference(tp->root);
        struct flow_filter *f, *next;
@@ -617,7 +619,7 @@ static void *flow_get(struct tcf_proto *tp, u32 handle)
 }
 
 static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                    struct sk_buff *skb, struct tcmsg *t)
+                    struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct flow_filter *f = fh;
        struct nlattr *nest;
@@ -677,7 +679,8 @@ nla_put_failure:
        return -1;
 }
 
-static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                     bool rtnl_held)
 {
        struct flow_head *head = rtnl_dereference(tp->root);
        struct flow_filter *f;
index c5d1db3..640f83e 100644
@@ -396,7 +396,11 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
        err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
        if (err) {
                kfree(cls_flower.rule);
-               return err;
+               if (skip_sw) {
+                       NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+                       return err;
+               }
+               return 0;
        }
 
        err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
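
The reworked error path above encodes a fallback policy: failing to translate the filter's actions for hardware is only fatal when the filter is marked skip_sw, since otherwise the software datapath can still classify. Schematically (a sketch with hypothetical helper names, not the full function):

        err = build_hw_flow_action(rule);       /* hypothetical stand-in */
        if (err) {
                kfree(rule);
                if (skip_sw)
                        return err;     /* hw-only filter: must fail */
                return 0;               /* sw fallback still works */
        }

The same policy shows up in fl_reoffload() further down, where a software-capable filter is skipped with continue instead of aborting the whole reoffload walk.
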
@@ -465,7 +469,8 @@ static void fl_destroy_sleepable(struct work_struct *work)
        module_put(THIS_MODULE);
 }
 
-static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
+                      struct netlink_ext_ack *extack)
 {
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct fl_flow_mask *mask, *next_mask;
@@ -1272,7 +1277,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true,
+                               extack);
        if (err < 0)
                return err;
 
@@ -1299,7 +1305,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 static int fl_change(struct net *net, struct sk_buff *in_skb,
                     struct tcf_proto *tp, unsigned long base,
                     u32 handle, struct nlattr **tca,
-                    void **arg, bool ovr, struct netlink_ext_ack *extack)
+                    void **arg, bool ovr, bool rtnl_held,
+                    struct netlink_ext_ack *extack)
 {
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *fold = *arg;
@@ -1385,7 +1392,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
        if (!tc_skip_hw(fnew->flags)) {
                err = fl_hw_replace_filter(tp, fnew, extack);
                if (err)
-                       goto errout_mask;
+                       goto errout_mask_ht;
        }
 
        if (!tc_in_hw(fnew->flags))
@@ -1415,6 +1422,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
        kfree(mask);
        return 0;
 
+errout_mask_ht:
+       rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
+                              fnew->mask->filter_ht_params);
+
 errout_mask:
        fl_mask_put(head, fnew->mask, false);
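
The new errout_mask_ht label follows the usual goto-unwind idiom: teardown labels run in reverse order of setup, so a hardware-replace failure first removes fnew from the mask's hashtable and then falls through into the existing mask teardown. A generic compilable sketch of the idiom, with setup_*/teardown_* standing in for the ht insert, mask get and hw replace steps:

int setup_a(void), setup_b(void), setup_c(void);
void teardown_a(void), teardown_b(void);

static int demo_change(void)
{
        if (setup_a())
                goto fail;
        if (setup_b())
                goto undo_a;
        if (setup_c())          /* e.g. the hardware replace failing */
                goto undo_b;    /* unwind b, then fall through to a */
        return 0;

undo_b:
        teardown_b();
undo_a:
        teardown_a();
fail:
        return -1;
}
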
 
@@ -1432,7 +1443,7 @@ errout_mask_alloc:
 }
 
 static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
-                    struct netlink_ext_ack *extack)
+                    bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f = arg;
@@ -1444,7 +1455,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
        return 0;
 }
 
-static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                   bool rtnl_held)
 {
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f;
@@ -1495,7 +1507,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
                                                   &f->exts);
                        if (err) {
                                kfree(cls_flower.rule);
-                               return err;
+                               if (tc_skip_sw(f->flags)) {
+                                       NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+                                       return err;
+                               }
+                               continue;
                        }
 
                        cls_flower.classid = f->res.classid;
@@ -2039,7 +2055,7 @@ nla_put_failure:
 }
 
 static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                  struct sk_buff *skb, struct tcmsg *t)
+                  struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct cls_fl_filter *f = fh;
        struct nlattr *nest;
index 29eeeaf..317151b 100644
@@ -139,7 +139,8 @@ static void fw_delete_filter_work(struct work_struct *work)
        rtnl_unlock();
 }
 
-static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fw_destroy(struct tcf_proto *tp, bool rtnl_held,
+                      struct netlink_ext_ack *extack)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        struct fw_filter *f;
@@ -163,7 +164,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
-                    struct netlink_ext_ack *extack)
+                    bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        struct fw_filter *f = arg;
@@ -217,7 +218,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
        int err;
 
        err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
-                               extack);
+                               true, extack);
        if (err < 0)
                return err;
 
@@ -250,7 +251,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 static int fw_change(struct net *net, struct sk_buff *in_skb,
                     struct tcf_proto *tp, unsigned long base,
                     u32 handle, struct nlattr **tca, void **arg,
-                    bool ovr, struct netlink_ext_ack *extack)
+                    bool ovr, bool rtnl_held,
+                    struct netlink_ext_ack *extack)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        struct fw_filter *f = *arg;
@@ -354,7 +356,8 @@ errout:
        return err;
 }
 
-static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                   bool rtnl_held)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        int h;
@@ -384,7 +387,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 }
 
 static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                  struct sk_buff *skb, struct tcmsg *t)
+                  struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        struct fw_filter *f = fh;
index a1b803f..a371374 100644
@@ -109,7 +109,8 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
        return 0;
 }
 
-static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void mall_destroy(struct tcf_proto *tp, bool rtnl_held,
+                        struct netlink_ext_ack *extack)
 {
        struct cls_mall_head *head = rtnl_dereference(tp->root);
 
@@ -145,7 +146,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
+                               extack);
        if (err < 0)
                return err;
 
@@ -159,7 +161,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 static int mall_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
-                      void **arg, bool ovr, struct netlink_ext_ack *extack)
+                      void **arg, bool ovr, bool rtnl_held,
+                      struct netlink_ext_ack *extack)
 {
        struct cls_mall_head *head = rtnl_dereference(tp->root);
        struct nlattr *tb[TCA_MATCHALL_MAX + 1];
@@ -232,12 +235,13 @@ err_exts_init:
 }
 
 static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
-                      struct netlink_ext_ack *extack)
+                      bool rtnl_held, struct netlink_ext_ack *extack)
 {
        return -EOPNOTSUPP;
 }
 
-static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                     bool rtnl_held)
 {
        struct cls_mall_head *head = rtnl_dereference(tp->root);
 
@@ -279,7 +283,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 }
 
 static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                    struct sk_buff *skb, struct tcmsg *t)
+                    struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct tc_matchall_pcnt gpf = {};
        struct cls_mall_head *head = fh;
index 0404aa5..e590c3a 100644
@@ -276,7 +276,8 @@ static void route4_queue_work(struct route4_filter *f)
        tcf_queue_work(&f->rwork, route4_delete_filter_work);
 }
 
-static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void route4_destroy(struct tcf_proto *tp, bool rtnl_held,
+                          struct netlink_ext_ack *extack)
 {
        struct route4_head *head = rtnl_dereference(tp->root);
        int h1, h2;
@@ -312,7 +313,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
-                        struct netlink_ext_ack *extack)
+                        bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct route4_head *head = rtnl_dereference(tp->root);
        struct route4_filter *f = arg;
@@ -393,7 +394,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
        struct route4_bucket *b;
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
        if (err < 0)
                return err;
 
@@ -468,7 +469,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 static int route4_change(struct net *net, struct sk_buff *in_skb,
                         struct tcf_proto *tp, unsigned long base, u32 handle,
                         struct nlattr **tca, void **arg, bool ovr,
-                        struct netlink_ext_ack *extack)
+                        bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct route4_head *head = rtnl_dereference(tp->root);
        struct route4_filter __rcu **fp;
@@ -560,7 +561,8 @@ errout:
        return err;
 }
 
-static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                       bool rtnl_held)
 {
        struct route4_head *head = rtnl_dereference(tp->root);
        unsigned int h, h1;
@@ -597,7 +599,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 }
 
 static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                      struct sk_buff *skb, struct tcmsg *t)
+                      struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct route4_filter *f = fh;
        struct nlattr *nest;
index e9ccf7d..4d38361 100644
@@ -312,7 +312,8 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
                __rsvp_delete_filter(f);
 }
 
-static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
+                        struct netlink_ext_ack *extack)
 {
        struct rsvp_head *data = rtnl_dereference(tp->root);
        int h1, h2;
@@ -341,7 +342,7 @@ static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
-                      struct netlink_ext_ack *extack)
+                      bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct rsvp_head *head = rtnl_dereference(tp->root);
        struct rsvp_filter *nfp, *f = arg;
@@ -477,7 +478,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle,
                       struct nlattr **tca,
-                      void **arg, bool ovr, struct netlink_ext_ack *extack)
+                      void **arg, bool ovr, bool rtnl_held,
+                      struct netlink_ext_ack *extack)
 {
        struct rsvp_head *data = rtnl_dereference(tp->root);
        struct rsvp_filter *f, *nfp;
@@ -502,7 +504,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
        err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
        if (err < 0)
                return err;
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+                               extack);
        if (err < 0)
                goto errout2;
 
@@ -654,7 +657,8 @@ errout2:
        return err;
 }
 
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                     bool rtnl_held)
 {
        struct rsvp_head *head = rtnl_dereference(tp->root);
        unsigned int h, h1;
@@ -688,7 +692,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 }
 
 static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                    struct sk_buff *skb, struct tcmsg *t)
+                    struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct rsvp_filter *f = fh;
        struct rsvp_session *s;
index 9ccc93f..e198162 100644
@@ -173,7 +173,7 @@ static void tcindex_destroy_fexts_work(struct work_struct *work)
 }
 
 static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
-                         struct netlink_ext_ack *extack)
+                         bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct tcindex_data *p = rtnl_dereference(tp->root);
        struct tcindex_filter_result *r = arg;
@@ -226,7 +226,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp,
 {
        bool last;
 
-       return tcindex_delete(tp, arg, &last, NULL);
+       return tcindex_delete(tp, arg, &last, false, NULL);
 }
 
 static void __tcindex_destroy(struct rcu_head *head)
@@ -275,7 +275,7 @@ static void tcindex_free_perfect_hash(struct tcindex_data *cp)
        kfree(cp->perfect);
 }
 
-static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
+static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
 {
        int i, err = 0;
 
@@ -289,6 +289,9 @@ static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
                                    TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
                if (err < 0)
                        goto errout;
+#ifdef CONFIG_NET_CLS_ACT
+               cp->perfect[i].exts.net = net;
+#endif
        }
 
        return 0;
@@ -305,16 +308,16 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
                  struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
 {
        struct tcindex_filter_result new_filter_result, *old_r = r;
-       struct tcindex_filter_result cr;
        struct tcindex_data *cp = NULL, *oldp;
        struct tcindex_filter *f = NULL; /* make gcc behave */
+       struct tcf_result cr = {};
        int err, balloc = 0;
        struct tcf_exts e;
 
        err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
        if (err < 0)
                return err;
-       err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
        if (err < 0)
                goto errout;
 
@@ -337,7 +340,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        if (p->perfect) {
                int i;
 
-               if (tcindex_alloc_perfect_hash(cp) < 0)
+               if (tcindex_alloc_perfect_hash(net, cp) < 0)
                        goto errout;
                for (i = 0; i < cp->hash; i++)
                        cp->perfect[i].res = p->perfect[i].res;
@@ -348,11 +351,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        err = tcindex_filter_result_init(&new_filter_result);
        if (err < 0)
                goto errout1;
-       err = tcindex_filter_result_init(&cr);
-       if (err < 0)
-               goto errout1;
        if (old_r)
-               cr.res = r->res;
+               cr = r->res;
 
        if (tb[TCA_TCINDEX_HASH])
                cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -406,7 +406,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        err = -ENOMEM;
        if (!cp->perfect && !cp->h) {
                if (valid_perfect_hash(cp)) {
-                       if (tcindex_alloc_perfect_hash(cp) < 0)
+                       if (tcindex_alloc_perfect_hash(net, cp) < 0)
                                goto errout_alloc;
                        balloc = 1;
                } else {
@@ -443,8 +443,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        }
 
        if (tb[TCA_TCINDEX_CLASSID]) {
-               cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
-               tcf_bind_filter(tp, &cr.res, base);
+               cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
+               tcf_bind_filter(tp, &cr, base);
        }
 
        if (old_r && old_r != r) {
@@ -456,7 +456,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        }
 
        oldp = p;
-       r->res = cr.res;
+       r->res = cr;
        tcf_exts_change(&r->exts, &e);
 
        rcu_assign_pointer(tp->root, cp);
@@ -475,6 +475,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
                                ; /* nothing */
 
                rcu_assign_pointer(*fp, f);
+       } else {
+               tcf_exts_destroy(&new_filter_result.exts);
        }
 
        if (oldp)
@@ -487,7 +489,6 @@ errout_alloc:
        else if (balloc == 2)
                kfree(cp->h);
 errout1:
-       tcf_exts_destroy(&cr.exts);
        tcf_exts_destroy(&new_filter_result.exts);
 errout:
        kfree(cp);
@@ -499,7 +500,7 @@ static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
               struct tcf_proto *tp, unsigned long base, u32 handle,
               struct nlattr **tca, void **arg, bool ovr,
-              struct netlink_ext_ack *extack)
+              bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -522,7 +523,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
                                 tca[TCA_RATE], ovr, extack);
 }
 
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
+static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
+                        bool rtnl_held)
 {
        struct tcindex_data *p = rtnl_dereference(tp->root);
        struct tcindex_filter *f, *next;
@@ -558,7 +560,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
        }
 }
 
-static void tcindex_destroy(struct tcf_proto *tp,
+static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
                            struct netlink_ext_ack *extack)
 {
        struct tcindex_data *p = rtnl_dereference(tp->root);
@@ -568,14 +570,14 @@ static void tcindex_destroy(struct tcf_proto *tp,
        walker.count = 0;
        walker.skip = 0;
        walker.fn = tcindex_destroy_element;
-       tcindex_walk(tp, &walker);
+       tcindex_walk(tp, &walker, true);
 
        call_rcu(&p->rcu, __tcindex_destroy);
 }
 
 
 static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                       struct sk_buff *skb, struct tcmsg *t)
+                       struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct tcindex_data *p = rtnl_dereference(tp->root);
        struct tcindex_filter_result *r = fh;
index dcea210..27d29c0 100644
@@ -629,7 +629,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
        return -ENOENT;
 }
 
-static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
+                       struct netlink_ext_ack *extack)
 {
        struct tc_u_common *tp_c = tp->data;
        struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -663,7 +664,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
-                     struct netlink_ext_ack *extack)
+                     bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct tc_u_hnode *ht = arg;
        struct tc_u_common *tp_c = tp->data;
@@ -726,7 +727,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
        if (err < 0)
                return err;
 
@@ -858,7 +859,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
 
 static int u32_change(struct net *net, struct sk_buff *in_skb,
                      struct tcf_proto *tp, unsigned long base, u32 handle,
-                     struct nlattr **tca, void **arg, bool ovr,
+                     struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
                      struct netlink_ext_ack *extack)
 {
        struct tc_u_common *tp_c = tp->data;
@@ -1123,7 +1124,8 @@ erridr:
        return err;
 }
 
-static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+                    bool rtnl_held)
 {
        struct tc_u_common *tp_c = tp->data;
        struct tc_u_hnode *ht;
@@ -1281,7 +1283,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
 }
 
 static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                   struct sk_buff *skb, struct tcmsg *t)
+                   struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
        struct tc_u_knode *n = fh;
        struct tc_u_hnode *ht_up, *ht_down;
index 03e26e8..2283924 100644
@@ -1909,17 +1909,19 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
        block = cops->tcf_block(q, cl, NULL);
        if (!block)
                return;
-       list_for_each_entry(chain, &block->chain_list, list) {
+       for (chain = tcf_get_next_chain(block, NULL);
+            chain;
+            chain = tcf_get_next_chain(block, chain)) {
                struct tcf_proto *tp;
 
-               for (tp = rtnl_dereference(chain->filter_chain);
-                    tp; tp = rtnl_dereference(tp->next)) {
+               for (tp = tcf_get_next_proto(chain, NULL, true);
+                    tp; tp = tcf_get_next_proto(chain, tp, true)) {
                        struct tcf_bind_args arg = {};
 
                        arg.w.fn = tcf_node_bind;
                        arg.classid = clid;
                        arg.cl = new_cl;
-                       tp->ops->walk(tp, &arg.w);
+                       tp->ops->walk(tp, &arg.w, true);
                }
        }
 }
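
tc_bind_tclass() now walks chains and filters through tcf_get_next_chain()/tcf_get_next_proto() instead of dereferencing chain_list under RTNL; presumably the iterators manage chain and proto references so the walk stays valid as filter chains move away from pure RTNL protection. Both follow the NULL-starts-the-walk convention:

        /* sketch of the iterator convention used above */
        for (chain = tcf_get_next_chain(block, NULL);
             chain;
             chain = tcf_get_next_chain(block, chain))
                visit_chain(chain);     /* visit_chain() is illustrative */
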
index 66ba2ce..38e5add 100644
@@ -500,7 +500,7 @@ static void dev_watchdog_down(struct net_device *dev)
  *     netif_carrier_on - set carrier
  *     @dev: network device
  *
- * Device has detected that carrier.
+ * Device has detected acquisition of carrier.
  */
 void netif_carrier_on(struct net_device *dev)
 {
@@ -1366,7 +1366,11 @@ static void mini_qdisc_rcu_func(struct rcu_head *head)
 void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
                          struct tcf_proto *tp_head)
 {
-       struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+       /* Protected with chain0->filter_chain_lock.
+        * Can't access chain directly because tp_head can be NULL.
+        */
+       struct mini_Qdisc *miniq_old =
+               rcu_dereference_protected(*miniqp->p_miniq, 1);
        struct mini_Qdisc *miniq;
 
        if (!tp_head) {
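
rtnl_dereference() would lockdep-assert RTNL here, which no longer matches the locking described in the new comment, so the code spells out rcu_dereference_protected() with a trivially-true condition. The general form documents, and lets lockdep verify, the update-side lock; a sketch:

        /* lockdep checks the stated condition on the update side */
        miniq = rcu_dereference_protected(*pp, lockdep_is_held(&my_lock));

Passing 1, as above, opts out of the check while keeping the sparse annotation that a protected dereference is intended.
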
index 078f01a..435847d 100644
@@ -256,6 +256,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
                + nla_total_size(1) /* INET_DIAG_TOS */
                + nla_total_size(1) /* INET_DIAG_TCLASS */
                + nla_total_size(4) /* INET_DIAG_MARK */
+               + nla_total_size(4) /* INET_DIAG_CLASS_ID */
                + nla_total_size(addrlen * asoc->peer.transport_count)
                + nla_total_size(addrlen * addrcnt)
                + nla_total_size(sizeof(struct inet_diag_meminfo))
index 123e9f2..edfcf16 100644
@@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
 {
        skb->ip_summed = CHECKSUM_NONE;
        skb->csum_not_inet = 0;
+       gso_reset_checksum(skb, ~0);
        return sctp_compute_cksum(skb, skb_transport_offset(skb));
 }
 
index 9644bdc..a78e55a 100644
@@ -2027,7 +2027,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
        struct sctp_endpoint *ep = sctp_sk(sk)->ep;
        struct sctp_transport *transport = NULL;
        struct sctp_sndrcvinfo _sinfo, *sinfo;
-       struct sctp_association *asoc;
+       struct sctp_association *asoc, *tmp;
        struct sctp_cmsgs cmsgs;
        union sctp_addr *daddr;
        bool new = false;
@@ -2053,7 +2053,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 
        /* SCTP_SENDALL process */
        if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) {
-               list_for_each_entry(asoc, &ep->asocs, asocs) {
+               list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) {
                        err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
                                                        msg_len);
                        if (err == 0)
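
list_for_each_entry() is unsafe here because the loop body can end up freeing the association it is standing on; the _safe variant caches the next element before the body runs. A standalone illustration of the difference:

#include <stdio.h>
#include <stdlib.h>

struct node { int v; struct node *next; };

int main(void)
{
        struct node *head = NULL, *cur, *next;

        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));
                n->v = i; n->next = head; head = n;
        }
        /* safe traversal: grab `next` before the body may free `cur` */
        for (cur = head; cur; cur = next) {
                next = cur->next;
                printf("%d\n", cur->v);
                free(cur);      /* would poison cur->next in a naive loop */
        }
        return 0;
}
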
index 80e0ae5..2936ed1 100644
@@ -84,6 +84,19 @@ static void fa_zero(struct flex_array *fa, size_t index, size_t count)
        }
 }
 
+static size_t fa_index(struct flex_array *fa, void *elem, size_t count)
+{
+       size_t index = 0;
+
+       while (count--) {
+               if (elem == flex_array_get(fa, index))
+                       break;
+               index++;
+       }
+
+       return index;
+}
+
 /* Migrates chunks from stream queues to new stream queues if needed,
  * but not across associations. Also, removes those chunks to streams
  * higher than the new max.
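
fa_index(), added above, does a plain linear scan and by construction returns count when the element is absent; the caller added below in sctp_stream_alloc_out() relies on exactly that to BUG_ON a dangling out_curr. The same contract in standalone form:

#include <assert.h>
#include <stddef.h>

/* returns the index of elem in arr[0..count), or count if absent */
static size_t demo_index(void **arr, void *elem, size_t count)
{
        size_t index = 0;

        while (count--) {
                if (elem == arr[index])
                        break;
                index++;
        }
        return index;
}

int main(void)
{
        int a, b;
        void *arr[2] = { &a, &b };

        assert(demo_index(arr, &b, 2) == 1);
        assert(demo_index(arr, NULL, 2) == 2);  /* not found == count */
        return 0;
}
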
@@ -131,8 +144,10 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
                }
        }
 
-       for (i = outcnt; i < stream->outcnt; i++)
+       for (i = outcnt; i < stream->outcnt; i++) {
                kfree(SCTP_SO(stream, i)->ext);
+               SCTP_SO(stream, i)->ext = NULL;
+       }
 }
 
 static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
@@ -147,6 +162,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
 
        if (stream->out) {
                fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt));
+               if (stream->out_curr) {
+                       size_t index = fa_index(stream->out, stream->out_curr,
+                                               stream->outcnt);
+
+                       BUG_ON(index == stream->outcnt);
+                       stream->out_curr = flex_array_get(out, index);
+               }
                fa_free(stream->out);
        }
 
index 48ea766..46fa9f3 100644
@@ -1523,6 +1523,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
        smc = smc_sk(sk);
        lock_sock(sk);
+       if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
+               /* socket was connected before, no more data to read */
+               rc = 0;
+               goto out;
+       }
        if ((sk->sk_state == SMC_INIT) ||
            (sk->sk_state == SMC_LISTEN) ||
            (sk->sk_state == SMC_CLOSED))
@@ -1858,7 +1863,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
 
        smc = smc_sk(sk);
        lock_sock(sk);
-
+       if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
+               /* socket was connected before, no more data to read */
+               rc = 0;
+               goto out;
+       }
        if (sk->sk_state == SMC_INIT ||
            sk->sk_state == SMC_LISTEN ||
            sk->sk_state == SMC_CLOSED)
index db83332..d0b0f4c 100644
 
 /********************************** send *************************************/
 
-struct smc_cdc_tx_pend {
-       struct smc_connection   *conn;          /* socket connection */
-       union smc_host_cursor   cursor; /* tx sndbuf cursor sent */
-       union smc_host_cursor   p_cursor;       /* rx RMBE cursor produced */
-       u16                     ctrl_seq;       /* conn. tx sequence # */
-};
-
 /* handler for send/transmission completion of a CDC msg */
 static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
                               struct smc_link *link,
@@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
                          struct smc_wr_buf **wr_buf,
+                         struct smc_rdma_wr **wr_rdma_buf,
                          struct smc_cdc_tx_pend **pend)
 {
        struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
        int rc;
 
        rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+                                    wr_rdma_buf,
                                     (struct smc_wr_tx_pend_priv **)pend);
        if (!conn->alert_token_local)
                /* abnormal termination */
@@ -96,6 +91,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
                     struct smc_wr_buf *wr_buf,
                     struct smc_cdc_tx_pend *pend)
 {
+       union smc_host_cursor cfed;
        struct smc_link *link;
        int rc;
 
@@ -105,12 +101,12 @@ int smc_cdc_msg_send(struct smc_connection *conn,
 
        conn->tx_cdc_seq++;
        conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
-       smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf,
-                           &conn->local_tx_ctrl, conn);
+       smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed);
        rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
-       if (!rc)
-               smc_curs_copy(&conn->rx_curs_confirmed,
-                             &conn->local_tx_ctrl.cons, conn);
+       if (!rc) {
+               smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
+               conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+       }
 
        return rc;
 }
@@ -121,11 +117,14 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
        struct smc_wr_buf *wr_buf;
        int rc;
 
-       rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
+       rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);
        if (rc)
                return rc;
 
-       return smc_cdc_msg_send(conn, wr_buf, pend);
+       spin_lock_bh(&conn->send_lock);
+       rc = smc_cdc_msg_send(conn, wr_buf, pend);
+       spin_unlock_bh(&conn->send_lock);
+       return rc;
 }
 
 int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
@@ -195,6 +194,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn)
        if (rc)
                return rc;
        smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
+       conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
        /* Calculate transmitted data and increment free send buffer space */
        diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
                             &conn->tx_curs_sent);
@@ -271,26 +271,18 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
                smp_mb__after_atomic();
                smc->sk.sk_data_ready(&smc->sk);
        } else {
-               if (conn->local_rx_ctrl.prod_flags.write_blocked ||
-                   conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
-                   conn->local_rx_ctrl.prod_flags.urg_data_pending) {
-                       if (conn->local_rx_ctrl.prod_flags.urg_data_pending)
-                               conn->urg_state = SMC_URG_NOTYET;
-                       /* force immediate tx of current consumer cursor, but
-                        * under send_lock to guarantee arrival in seqno-order
-                        */
-                       if (smc->sk.sk_state != SMC_INIT)
-                               smc_tx_sndbuf_nonempty(conn);
-               }
+               if (conn->local_rx_ctrl.prod_flags.write_blocked)
+                       smc->sk.sk_data_ready(&smc->sk);
+               if (conn->local_rx_ctrl.prod_flags.urg_data_pending)
+                       conn->urg_state = SMC_URG_NOTYET;
        }
 
-       /* piggy backed tx info */
        /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
-       if (diff_cons && smc_tx_prepared_sends(conn)) {
+       if ((diff_cons && smc_tx_prepared_sends(conn)) ||
+           conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
+           conn->local_rx_ctrl.prod_flags.urg_data_pending)
                smc_tx_sndbuf_nonempty(conn);
-               /* trigger socket release if connection closed */
-               smc_close_wake_tx_prepared(smc);
-       }
+
        if (diff_cons && conn->urg_tx_pend &&
            atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) {
                /* urg data confirmed by peer, indicate we're ready for more */
index e8c214b..861dc24 100644
@@ -160,7 +160,9 @@ static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt,
 #endif
 }
 
-/* calculate cursor difference between old and new, where old <= new */
+/* calculate cursor difference between old and new, where old <= new and
+ * difference cannot exceed size
+ */
 static inline int smc_curs_diff(unsigned int size,
                                union smc_host_cursor *old,
                                union smc_host_cursor *new)
@@ -185,28 +187,51 @@ static inline int smc_curs_comp(unsigned int size,
        return smc_curs_diff(size, old, new);
 }
 
+/* calculate cursor difference between old and new, where old <= new and
+ * difference may exceed size
+ */
+static inline int smc_curs_diff_large(unsigned int size,
+                                     union smc_host_cursor *old,
+                                     union smc_host_cursor *new)
+{
+       if (old->wrap < new->wrap)
+               return min_t(int,
+                            (size - old->count) + new->count +
+                            (new->wrap - old->wrap - 1) * size,
+                            size);
+
+       if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */
+               return min_t(int,
+                            (size - old->count) + new->count +
+                            (new->wrap + 0xffff - old->wrap) * size,
+                            size);
+
+       return max_t(int, 0, (new->count - old->count));
+}
+
 static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer,
                                          union smc_host_cursor *local,
+                                         union smc_host_cursor *save,
                                          struct smc_connection *conn)
 {
-       union smc_host_cursor temp;
-
-       smc_curs_copy(&temp, local, conn);
-       peer->count = htonl(temp.count);
-       peer->wrap = htons(temp.wrap);
+       smc_curs_copy(save, local, conn);
+       peer->count = htonl(save->count);
+       peer->wrap = htons(save->wrap);
        /* peer->reserved = htons(0); must be ensured by caller */
 }
 
 static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer,
-                                      struct smc_host_cdc_msg *local,
-                                      struct smc_connection *conn)
+                                      struct smc_connection *conn,
+                                      union smc_host_cursor *save)
 {
+       struct smc_host_cdc_msg *local = &conn->local_tx_ctrl;
+
        peer->common.type = local->common.type;
        peer->len = local->len;
        peer->seqno = htons(local->seqno);
        peer->token = htonl(local->token);
-       smc_host_cursor_to_cdc(&peer->prod, &local->prod, conn);
-       smc_host_cursor_to_cdc(&peer->cons, &local->cons, conn);
+       smc_host_cursor_to_cdc(&peer->prod, &local->prod, save, conn);
+       smc_host_cursor_to_cdc(&peer->cons, &local->cons, save, conn);
        peer->prod_flags = local->prod_flags;
        peer->conn_state_flags = local->conn_state_flags;
 }
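
smc_curs_diff() assumes old and new are at most one buffer apart; smc_curs_diff_large(), added above, tolerates multi-wrap distances by clamping the result to size, and smc_tx_consumer_update() further down switches to it so a producer far ahead of the confirmed cursor cannot yield a bogus sender_free. A self-contained rendering of the clamped arithmetic, using a simplified (wrap, count) cursor:

#include <assert.h>

struct cursor { unsigned short wrap; unsigned int count; };

static int min_int(int a, int b) { return a < b ? a : b; }

/* clamped difference, mirroring smc_curs_diff_large() */
static int curs_diff_large(unsigned int size,
                           const struct cursor *old,
                           const struct cursor *new)
{
        if (old->wrap < new->wrap)
                return min_int((size - old->count) + new->count +
                               (new->wrap - old->wrap - 1) * size, size);
        if (old->wrap > new->wrap)      /* wrap flipped 0xffff -> 0x0000 */
                return min_int((size - old->count) + new->count +
                               (new->wrap + 0xffff - old->wrap) * size, size);
        return new->count > old->count ? new->count - old->count : 0;
}

int main(void)
{
        struct cursor old = { .wrap = 1, .count = 10 };
        struct cursor new = { .wrap = 5, .count = 90 };

        /* several wraps apart: raw sum is 480, clamped to size */
        assert(curs_diff_large(100, &old, &new) == 100);
        new.wrap = 1; new.count = 40;   /* same wrap: plain difference */
        assert(curs_diff_large(100, &old, &new) == 30);
        return 0;
}
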
@@ -271,10 +296,16 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
                smcr_cdc_msg_to_host(local, peer, conn);
 }
 
-struct smc_cdc_tx_pend;
+struct smc_cdc_tx_pend {
+       struct smc_connection   *conn;          /* socket connection */
+       union smc_host_cursor   cursor;         /* tx sndbuf cursor sent */
+       union smc_host_cursor   p_cursor;       /* rx RMBE cursor produced */
+       u16                     ctrl_seq;       /* conn. tx sequence # */
+};
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
                          struct smc_wr_buf **wr_buf,
+                         struct smc_rdma_wr **wr_rdma_buf,
                          struct smc_cdc_tx_pend **pend);
 void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
 int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
index 776e9df..d53fd58 100644
@@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
        vec.iov_len = sizeof(struct smc_clc_msg_decline);
        len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
                             sizeof(struct smc_clc_msg_decline));
-       if (len < sizeof(struct smc_clc_msg_decline))
+       if (len < 0 || len < sizeof(struct smc_clc_msg_decline))
                len = -EPROTO;
        return len > 0 ? 0 : len;
 }
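
The added len < 0 test guards against the usual signed/unsigned trap: kernel_sendmsg() returns a negative errno in an int, and comparing that int against a size_t converts it to a huge unsigned value, so len < sizeof(...) alone never fires on error. A user-space demonstration of the pitfall:

#include <stdio.h>

int main(void)
{
        int len = -32;                  /* an errno-style failure */
        size_t need = sizeof(long) * 8; /* stand-in for the msg size */

        /* len is converted to size_t: -32 becomes huge, test is false */
        printf("missed error: %d\n", len < need ? 1 : 0);       /* 0 */
        /* the fixed form catches it first */
        printf("caught error: %d\n",
               (len < 0 || (size_t)len < need) ? 1 : 0);        /* 1 */
        return 0;
}
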
index 0e60dd7..2ad37e9 100644
@@ -345,14 +345,7 @@ static void smc_close_passive_work(struct work_struct *work)
 
        switch (sk->sk_state) {
        case SMC_INIT:
-               if (atomic_read(&conn->bytes_to_rcv) ||
-                   (rxflags->peer_done_writing &&
-                    !smc_cdc_rxed_any_close(conn))) {
-                       sk->sk_state = SMC_APPCLOSEWAIT1;
-               } else {
-                       sk->sk_state = SMC_CLOSED;
-                       sock_put(sk); /* passive closing */
-               }
+               sk->sk_state = SMC_APPCLOSEWAIT1;
                break;
        case SMC_ACTIVE:
                sk->sk_state = SMC_APPCLOSEWAIT1;
index a1a6d35..53a17cf 100644
@@ -127,6 +127,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 {
        struct smc_link_group *lgr = conn->lgr;
 
+       if (!lgr)
+               return;
        write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local) {
                __smc_lgr_unregister_conn(conn);
@@ -158,8 +160,6 @@ static void smc_lgr_free_work(struct work_struct *work)
        bool conns;
 
        spin_lock_bh(&smc_lgr_list.lock);
-       if (list_empty(&lgr->list))
-               goto free;
        read_lock_bh(&lgr->conns_lock);
        conns = RB_EMPTY_ROOT(&lgr->conns_all);
        read_unlock_bh(&lgr->conns_lock);
@@ -167,8 +167,8 @@ static void smc_lgr_free_work(struct work_struct *work)
                spin_unlock_bh(&smc_lgr_list.lock);
                return;
        }
-       list_del_init(&lgr->list); /* remove from smc_lgr_list */
-free:
+       if (!list_empty(&lgr->list))
+               list_del_init(&lgr->list); /* remove from smc_lgr_list */
        spin_unlock_bh(&smc_lgr_list.lock);
 
        if (!lgr->is_smcd && !lgr->terminating) {
@@ -299,13 +299,13 @@ static void smc_buf_unuse(struct smc_connection *conn,
                conn->sndbuf_desc->used = 0;
        if (conn->rmb_desc) {
                if (!conn->rmb_desc->regerr) {
-                       conn->rmb_desc->used = 0;
                        if (!lgr->is_smcd) {
                                /* unregister rmb with peer */
                                smc_llc_do_delete_rkey(
                                                &lgr->lnk[SMC_SINGLE_LINK],
                                                conn->rmb_desc);
                        }
+                       conn->rmb_desc->used = 0;
                } else {
                        /* buf registration failed, reuse not possible */
                        write_lock_bh(&lgr->rmbs_lock);
@@ -629,6 +629,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
                        local_contact = SMC_REUSE_CONTACT;
                        conn->lgr = lgr;
                        smc_lgr_register_conn(conn); /* add smc conn to lgr */
+                       if (delayed_work_pending(&lgr->free_work))
+                               cancel_delayed_work(&lgr->free_work);
                        write_unlock_bh(&lgr->conns_lock);
                        break;
                }
index b002879..8806d2a 100644
@@ -52,6 +52,24 @@ enum smc_wr_reg_state {
        FAILED          /* ib_wr_reg_mr response: failure */
 };
 
+struct smc_rdma_sge {                          /* sges for RDMA writes */
+       struct ib_sge           wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE];
+};
+
+#define SMC_MAX_RDMA_WRITES    2               /* max. # of RDMA writes per
+                                                * message send
+                                                */
+
+struct smc_rdma_sges {                         /* sges per message send */
+       struct smc_rdma_sge     tx_rdma_sge[SMC_MAX_RDMA_WRITES];
+};
+
+struct smc_rdma_wr {                           /* work requests per message
+                                                * send
+                                                */
+       struct ib_rdma_wr       wr_tx_rdma[SMC_MAX_RDMA_WRITES];
+};
+
 struct smc_link {
        struct smc_ib_device    *smcibdev;      /* ib-device */
        u8                      ibport;         /* port - values 1 | 2 */
@@ -64,6 +82,8 @@ struct smc_link {
        struct smc_wr_buf       *wr_tx_bufs;    /* WR send payload buffers */
        struct ib_send_wr       *wr_tx_ibs;     /* WR send meta data */
        struct ib_sge           *wr_tx_sges;    /* WR send gather meta data */
+       struct smc_rdma_sges    *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/
+       struct smc_rdma_wr      *wr_tx_rdmas;   /* WR RDMA WRITE */
        struct smc_wr_tx_pend   *wr_tx_pends;   /* WR send waiting for CQE */
        /* above four vectors have wr_tx_cnt elements and use the same index */
        dma_addr_t              wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
index e519ef2..0b244be 100644
@@ -257,12 +257,20 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
        smcibdev = container_of(handler, struct smc_ib_device, event_handler);
 
        switch (ibevent->event) {
-       case IB_EVENT_PORT_ERR:
        case IB_EVENT_DEVICE_FATAL:
+               /* terminate all ports on device */
+               for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++)
+                       set_bit(port_idx, &smcibdev->port_event_mask);
+               schedule_work(&smcibdev->port_event_work);
+               break;
+       case IB_EVENT_PORT_ERR:
        case IB_EVENT_PORT_ACTIVE:
+       case IB_EVENT_GID_CHANGE:
                port_idx = ibevent->element.port_num - 1;
-               set_bit(port_idx, &smcibdev->port_event_mask);
-               schedule_work(&smcibdev->port_event_work);
+               if (port_idx < SMC_MAX_PORTS) {
+                       set_bit(port_idx, &smcibdev->port_event_mask);
+                       schedule_work(&smcibdev->port_event_work);
+               }
                break;
        default:
                break;
@@ -289,18 +297,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
 
 static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
 {
-       struct smc_ib_device *smcibdev =
-               (struct smc_ib_device *)ibevent->device;
+       struct smc_link *lnk = (struct smc_link *)priv;
+       struct smc_ib_device *smcibdev = lnk->smcibdev;
        u8 port_idx;
 
        switch (ibevent->event) {
-       case IB_EVENT_DEVICE_FATAL:
-       case IB_EVENT_GID_CHANGE:
-       case IB_EVENT_PORT_ERR:
+       case IB_EVENT_QP_FATAL:
        case IB_EVENT_QP_ACCESS_ERR:
-               port_idx = ibevent->element.port_num - 1;
-               set_bit(port_idx, &smcibdev->port_event_mask);
-               schedule_work(&smcibdev->port_event_work);
+               port_idx = ibevent->element.qp->port - 1;
+               if (port_idx < SMC_MAX_PORTS) {
+                       set_bit(port_idx, &smcibdev->port_event_mask);
+                       schedule_work(&smcibdev->port_event_work);
+               }
                break;
        default:
                break;
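
In both handlers the key point is that port_idx now comes from event data and is range-checked before set_bit(); an unchecked index into the fixed-size port_event_mask word would be an out-of-bounds bit write. The check also neutralizes u8 wrap-around from a bogus port number, e.g. (SMC_MAX_PORTS assumed to be 2, mirroring smc_ib.h):

#include <stdio.h>

#define SMC_MAX_PORTS 2         /* assumed value */

int main(void)
{
        unsigned char port_idx = (unsigned char)(0 - 1); /* port 0 wraps to 255 */

        printf("port_idx=%u in range: %d\n",
               (unsigned)port_idx, port_idx < SMC_MAX_PORTS);   /* 0 */
        return 0;
}
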
index a6d3623..4fd60c5 100644
@@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link,
 {
        int rc;
 
-       rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend);
+       rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
+                                    pend);
        if (rc < 0)
                return rc;
        BUILD_BUG_ON_MSG(
index 7cb3e4f..632c310 100644
@@ -27,7 +27,7 @@
 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
        [SMC_PNETID_NAME] = {
                .type = NLA_NUL_STRING,
-               .len = SMC_MAX_PNETID_LEN - 1
+               .len = SMC_MAX_PNETID_LEN
        },
        [SMC_PNETID_ETHNAME] = {
                .type = NLA_NUL_STRING,
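
For NLA_NUL_STRING the policy .len bounds the string length with the terminating NUL allowed on top, so the previous SMC_MAX_PNETID_LEN - 1 made maximum-length pnetids fail validation. In policy form (a sketch, array index illustrative):

static struct nla_policy demo_policy[] = {
        [0] = {
                .type = NLA_NUL_STRING,
                .len  = SMC_MAX_PNETID_LEN,     /* max chars, NUL excluded */
        },
};
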
index d8366ed..a3bff08 100644
 #include "smc.h"
 #include "smc_wr.h"
 #include "smc_cdc.h"
+#include "smc_close.h"
 #include "smc_ism.h"
 #include "smc_tx.h"
 
-#define SMC_TX_WORK_DELAY      HZ
+#define SMC_TX_WORK_DELAY      0
 #define SMC_TX_CORK_DELAY      (HZ >> 2)       /* 250 ms */
 
 /***************************** sndbuf producer *******************************/
@@ -165,12 +166,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
                        conn->local_tx_ctrl.prod_flags.urg_data_pending = 1;
 
                if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) {
+                       if (send_done)
+                               return send_done;
                        rc = smc_tx_wait(smc, msg->msg_flags);
-                       if (rc) {
-                               if (send_done)
-                                       return send_done;
+                       if (rc)
                                goto out_err;
-                       }
                        continue;
                }
 
@@ -267,27 +267,23 @@ int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
 
 /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
 static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
-                            int num_sges, struct ib_sge sges[])
+                            int num_sges, struct ib_rdma_wr *rdma_wr)
 {
        struct smc_link_group *lgr = conn->lgr;
-       struct ib_rdma_wr rdma_wr;
        struct smc_link *link;
        int rc;
 
-       memset(&rdma_wr, 0, sizeof(rdma_wr));
        link = &lgr->lnk[SMC_SINGLE_LINK];
-       rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link);
-       rdma_wr.wr.sg_list = sges;
-       rdma_wr.wr.num_sge = num_sges;
-       rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
-       rdma_wr.remote_addr =
+       rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
+       rdma_wr->wr.num_sge = num_sges;
+       rdma_wr->remote_addr =
                lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
                /* RMBE within RMB */
                conn->tx_off +
                /* offset within RMBE */
                peer_rmbe_offset;
-       rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
-       rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);
+       rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
+       rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
        if (rc) {
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
                smc_lgr_terminate(lgr);
@@ -314,24 +310,25 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
 /* SMC-R helper for smc_tx_rdma_writes() */
 static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
                               size_t src_off, size_t src_len,
-                              size_t dst_off, size_t dst_len)
+                              size_t dst_off, size_t dst_len,
+                              struct smc_rdma_wr *wr_rdma_buf)
 {
        dma_addr_t dma_addr =
                sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
-       struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
        int src_len_sum = src_len, dst_len_sum = dst_len;
-       struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
        int sent_count = src_off;
        int srcchunk, dstchunk;
        int num_sges;
        int rc;
 
        for (dstchunk = 0; dstchunk < 2; dstchunk++) {
+               struct ib_sge *sge =
+                       wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
+
                num_sges = 0;
                for (srcchunk = 0; srcchunk < 2; srcchunk++) {
-                       sges[srcchunk].addr = dma_addr + src_off;
-                       sges[srcchunk].length = src_len;
-                       sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
+                       sge[srcchunk].addr = dma_addr + src_off;
+                       sge[srcchunk].length = src_len;
                        num_sges++;
 
                        src_off += src_len;
@@ -344,7 +341,8 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
                        src_len = dst_len - src_len; /* remainder */
                        src_len_sum += src_len;
                }
-               rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
+               rc = smc_tx_rdma_write(conn, dst_off, num_sges,
+                                      &wr_rdma_buf->wr_tx_rdma[dstchunk]);
                if (rc)
                        return rc;
                if (dst_len_sum == len)
@@ -403,7 +401,8 @@ static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
 /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
  * usable snd_wnd as max transmit
  */
-static int smc_tx_rdma_writes(struct smc_connection *conn)
+static int smc_tx_rdma_writes(struct smc_connection *conn,
+                             struct smc_rdma_wr *wr_rdma_buf)
 {
        size_t len, src_len, dst_off, dst_len; /* current chunk values */
        union smc_host_cursor sent, prep, prod, cons;
@@ -464,7 +463,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
                                         dst_off, dst_len);
        else
                rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
-                                        dst_off, dst_len);
+                                        dst_off, dst_len, wr_rdma_buf);
        if (rc)
                return rc;
 
@@ -485,31 +484,30 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 {
        struct smc_cdc_producer_flags *pflags;
+       struct smc_rdma_wr *wr_rdma_buf;
        struct smc_cdc_tx_pend *pend;
        struct smc_wr_buf *wr_buf;
        int rc;
 
-       spin_lock_bh(&conn->send_lock);
-       rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
+       rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);
        if (rc < 0) {
                if (rc == -EBUSY) {
                        struct smc_sock *smc =
                                container_of(conn, struct smc_sock, conn);
 
-                       if (smc->sk.sk_err == ECONNABORTED) {
-                               rc = sock_error(&smc->sk);
-                               goto out_unlock;
-                       }
+                       if (smc->sk.sk_err == ECONNABORTED)
+                               return sock_error(&smc->sk);
                        rc = 0;
                        if (conn->alert_token_local) /* connection healthy */
                                mod_delayed_work(system_wq, &conn->tx_work,
                                                 SMC_TX_WORK_DELAY);
                }
-               goto out_unlock;
+               return rc;
        }
 
+       spin_lock_bh(&conn->send_lock);
        if (!conn->local_tx_ctrl.prod_flags.urg_data_present) {
-               rc = smc_tx_rdma_writes(conn);
+               rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
                if (rc) {
                        smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
                                           (struct smc_wr_tx_pend_priv *)pend);
@@ -536,7 +534,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
 
        spin_lock_bh(&conn->send_lock);
        if (!pflags->urg_data_present)
-               rc = smc_tx_rdma_writes(conn);
+               rc = smc_tx_rdma_writes(conn, NULL);
        if (!rc)
                rc = smcd_cdc_msg_send(conn);
 
@@ -557,6 +555,12 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
        else
                rc = smcr_tx_sndbuf_nonempty(conn);
 
+       if (!rc) {
+               /* trigger socket release if connection is closing */
+               struct smc_sock *smc = container_of(conn, struct smc_sock,
+                                                   conn);
+               smc_close_wake_tx_prepared(smc);
+       }
        return rc;
 }
 
@@ -598,7 +602,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
        if (to_confirm > conn->rmbe_update_limit) {
                smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn);
                sender_free = conn->rmb_desc->len -
-                             smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
+                             smc_curs_diff_large(conn->rmb_desc->len,
+                                                 &cfed, &prod);
        }
 
        if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
@@ -612,9 +617,6 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
                                              SMC_TX_WORK_DELAY);
                        return;
                }
-               smc_curs_copy(&conn->rx_curs_confirmed,
-                             &conn->local_tx_ctrl.cons, conn);
-               conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
        }
        if (conn->local_rx_ctrl.prod_flags.write_blocked &&
            !atomic_read(&conn->bytes_to_rcv))
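
[ The smc_tx_consumer_update() hunk above swaps in smc_curs_diff_large() and reorders the cursor arguments so the free-space computation measures from the confirmed cursor to the producer cursor without wrap-around errors. A minimal user-space sketch of a wrap-safe cursor distance in a ring of size len, illustrative only and not the kernel helper:

static unsigned int curs_dist(unsigned int len, unsigned int old,
                              unsigned int new)
{
        /* bytes travelled from old to new in a ring of size len */
        return new >= old ? new - old : len - old + new;
}
]
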
index c269475..253aa75 100644
@@ -160,6 +160,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
  * @link:              Pointer to smc_link used to later send the message.
  * @handler:           Send completion handler function pointer.
  * @wr_buf:            Out value returns pointer to message buffer.
+ * @wr_rdma_buf:       Out value returns pointer to rdma work request.
  * @wr_pend_priv:      Out value returns pointer serving as handler context.
  *
  * Return: 0 on success, or -errno on error.
@@ -167,6 +168,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
 int smc_wr_tx_get_free_slot(struct smc_link *link,
                            smc_wr_tx_handler handler,
                            struct smc_wr_buf **wr_buf,
+                           struct smc_rdma_wr **wr_rdma_buf,
                            struct smc_wr_tx_pend_priv **wr_pend_priv)
 {
        struct smc_wr_tx_pend *wr_pend;
@@ -204,6 +206,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
        wr_ib = &link->wr_tx_ibs[idx];
        wr_ib->wr_id = wr_id;
        *wr_buf = &link->wr_tx_bufs[idx];
+       if (wr_rdma_buf)
+               *wr_rdma_buf = &link->wr_tx_rdmas[idx];
        *wr_pend_priv = &wr_pend->priv;
        return 0;
 }
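
[ smc_wr_tx_get_free_slot() above hands back the matching RDMA work request through a new optional out parameter; callers with no use for it pass NULL, as smcd_tx_sndbuf_nonempty() does via smc_tx_rdma_writes(conn, NULL). A minimal sketch of the idiom, with illustrative names:

struct wr { int idx; };

static struct wr bufs[16], rdmas[16];

static int get_free_slot(int idx, struct wr **buf, struct wr **rdma_buf)
{
        *buf = &bufs[idx];
        if (rdma_buf)                   /* optional out parameter */
                *rdma_buf = &rdmas[idx];
        return 0;
}
]
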
@@ -218,10 +222,10 @@ int smc_wr_tx_put_slot(struct smc_link *link,
                u32 idx = pend->idx;
 
                /* clear the full struct smc_wr_tx_pend including .priv */
-               memset(&link->wr_tx_pends[pend->idx], 0,
-                      sizeof(link->wr_tx_pends[pend->idx]));
-               memset(&link->wr_tx_bufs[pend->idx], 0,
-                      sizeof(link->wr_tx_bufs[pend->idx]));
+               memset(&link->wr_tx_pends[idx], 0,
+                      sizeof(link->wr_tx_pends[idx]));
+               memset(&link->wr_tx_bufs[idx], 0,
+                      sizeof(link->wr_tx_bufs[idx]));
                test_and_clear_bit(idx, link->wr_tx_mask);
                return 1;
        }
@@ -465,12 +469,26 @@ static void smc_wr_init_sge(struct smc_link *lnk)
                        lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
                lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
                lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
+               lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
+                       lnk->roce_pd->local_dma_lkey;
+               lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
+                       lnk->roce_pd->local_dma_lkey;
+               lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
+                       lnk->roce_pd->local_dma_lkey;
+               lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
+                       lnk->roce_pd->local_dma_lkey;
                lnk->wr_tx_ibs[i].next = NULL;
                lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
                lnk->wr_tx_ibs[i].num_sge = 1;
                lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
                lnk->wr_tx_ibs[i].send_flags =
                        IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+               lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
+               lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
+               lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
+                       lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
+               lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
+                       lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
        }
        for (i = 0; i < lnk->wr_rx_cnt; i++) {
                lnk->wr_rx_sges[i].addr =
@@ -521,8 +539,12 @@ void smc_wr_free_link_mem(struct smc_link *lnk)
        lnk->wr_tx_mask = NULL;
        kfree(lnk->wr_tx_sges);
        lnk->wr_tx_sges = NULL;
+       kfree(lnk->wr_tx_rdma_sges);
+       lnk->wr_tx_rdma_sges = NULL;
        kfree(lnk->wr_rx_sges);
        lnk->wr_rx_sges = NULL;
+       kfree(lnk->wr_tx_rdmas);
+       lnk->wr_tx_rdmas = NULL;
        kfree(lnk->wr_rx_ibs);
        lnk->wr_rx_ibs = NULL;
        kfree(lnk->wr_tx_ibs);
@@ -552,10 +574,20 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
                                  GFP_KERNEL);
        if (!link->wr_rx_ibs)
                goto no_mem_wr_tx_ibs;
+       link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
+                                   sizeof(link->wr_tx_rdmas[0]),
+                                   GFP_KERNEL);
+       if (!link->wr_tx_rdmas)
+               goto no_mem_wr_rx_ibs;
+       link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
+                                       sizeof(link->wr_tx_rdma_sges[0]),
+                                       GFP_KERNEL);
+       if (!link->wr_tx_rdma_sges)
+               goto no_mem_wr_tx_rdmas;
        link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
                                   GFP_KERNEL);
        if (!link->wr_tx_sges)
-               goto no_mem_wr_rx_ibs;
+               goto no_mem_wr_tx_rdma_sges;
        link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
                                   sizeof(link->wr_rx_sges[0]),
                                   GFP_KERNEL);
@@ -579,6 +611,10 @@ no_mem_wr_rx_sges:
        kfree(link->wr_rx_sges);
 no_mem_wr_tx_sges:
        kfree(link->wr_tx_sges);
+no_mem_wr_tx_rdma_sges:
+       kfree(link->wr_tx_rdma_sges);
+no_mem_wr_tx_rdmas:
+       kfree(link->wr_tx_rdmas);
 no_mem_wr_rx_ibs:
        kfree(link->wr_rx_ibs);
 no_mem_wr_tx_ibs:
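
[ The allocation path above grows the usual LIFO goto-unwind chain: each allocation gets an error label that frees everything obtained before it, so a failure unwinds in exactly the reverse order. A self-contained sketch of the pattern:

#include <stdlib.h>

int alloc_three(void **a, void **b, void **c)
{
        *a = malloc(64);
        if (!*a)
                goto no_a;
        *b = malloc(64);
        if (!*b)
                goto no_b;
        *c = malloc(64);
        if (!*c)
                goto no_c;
        return 0;
no_c:
        free(*b);
no_b:
        free(*a);
no_a:
        return -1;                      /* kernel code would return -ENOMEM */
}
]
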
index 1d85bb1..09bf32f 100644
@@ -85,6 +85,7 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev);
 
 int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,
                            struct smc_wr_buf **wr_buf,
+                           struct smc_rdma_wr **wrs,
                            struct smc_wr_tx_pend_priv **wr_pend_priv);
 int smc_wr_tx_put_slot(struct smc_link *link,
                       struct smc_wr_tx_pend_priv *wr_pend_priv);
index d519306..643a164 100644
@@ -963,8 +963,7 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 EXPORT_SYMBOL(dlci_ioctl_set);
 
 static long sock_do_ioctl(struct net *net, struct socket *sock,
-                         unsigned int cmd, unsigned long arg,
-                         unsigned int ifreq_size)
+                         unsigned int cmd, unsigned long arg)
 {
        int err;
        void __user *argp = (void __user *)arg;
@@ -990,11 +989,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
        } else {
                struct ifreq ifr;
                bool need_copyout;
-               if (copy_from_user(&ifr, argp, ifreq_size))
+               if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
                        return -EFAULT;
                err = dev_ioctl(net, cmd, &ifr, &need_copyout);
                if (!err && need_copyout)
-                       if (copy_to_user(argp, &ifr, ifreq_size))
+                       if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
                                return -EFAULT;
        }
        return err;
@@ -1093,8 +1092,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                        err = open_related_ns(&net->ns, get_net_ns);
                        break;
                default:
-                       err = sock_do_ioctl(net, sock, cmd, arg,
-                                           sizeof(struct ifreq));
+                       err = sock_do_ioctl(net, sock, cmd, arg);
                        break;
                }
        return err;
@@ -2802,8 +2800,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock,
        int err;
 
        set_fs(KERNEL_DS);
-       err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv,
-                           sizeof(struct compat_ifreq));
+       err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
        set_fs(old_fs);
        if (!err)
                err = compat_put_timeval(&ktv, up);
@@ -2819,8 +2816,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
        int err;
 
        set_fs(KERNEL_DS);
-       err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts,
-                           sizeof(struct compat_ifreq));
+       err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
        set_fs(old_fs);
        if (!err)
                err = compat_put_timespec(&kts, up);
@@ -3016,6 +3012,54 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
        return dev_ioctl(net, cmd, &ifreq, NULL);
 }
 
+static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
+                             unsigned int cmd,
+                             struct compat_ifreq __user *uifr32)
+{
+       struct ifreq __user *uifr;
+       int err;
+
+       /* Handle the fact that while struct ifreq has the same *layout* on
+        * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
+        * which are handled elsewhere, it still has different *size* due to
+        * ifreq::ifru_ifmap (which is 16 bytes on 32-bit, 24 bytes on 64-bit,
+        * resulting in struct ifreq being 32 and 40 bytes respectively).
+        * As a result, if the struct happens to be at the end of a page and
+        * the next page isn't readable/writable, we get a fault. To prevent
+        * that, copy back and forth to the full size.
+        */
+
+       uifr = compat_alloc_user_space(sizeof(*uifr));
+       if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
+               return -EFAULT;
+
+       err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
+
+       if (!err) {
+               switch (cmd) {
+               case SIOCGIFFLAGS:
+               case SIOCGIFMETRIC:
+               case SIOCGIFMTU:
+               case SIOCGIFMEM:
+               case SIOCGIFHWADDR:
+               case SIOCGIFINDEX:
+               case SIOCGIFADDR:
+               case SIOCGIFBRDADDR:
+               case SIOCGIFDSTADDR:
+               case SIOCGIFNETMASK:
+               case SIOCGIFPFLAGS:
+               case SIOCGIFTXQLEN:
+               case SIOCGMIIPHY:
+               case SIOCGMIIREG:
+               case SIOCGIFNAME:
+                       if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
+                               err = -EFAULT;
+                       break;
+               }
+       }
+       return err;
+}
+
 static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
                        struct compat_ifreq __user *uifr32)
 {
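
[ The size mismatch described in the comment above is easy to verify from user space: struct ifmap leads with two unsigned long fields, so it is 16 bytes under the 32-bit ABI and 24 bytes under the 64-bit one, making struct ifreq 32 vs. 40 bytes. A quick probe (prints 40 on a 64-bit build, 32 when built with -m32):

#include <stdio.h>
#include <net/if.h>

int main(void)
{
        printf("sizeof(struct ifreq) = %zu\n", sizeof(struct ifreq));
        return 0;
}
]
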
@@ -3131,8 +3175,7 @@ static int routing_ioctl(struct net *net, struct socket *sock,
        }
 
        set_fs(KERNEL_DS);
-       ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r,
-                           sizeof(struct compat_ifreq));
+       ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
        set_fs(old_fs);
 
 out:
@@ -3232,21 +3275,22 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
        case SIOCSIFTXQLEN:
        case SIOCBRADDIF:
        case SIOCBRDELIF:
+       case SIOCGIFNAME:
        case SIOCSIFNAME:
        case SIOCGMIIPHY:
        case SIOCGMIIREG:
        case SIOCSMIIREG:
-       case SIOCSARP:
-       case SIOCGARP:
-       case SIOCDARP:
-       case SIOCATMARK:
        case SIOCBONDENSLAVE:
        case SIOCBONDRELEASE:
        case SIOCBONDSETHWADDR:
        case SIOCBONDCHANGEACTIVE:
-       case SIOCGIFNAME:
-               return sock_do_ioctl(net, sock, cmd, arg,
-                                    sizeof(struct compat_ifreq));
+               return compat_ifreq_ioctl(net, sock, cmd, argp);
+
+       case SIOCSARP:
+       case SIOCGARP:
+       case SIOCDARP:
+       case SIOCATMARK:
+               return sock_do_ioctl(net, sock, cmd, arg);
        }
 
        return -ENOIOCTLCMD;
index cf51b8f..1f20011 100644
@@ -537,6 +537,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
                                      DMA_TO_DEVICE);
 }
 
+/* If the xdr_buf has more elements than the device can
+ * transmit in a single RDMA Send, then the reply will
+ * have to be copied into a bounce buffer.
+ */
+static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
+                                   struct xdr_buf *xdr,
+                                   __be32 *wr_lst)
+{
+       int elements;
+
+       /* xdr->head */
+       elements = 1;
+
+       /* xdr->pages */
+       if (!wr_lst) {
+               unsigned int remaining;
+               unsigned long pageoff;
+
+               pageoff = xdr->page_base & ~PAGE_MASK;
+               remaining = xdr->page_len;
+               while (remaining) {
+                       ++elements;
+                       remaining -= min_t(u32, PAGE_SIZE - pageoff,
+                                          remaining);
+                       pageoff = 0;
+               }
+       }
+
+       /* xdr->tail */
+       if (xdr->tail[0].iov_len)
+               ++elements;
+
+       /* assume 1 SGE is needed for the transport header */
+       return elements >= rdma->sc_max_send_sges;
+}
+
+/* The device is not capable of sending the reply directly.
+ * Assemble the elements of @xdr into the transport header
+ * buffer.
+ */
+static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
+                                     struct svc_rdma_send_ctxt *ctxt,
+                                     struct xdr_buf *xdr, __be32 *wr_lst)
+{
+       unsigned char *dst, *tailbase;
+       unsigned int taillen;
+
+       dst = ctxt->sc_xprt_buf;
+       dst += ctxt->sc_sges[0].length;
+
+       memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
+       dst += xdr->head[0].iov_len;
+
+       tailbase = xdr->tail[0].iov_base;
+       taillen = xdr->tail[0].iov_len;
+       if (wr_lst) {
+               u32 xdrpad;
+
+               xdrpad = xdr_padsize(xdr->page_len);
+               if (taillen && xdrpad) {
+                       tailbase += xdrpad;
+                       taillen -= xdrpad;
+               }
+       } else {
+               unsigned int len, remaining;
+               unsigned long pageoff;
+               struct page **ppages;
+
+               ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+               pageoff = xdr->page_base & ~PAGE_MASK;
+               remaining = xdr->page_len;
+               while (remaining) {
+                       len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+
+                       memcpy(dst, page_address(*ppages), len);
+                       remaining -= len;
+                       dst += len;
+                       pageoff = 0;
+               }
+       }
+
+       if (taillen)
+               memcpy(dst, tailbase, taillen);
+
+       ctxt->sc_sges[0].length += xdr->len;
+       ib_dma_sync_single_for_device(rdma->sc_pd->device,
+                                     ctxt->sc_sges[0].addr,
+                                     ctxt->sc_sges[0].length,
+                                     DMA_TO_DEVICE);
+
+       return 0;
+}
+
 /* svc_rdma_map_reply_msg - Map the buffer holding RPC message
  * @rdma: controlling transport
  * @ctxt: send_ctxt for the Send WR
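
[ svc_rdma_pull_up_needed() above charges one SGE per page that the xdr_buf page list touches, where a range starting mid-page only gets the remainder of its first page. The counting loop reduces to this standalone helper; a sketch, not the kernel function:

static int pages_spanned(unsigned long pageoff, unsigned int len,
                         unsigned int page_size)
{
        int n = 0;

        while (len) {
                unsigned int chunk = page_size - pageoff;

                if (chunk > len)
                        chunk = len;
                len -= chunk;
                pageoff = 0;            /* later pages start at offset 0 */
                n++;
        }
        return n;
}
]
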
@@ -559,8 +652,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
        u32 xdr_pad;
        int ret;
 
-       if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
-               return -EIO;
+       if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
+               return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
+
+       ++ctxt->sc_cur_sge_no;
        ret = svc_rdma_dma_map_buf(rdma, ctxt,
                                   xdr->head[0].iov_base,
                                   xdr->head[0].iov_len);
@@ -591,8 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
        while (remaining) {
                len = min_t(u32, PAGE_SIZE - page_off, remaining);
 
-               if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
-                       return -EIO;
+               ++ctxt->sc_cur_sge_no;
                ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
                                            page_off, len);
                if (ret < 0)
@@ -606,8 +700,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
        len = xdr->tail[0].iov_len;
 tail:
        if (len) {
-               if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
-                       return -EIO;
+               ++ctxt->sc_cur_sge_no;
                ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
                if (ret < 0)
                        return ret;
index 924c17d..57f86c6 100644
@@ -419,12 +419,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        /* Transport header, head iovec, tail iovec */
        newxprt->sc_max_send_sges = 3;
        /* Add one SGE per page list entry */
-       newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE;
-       if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) {
-               pr_err("svcrdma: too few Send SGEs available (%d needed)\n",
-                      newxprt->sc_max_send_sges);
-               goto errout;
-       }
+       newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
+       if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
+               newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
        newxprt->sc_max_req_size = svcrdma_max_req_size;
        newxprt->sc_max_requests = svcrdma_max_requests;
        newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
index ac306d1..341ecd7 100644
@@ -1145,7 +1145,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
        default:
                pr_warn("Dropping received illegal msg type\n");
                kfree_skb(skb);
-               return false;
+               return true;
        };
 }
 
@@ -1425,6 +1425,10 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
                l->rcv_unacked = 0;
        } else {
                /* RESET_MSG or ACTIVATE_MSG */
+               if (mtyp == ACTIVATE_MSG) {
+                       msg_set_dest_session_valid(hdr, 1);
+                       msg_set_dest_session(hdr, l->peer_session);
+               }
                msg_set_max_pkt(hdr, l->advertised_mtu);
                strcpy(data, l->if_name);
                msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
@@ -1642,6 +1646,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
                        rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
                        break;
                }
+
+               /* If this endpoint was re-created while peer was ESTABLISHING
+                * it doesn't know the current session number. Force re-synch.
+                */
+               if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) &&
+                   l->session != msg_dest_session(hdr)) {
+                       if (less(l->session, msg_dest_session(hdr)))
+                               l->session = msg_dest_session(hdr) + 1;
+                       break;
+               }
+
                /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
                if (mtyp == RESET_MSG || !link_is_up(l))
                        rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
index a092495..d7e4b8b 100644
@@ -360,6 +360,28 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n)
        msg_set_bits(m, 1, 0, 0xffff, n);
 }
 
+/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch
+ * link peer session number
+ */
+static inline bool msg_dest_session_valid(struct tipc_msg *m)
+{
+       return msg_bits(m, 1, 16, 0x1);
+}
+
+static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid)
+{
+       msg_set_bits(m, 1, 16, 0x1, valid);
+}
+
+static inline u16 msg_dest_session(struct tipc_msg *m)
+{
+       return msg_bits(m, 1, 0, 0xffff);
+}
+
+static inline void msg_set_dest_session(struct tipc_msg *m, u16 n)
+{
+       msg_set_bits(m, 1, 0, 0xffff, n);
+}
 
 /*
  * Word 2
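
[ The new dest-session accessors above follow TIPC's word/position/mask convention: a field of up to 16 bits lives at a fixed bit offset within a 32-bit header word. The get/set arithmetic in isolation, ignoring the byte-order conversion the real msg_bits()/msg_set_bits() also perform:

static unsigned int get_bits(unsigned int word, int pos, unsigned int mask)
{
        return (word >> pos) & mask;
}

static unsigned int set_bits(unsigned int word, int pos, unsigned int mask,
                             unsigned int val)
{
        word &= ~(mask << pos);                 /* clear the field */
        return word | ((val & mask) << pos);    /* write the new value */
}
]
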
index db2a6c3..2dc4919 100644
@@ -830,15 +830,16 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
        tipc_node_write_lock(n);
        if (!tipc_link_is_establishing(l)) {
                __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
-               if (delete) {
-                       kfree(l);
-                       le->link = NULL;
-                       n->link_cnt--;
-               }
        } else {
                /* Defuse pending tipc_node_link_up() */
+               tipc_link_reset(l);
                tipc_link_fsm_evt(l, LINK_RESET_EVT);
        }
+       if (delete) {
+               kfree(l);
+               le->link = NULL;
+               n->link_cnt--;
+       }
        trace_tipc_node_link_down(n, true, "node link down or deleted!");
        tipc_node_write_unlock(n);
        if (delete)
index 8051a91..ae47847 100644
@@ -1645,10 +1645,10 @@ int tls_sw_recvmsg(struct sock *sk,
 
        do {
                bool retain_skb = false;
-               bool async = false;
                bool zc = false;
                int to_decrypt;
                int chunk = 0;
+               bool async;
 
                skb = tls_wait_data(sk, psock, flags, timeo, &err);
                if (!skb) {
@@ -1674,18 +1674,21 @@ int tls_sw_recvmsg(struct sock *sk,
                    tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION)
                        zc = true;
 
+               /* Do not use async mode if record is non-data */
+               if (ctx->control == TLS_RECORD_TYPE_DATA)
+                       async = ctx->async_capable;
+               else
+                       async = false;
+
                err = decrypt_skb_update(sk, skb, &msg->msg_iter,
-                                        &chunk, &zc, ctx->async_capable);
+                                        &chunk, &zc, async);
                if (err < 0 && err != -EINPROGRESS) {
                        tls_err_abort(sk, EBADMSG);
                        goto recv_end;
                }
 
-               if (err == -EINPROGRESS) {
-                       async = true;
+               if (err == -EINPROGRESS)
                        num_async++;
-                       goto pick_next_record;
-               }
 
                if (!cmsg) {
                        int cerr;
@@ -1704,6 +1707,9 @@ int tls_sw_recvmsg(struct sock *sk,
                        goto recv_end;
                }
 
+               if (async)
+                       goto pick_next_record;
+
                if (!zc) {
                        if (rxm->full_len > len) {
                                retain_skb = true;
@@ -2215,8 +2221,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 
        if (sw_ctx_rx) {
                tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
-               sw_ctx_rx->async_capable =
-                       tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+
+               if (crypto_info->version == TLS_1_3_VERSION)
+                       sw_ctx_rx->async_capable = false;
+               else
+                       sw_ctx_rx->async_capable =
+                               tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
 
                /* Set up strparser */
                memset(&cb, 0, sizeof(cb));
index 5d3cce9..15eb5d3 100644
@@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void)
 {
        struct virtio_vsock *vsock = virtio_vsock_get();
 
+       if (!vsock)
+               return VMADDR_CID_ANY;
+
        return vsock->guest_cid;
 }
 
@@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 
        virtio_vsock_update_guest_cid(vsock);
 
-       ret = vsock_core_init(&virtio_transport.transport);
-       if (ret < 0)
-               goto out_vqs;
-
        vsock->rx_buf_nr = 0;
        vsock->rx_buf_max_nr = 0;
        atomic_set(&vsock->queued_replies, 0);
@@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
        mutex_unlock(&the_virtio_vsock_mutex);
        return 0;
 
-out_vqs:
-       vsock->vdev->config->del_vqs(vsock->vdev);
 out:
        kfree(vsock);
        mutex_unlock(&the_virtio_vsock_mutex);
@@ -637,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
        flush_work(&vsock->event_work);
        flush_work(&vsock->send_pkt_work);
 
+       /* Reset all connected sockets when the device disappears */
+       vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+
        vdev->config->reset(vdev);
 
        mutex_lock(&vsock->rx_lock);
@@ -669,7 +669,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
 
        mutex_lock(&the_virtio_vsock_mutex);
        the_virtio_vsock = NULL;
-       vsock_core_exit();
        mutex_unlock(&the_virtio_vsock_mutex);
 
        vdev->config->del_vqs(vdev);
@@ -702,14 +701,28 @@ static int __init virtio_vsock_init(void)
        virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
        if (!virtio_vsock_workqueue)
                return -ENOMEM;
+
        ret = register_virtio_driver(&virtio_vsock_driver);
        if (ret)
-               destroy_workqueue(virtio_vsock_workqueue);
+               goto out_wq;
+
+       ret = vsock_core_init(&virtio_transport.transport);
+       if (ret)
+               goto out_vdr;
+
+       return 0;
+
+out_vdr:
+       unregister_virtio_driver(&virtio_vsock_driver);
+out_wq:
+       destroy_workqueue(virtio_vsock_workqueue);
        return ret;
+
 }
 
 static void __exit virtio_vsock_exit(void)
 {
+       vsock_core_exit();
        unregister_virtio_driver(&virtio_vsock_driver);
        destroy_workqueue(virtio_vsock_workqueue);
 }
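
[ The init path above now brings up the workqueue, the virtio driver, and the vsock core in a fixed order, unwinding in reverse on failure, and virtio_vsock_exit() mirrors that teardown. The symmetric shape, as a self-contained sketch with illustrative stubs:

static int setup_a(void) { return 0; }
static int setup_b(void) { return 0; }
static void teardown_a(void) { }
static void teardown_b(void) { }

static int mod_init(void)
{
        int ret;

        ret = setup_a();
        if (ret)
                return ret;
        ret = setup_b();
        if (ret)
                goto out_a;             /* undo in reverse order */
        return 0;
out_a:
        teardown_a();
        return ret;
}

static void mod_exit(void)
{
        teardown_b();                   /* mirror of mod_init() */
        teardown_a();
}
]
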
index c361ce7..c3d5ab0 100644
@@ -1651,6 +1651,10 @@ static void vmci_transport_cleanup(struct work_struct *work)
 
 static void vmci_transport_destruct(struct vsock_sock *vsk)
 {
+       /* transport can be NULL if we hit a failure at init() time */
+       if (!vmci_trans(vsk))
+               return;
+
        /* Ensure that the detach callback doesn't use the sk/vsk
         * we are about to destruct.
         */
index 882d97b..550ac9d 100644
@@ -41,6 +41,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
                cfg80211_sched_dfs_chan_update(rdev);
        }
 
+       schedule_work(&cfg80211_disconnect_work);
+
        return err;
 }
 
index 623dfe5..b36ad8e 100644
@@ -1068,6 +1068,8 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
 
        ASSERT_RTNL();
 
+       flush_work(&wdev->pmsr_free_wk);
+
        nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
 
        list_del_rcu(&wdev->list);
index c5d6f34..f6b4056 100644
@@ -445,6 +445,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev);
 bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
                                u32 center_freq_khz, u32 bw_khz);
 
+extern struct work_struct cfg80211_disconnect_work;
+
 /**
  * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable
  * @wiphy: the wiphy to validate against
index a3cc039..e36437a 100644
@@ -250,7 +250,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
        [NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] =
                NLA_POLICY_MAX(NLA_U8, 15),
        [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] =
-               NLA_POLICY_MAX(NLA_U8, 15),
+               NLA_POLICY_MAX(NLA_U8, 31),
        [NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 },
        [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG },
        [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG },
index de92867..0216ab5 100644
@@ -256,8 +256,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
                if (err)
                        goto out_err;
        } else {
-               memcpy(req->mac_addr, nla_data(info->attrs[NL80211_ATTR_MAC]),
-                      ETH_ALEN);
+               memcpy(req->mac_addr, wdev_address(wdev), ETH_ALEN);
                memset(req->mac_addr_mask, 0xff, ETH_ALEN);
        }
 
@@ -272,6 +271,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
 
        req->n_peers = count;
        req->cookie = cfg80211_assign_cookie(rdev);
+       req->nl_portid = info->snd_portid;
 
        err = rdev_start_pmsr(rdev, wdev, req);
        if (err)
@@ -530,14 +530,14 @@ free:
 }
 EXPORT_SYMBOL_GPL(cfg80211_pmsr_report);
 
-void cfg80211_pmsr_free_wk(struct work_struct *work)
+static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev)
 {
-       struct wireless_dev *wdev = container_of(work, struct wireless_dev,
-                                                pmsr_free_wk);
        struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
        struct cfg80211_pmsr_request *req, *tmp;
        LIST_HEAD(free_list);
 
+       lockdep_assert_held(&wdev->mtx);
+
        spin_lock_bh(&wdev->pmsr_lock);
        list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) {
                if (req->nl_portid)
@@ -547,14 +547,22 @@ void cfg80211_pmsr_free_wk(struct work_struct *work)
        spin_unlock_bh(&wdev->pmsr_lock);
 
        list_for_each_entry_safe(req, tmp, &free_list, list) {
-               wdev_lock(wdev);
                rdev_abort_pmsr(rdev, wdev, req);
-               wdev_unlock(wdev);
 
                kfree(req);
        }
 }
 
+void cfg80211_pmsr_free_wk(struct work_struct *work)
+{
+       struct wireless_dev *wdev = container_of(work, struct wireless_dev,
+                                                pmsr_free_wk);
+
+       wdev_lock(wdev);
+       cfg80211_pmsr_process_abort(wdev);
+       wdev_unlock(wdev);
+}
+
 void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev)
 {
        struct cfg80211_pmsr_request *req;
@@ -568,8 +576,8 @@ void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev)
        spin_unlock_bh(&wdev->pmsr_lock);
 
        if (found)
-               schedule_work(&wdev->pmsr_free_wk);
-       flush_work(&wdev->pmsr_free_wk);
+               cfg80211_pmsr_process_abort(wdev);
+
        WARN_ON(!list_empty(&wdev->pmsr_list));
 }
 
index f741d83..7d34cb8 100644
@@ -667,7 +667,7 @@ static void disconnect_work(struct work_struct *work)
        rtnl_unlock();
 }
 
-static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
+DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
 
 
 /*
index cd48cdd..ec30e37 100644
@@ -5,7 +5,7 @@
  * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2017      Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 #include <linux/export.h>
 #include <linux/bitops.h>
@@ -19,6 +19,7 @@
 #include <linux/mpls.h>
 #include <linux/gcd.h>
 #include <linux/bitfield.h>
+#include <linux/nospec.h>
 #include "core.h"
 #include "rdev-ops.h"
 
@@ -715,20 +716,25 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
 {
        unsigned int dscp;
        unsigned char vlan_priority;
+       unsigned int ret;
 
        /* skb->priority values from 256->263 are magic values to
         * directly indicate a specific 802.1d priority.  This is used
         * to allow 802.1d priority to be passed directly in from VLAN
         * tags, etc.
         */
-       if (skb->priority >= 256 && skb->priority <= 263)
-               return skb->priority - 256;
+       if (skb->priority >= 256 && skb->priority <= 263) {
+               ret = skb->priority - 256;
+               goto out;
+       }
 
        if (skb_vlan_tag_present(skb)) {
                vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK)
                        >> VLAN_PRIO_SHIFT;
-               if (vlan_priority > 0)
-                       return vlan_priority;
+               if (vlan_priority > 0) {
+                       ret = vlan_priority;
+                       goto out;
+               }
        }
 
        switch (skb->protocol) {
@@ -747,8 +753,9 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
                if (!mpls)
                        return 0;
 
-               return (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
+               ret = (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
                        >> MPLS_LS_TC_SHIFT;
+               goto out;
        }
        case htons(ETH_P_80221):
                /* 802.21 is always network control traffic */
@@ -761,18 +768,24 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
                unsigned int i, tmp_dscp = dscp >> 2;
 
                for (i = 0; i < qos_map->num_des; i++) {
-                       if (tmp_dscp == qos_map->dscp_exception[i].dscp)
-                               return qos_map->dscp_exception[i].up;
+                       if (tmp_dscp == qos_map->dscp_exception[i].dscp) {
+                               ret = qos_map->dscp_exception[i].up;
+                               goto out;
+                       }
                }
 
                for (i = 0; i < 8; i++) {
                        if (tmp_dscp >= qos_map->up[i].low &&
-                           tmp_dscp <= qos_map->up[i].high)
-                               return i;
+                           tmp_dscp <= qos_map->up[i].high) {
+                               ret = i;
+                               goto out;
+                       }
                }
        }
 
-       return dscp >> 5;
+       ret = dscp >> 5;
+out:
+       return array_index_nospec(ret, IEEE80211_NUM_TIDS);
 }
 EXPORT_SYMBOL(cfg80211_classify8021d);
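
[ cfg80211_classify8021d() above now funnels every result through array_index_nospec() so that a mispredicted bounds check cannot be used to read out of bounds speculatively (Spectre v1 hardening). The helper clamps the index without a branch; a user-space model of the kernel's generic fallback, assuming two's-complement longs:

static unsigned long index_nospec(unsigned long idx, unsigned long sz)
{
        /* mask is all ones when idx < sz, all zeroes otherwise */
        unsigned long mask = ~(long)(idx | (sz - 1 - idx)) >>
                             (sizeof(long) * 8 - 1);

        return idx & mask;
}
]
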
 
index 5121729..ec3a828 100644
@@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb)
        unsigned int lci = 1;
        struct sock *sk;
 
-       read_lock_bh(&x25_list_lock);
-
-       while ((sk = __x25_find_socket(lci, nb)) != NULL) {
+       while ((sk = x25_find_socket(lci, nb)) != NULL) {
                sock_put(sk);
                if (++lci == 4096) {
                        lci = 0;
                        break;
                }
+               cond_resched();
        }
 
-       read_unlock_bh(&x25_list_lock);
        return lci;
 }
 
index 33e67bd..3223448 100644
@@ -117,7 +117,7 @@ static bool mei_init(struct mei *me, const uuid_le *guid,
 
        me->verbose = verbose;
 
-       me->fd = open("/dev/mei", O_RDWR);
+       me->fd = open("/dev/mei0", O_RDWR);
        if (me->fd == -1) {
                mei_err(me, "Cannot establish a handle to the Intel MEI driver\n");
                goto err;
index 08c88de..11975ec 100644
@@ -1444,7 +1444,10 @@ check:
                        new = aa_label_merge(label, target, GFP_KERNEL);
                if (IS_ERR_OR_NULL(new)) {
                        info = "failed to build target label";
-                       error = PTR_ERR(new);
+                       if (!new)
+                               error = -ENOMEM;
+                       else
+                               error = PTR_ERR(new);
                        new = NULL;
                        perms.allow = 0;
                        goto audit;
index 2c01087..8db1731 100644
@@ -1599,12 +1599,14 @@ static unsigned int apparmor_ipv4_postroute(void *priv,
        return apparmor_ip_postroute(priv, skb, state);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
 static unsigned int apparmor_ipv6_postroute(void *priv,
                                            struct sk_buff *skb,
                                            const struct nf_hook_state *state)
 {
        return apparmor_ip_postroute(priv, skb, state);
 }
+#endif
 
 static const struct nf_hook_ops apparmor_nf_ops[] = {
        {
index 40013b2..6c0b303 100644
@@ -2177,16 +2177,11 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream,
                snd_pcm_update_hw_ptr(substream);
 
        if (!is_playback &&
-           runtime->status->state == SNDRV_PCM_STATE_PREPARED) {
-               if (size >= runtime->start_threshold) {
-                       err = snd_pcm_start(substream);
-                       if (err < 0)
-                               goto _end_unlock;
-               } else {
-                       /* nothing to do */
-                       err = 0;
+           runtime->status->state == SNDRV_PCM_STATE_PREPARED &&
+           size >= runtime->start_threshold) {
+               err = snd_pcm_start(substream);
+               if (err < 0)
                        goto _end_unlock;
-               }
        }
 
        avail = snd_pcm_avail(substream);
index 9174f1b..1ec706c 100644
@@ -115,7 +115,8 @@ static int hda_codec_driver_probe(struct device *dev)
        err = snd_hda_codec_build_controls(codec);
        if (err < 0)
                goto error_module;
-       if (codec->card->registered) {
+       /* only register after the bus probe has finished; otherwise it's racy */
+       if (!codec->bus->bus_probing && codec->card->registered) {
                err = snd_card_register(codec->card);
                if (err < 0)
                        goto error_module;
index e784130..e5c4900 100644
@@ -2185,6 +2185,7 @@ static int azx_probe_continue(struct azx *chip)
        int dev = chip->dev_index;
        int err;
 
+       to_hda_bus(bus)->bus_probing = 1;
        hda->probe_continued = 1;
 
        /* bind with i915 if needed */
@@ -2269,6 +2270,7 @@ out_free:
        if (err < 0)
                hda->init_failed = 1;
        complete_all(&hda->probe_wait);
+       to_hda_bus(bus)->bus_probing = 0;
        return err;
 }
 
index e5bdbc2..29882bd 100644
@@ -8451,8 +8451,10 @@ static void ca0132_free(struct hda_codec *codec)
        ca0132_exit_chip(codec);
 
        snd_hda_power_down(codec);
-       if (IS_ENABLED(CONFIG_PCI) && spec->mem_base)
+#ifdef CONFIG_PCI
+       if (spec->mem_base)
                pci_iounmap(codec->bus->pci, spec->mem_base);
+#endif
        kfree(spec->spec_init_verbs);
        kfree(codec->spec);
 }
index 152f541..a4ee765 100644
@@ -924,6 +924,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
        SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK),
        SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK),
        SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK),
+       SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK),
        SND_PCI_QUIRK(0x103c, 0x83b3, "HP EliteBook 830 G5", CXT_FIXUP_HP_DOCK),
        SND_PCI_QUIRK(0x103c, 0x83d3, "HP ProBook 640 G4", CXT_FIXUP_HP_DOCK),
        SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
index b4f4721..6df758a 100644
@@ -117,6 +117,7 @@ struct alc_spec {
        int codec_variant;      /* flag for other variants */
        unsigned int has_alc5505_dsp:1;
        unsigned int no_depop_delay:1;
+       unsigned int done_hp_init:1;
 
        /* for PLL fix */
        hda_nid_t pll_nid;
@@ -514,6 +515,15 @@ static void alc_auto_init_amp(struct hda_codec *codec, int type)
        }
 }
 
+/* get a primary headphone pin if available */
+static hda_nid_t alc_get_hp_pin(struct alc_spec *spec)
+{
+       if (spec->gen.autocfg.hp_pins[0])
+               return spec->gen.autocfg.hp_pins[0];
+       if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT)
+               return spec->gen.autocfg.line_out_pins[0];
+       return 0;
+}
 
 /*
  * Realtek SSID verification
@@ -724,9 +734,7 @@ do_sku:
         * 15   : 1 --> enable the function "Mute internal speaker
         *              when the external headphone out jack is plugged"
         */
-       if (!spec->gen.autocfg.hp_pins[0] &&
-           !(spec->gen.autocfg.line_out_pins[0] &&
-             spec->gen.autocfg.line_out_type == AUTO_PIN_HP_OUT)) {
+       if (!alc_get_hp_pin(spec)) {
                hda_nid_t nid;
                tmp = (ass >> 11) & 0x3;        /* HP to chassis */
                nid = ports[tmp];
@@ -2958,7 +2966,7 @@ static void alc282_restore_default_value(struct hda_codec *codec)
 static void alc282_init(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp_pin_sense;
        int coef78;
 
@@ -2995,7 +3003,7 @@ static void alc282_init(struct hda_codec *codec)
 static void alc282_shutup(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp_pin_sense;
        int coef78;
 
@@ -3073,14 +3081,9 @@ static void alc283_restore_default_value(struct hda_codec *codec)
 static void alc283_init(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp_pin_sense;
 
-       if (!spec->gen.autocfg.hp_outs) {
-               if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT)
-                       hp_pin = spec->gen.autocfg.line_out_pins[0];
-       }
-
        alc283_restore_default_value(codec);
 
        if (!hp_pin)
@@ -3114,14 +3117,9 @@ static void alc283_init(struct hda_codec *codec)
 static void alc283_shutup(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp_pin_sense;
 
-       if (!spec->gen.autocfg.hp_outs) {
-               if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT)
-                       hp_pin = spec->gen.autocfg.line_out_pins[0];
-       }
-
        if (!hp_pin) {
                alc269_shutup(codec);
                return;
@@ -3155,7 +3153,7 @@ static void alc283_shutup(struct hda_codec *codec)
 static void alc256_init(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp_pin_sense;
 
        if (!hp_pin)
@@ -3191,7 +3189,7 @@ static void alc256_init(struct hda_codec *codec)
 static void alc256_shutup(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp_pin_sense;
 
        if (!hp_pin) {
@@ -3227,7 +3225,7 @@ static void alc256_shutup(struct hda_codec *codec)
 static void alc225_init(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp1_pin_sense, hp2_pin_sense;
 
        if (!hp_pin)
@@ -3270,7 +3268,7 @@ static void alc225_init(struct hda_codec *codec)
 static void alc225_shutup(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp1_pin_sense, hp2_pin_sense;
 
        if (!hp_pin) {
@@ -3314,7 +3312,7 @@ static void alc225_shutup(struct hda_codec *codec)
 static void alc_default_init(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp_pin_sense;
 
        if (!hp_pin)
@@ -3343,7 +3341,7 @@ static void alc_default_init(struct hda_codec *codec)
 static void alc_default_shutup(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
        bool hp_pin_sense;
 
        if (!hp_pin) {
@@ -3372,6 +3370,48 @@ static void alc_default_shutup(struct hda_codec *codec)
        snd_hda_shutup_pins(codec);
 }
 
+static void alc294_hp_init(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
+       int i, val;
+
+       if (!hp_pin)
+               return;
+
+       snd_hda_codec_write(codec, hp_pin, 0,
+                           AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
+
+       msleep(100);
+
+       snd_hda_codec_write(codec, hp_pin, 0,
+                           AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0);
+
+       alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */
+       alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */
+
+       /* Wait for depop procedure finish  */
+       val = alc_read_coefex_idx(codec, 0x58, 0x01);
+       for (i = 0; i < 20 && val & 0x0080; i++) {
+               msleep(50);
+               val = alc_read_coefex_idx(codec, 0x58, 0x01);
+       }
+       /* Set HP depop to auto mode */
+       alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b);
+       msleep(50);
+}
+
+static void alc294_init(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+
+       if (!spec->done_hp_init) {
+               alc294_hp_init(codec);
+               spec->done_hp_init = true;
+       }
+       alc_default_init(codec);
+}
+
 static void alc5505_coef_set(struct hda_codec *codec, unsigned int index_reg,
                             unsigned int val)
 {
@@ -4737,7 +4777,7 @@ static void alc_update_headset_mode(struct hda_codec *codec)
        struct alc_spec *spec = codec->spec;
 
        hda_nid_t mux_pin = spec->gen.imux_pins[spec->gen.cur_mux[0]];
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
 
        int new_headset_mode;
 
@@ -5016,7 +5056,7 @@ static void alc_fixup_tpt470_dock(struct hda_codec *codec,
 static void alc_shutup_dell_xps13(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       int hp_pin = spec->gen.autocfg.hp_pins[0];
+       int hp_pin = alc_get_hp_pin(spec);
 
        /* Prevent pop noises when headphones are plugged in */
        snd_hda_codec_write(codec, hp_pin, 0,
@@ -5109,7 +5149,7 @@ static void alc271_hp_gate_mic_jack(struct hda_codec *codec,
 
        if (action == HDA_FIXUP_ACT_PROBE) {
                int mic_pin = find_ext_mic_pin(codec);
-               int hp_pin = spec->gen.autocfg.hp_pins[0];
+               int hp_pin = alc_get_hp_pin(spec);
 
                if (snd_BUG_ON(!mic_pin || !hp_pin))
                        return;
@@ -5591,6 +5631,7 @@ enum {
        ALC294_FIXUP_ASUS_HEADSET_MIC,
        ALC294_FIXUP_ASUS_SPK,
        ALC225_FIXUP_HEADSET_JACK,
+       ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6537,6 +6578,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc_fixup_headset_jack,
        },
+       [ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1a, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6715,6 +6765,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
        SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
@@ -7373,37 +7424,6 @@ static void alc269_fill_coef(struct hda_codec *codec)
        alc_update_coef_idx(codec, 0x4, 0, 1<<11);
 }
 
-static void alc294_hp_init(struct hda_codec *codec)
-{
-       struct alc_spec *spec = codec->spec;
-       hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
-       int i, val;
-
-       if (!hp_pin)
-               return;
-
-       snd_hda_codec_write(codec, hp_pin, 0,
-                           AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
-
-       msleep(100);
-
-       snd_hda_codec_write(codec, hp_pin, 0,
-                           AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0);
-
-       alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */
-       alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */
-
-       /* Wait for depop procedure finish  */
-       val = alc_read_coefex_idx(codec, 0x58, 0x01);
-       for (i = 0; i < 20 && val & 0x0080; i++) {
-               msleep(50);
-               val = alc_read_coefex_idx(codec, 0x58, 0x01);
-       }
-       /* Set HP depop to auto mode */
-       alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b);
-       msleep(50);
-}
-
 /*
  */
 static int patch_alc269(struct hda_codec *codec)
@@ -7529,7 +7549,7 @@ static int patch_alc269(struct hda_codec *codec)
                spec->codec_variant = ALC269_TYPE_ALC294;
                spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */
                alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */
-               alc294_hp_init(codec);
+               spec->init_hook = alc294_init;
                break;
        case 0x10ec0300:
                spec->codec_variant = ALC269_TYPE_ALC300;
@@ -7541,7 +7561,7 @@ static int patch_alc269(struct hda_codec *codec)
                spec->codec_variant = ALC269_TYPE_ALC700;
                spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */
                alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */
-               alc294_hp_init(codec);
+               spec->init_hook = alc294_init;
                break;
 
        }
index d00734d..e5b6769 100644
@@ -795,6 +795,8 @@ static int hdmi_codec_probe(struct platform_device *pdev)
        if (hcd->spdif)
                hcp->daidrv[i] = hdmi_spdif_dai;
 
+       dev_set_drvdata(dev, hcp);
+
        ret = devm_snd_soc_register_component(dev, &hdmi_driver, hcp->daidrv,
                                     dai_count);
        if (ret) {
@@ -802,8 +804,6 @@ static int hdmi_codec_probe(struct platform_device *pdev)
                        __func__, ret);
                return ret;
        }
-
-       dev_set_drvdata(dev, hcp);
        return 0;
 }
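
[ hdmi_codec_probe() above publishes its driver data before registering the component, since registration may invoke callbacks that read it; setting it afterwards left a window where those callbacks saw NULL. The ordering rule as a sketch; register_component() is a hypothetical stand-in:

struct device;

void dev_set_drvdata(struct device *dev, void *data);
int register_component(struct device *dev);     /* hypothetical */

static int probe(struct device *dev, void *priv)
{
        dev_set_drvdata(dev, priv);     /* publish state first ... */
        return register_component(dev); /* ... then allow callbacks */
}
]
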
 
index 89c43b2..a9b91bc 100644
@@ -1778,7 +1778,9 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = {
        {"ADC Stereo1 Filter", NULL, "ADC STO1 ASRC", is_using_asrc},
        {"DAC Stereo1 Filter", NULL, "DAC STO1 ASRC", is_using_asrc},
        {"ADC STO1 ASRC", NULL, "AD ASRC"},
+       {"ADC STO1 ASRC", NULL, "DA ASRC"},
        {"ADC STO1 ASRC", NULL, "CLKDET"},
+       {"DAC STO1 ASRC", NULL, "AD ASRC"},
        {"DAC STO1 ASRC", NULL, "DA ASRC"},
        {"DAC STO1 ASRC", NULL, "CLKDET"},
 
index d6c62aa..ce00fe2 100644
@@ -700,6 +700,7 @@ static int i2s_hw_params(struct snd_pcm_substream *substream,
 {
        struct i2s_dai *i2s = to_info(dai);
        u32 mod, mask = 0, val = 0;
+       struct clk *rclksrc;
        unsigned long flags;
 
        WARN_ON(!pm_runtime_active(dai->dev));
@@ -782,6 +783,10 @@ static int i2s_hw_params(struct snd_pcm_substream *substream,
 
        i2s->frmclk = params_rate(params);
 
+       rclksrc = i2s->clk_table[CLK_I2S_RCLK_SRC];
+       if (rclksrc && !IS_ERR(rclksrc))
+               i2s->rclk_srcrate = clk_get_rate(rclksrc);
+
        return 0;
 }
 
@@ -886,11 +891,6 @@ static int config_setup(struct i2s_dai *i2s)
                return 0;
 
        if (!(i2s->quirks & QUIRK_NO_MUXPSR)) {
-               struct clk *rclksrc = i2s->clk_table[CLK_I2S_RCLK_SRC];
-
-               if (rclksrc && !IS_ERR(rclksrc))
-                       i2s->rclk_srcrate = clk_get_rate(rclksrc);
-
                psr = i2s->rclk_srcrate / i2s->frmclk / rfs;
                writel(((psr - 1) << 8) | PSR_PSREN, i2s->addr + I2SPSR);
                dev_dbg(&i2s->pdev->dev,
index 59e250c..e819e96 100644
@@ -1526,14 +1526,14 @@ int rsnd_kctrl_new(struct rsnd_mod *mod,
        int ret;
 
        /*
-        * 1) Avoid duplicate register (ex. MIXer case)
-        * 2) re-register if card was rebinded
+        * 1) Avoid duplicate registration for DVC in the MIX case
+        * 2) Allow duplicate registration for MIX
+        * 3) re-register if the card was rebound
         */
        list_for_each_entry(kctrl, &card->controls, list) {
                struct rsnd_kctrl_cfg *c = kctrl->private_data;
 
-               if (strcmp(kctrl->id.name, name) == 0 &&
-                   c->mod == mod)
+               if (c == cfg)
                        return 0;
        }
 
index 45ef295..f5afab6 100644
@@ -286,7 +286,7 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod,
        if (rsnd_ssi_is_multi_slave(mod, io))
                return 0;
 
-       if (ssi->usrcnt > 1) {
+       if (ssi->usrcnt > 0) {
                if (ssi->rate != rate) {
                        dev_err(dev, "SSI parent/child should use same rate\n");
                        return -EINVAL;
index c5934ad..c74991d 100644
@@ -79,7 +79,7 @@ static int rsnd_ssiu_init(struct rsnd_mod *mod,
                break;
        case 9:
                for (i = 0; i < 4; i++)
-                       rsnd_mod_write(mod, SSI_SYS_STATUS((i * 2) + 1), 0xf << (id * 4));
+                       rsnd_mod_write(mod, SSI_SYS_STATUS((i * 2) + 1), 0xf << 4);
                break;
        }
 
index aae450b..50617db 100644
@@ -735,12 +735,17 @@ static struct snd_soc_component *soc_find_component(
        const struct device_node *of_node, const char *name)
 {
        struct snd_soc_component *component;
+       struct device_node *component_of_node;
 
        lockdep_assert_held(&client_mutex);
 
        for_each_component(component) {
                if (of_node) {
-                       if (component->dev->of_node == of_node)
+                       component_of_node = component->dev->of_node;
+                       if (!component_of_node && component->dev->parent)
+                               component_of_node = component->dev->parent->of_node;
+
+                       if (component_of_node == of_node)
                                return component;
                } else if (name && strcmp(component->name, name) == 0) {
                        return component;
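
[ soc_find_component() above falls back to the parent device's of_node when the component's own is NULL, which covers components instantiated as children of another device. The lookup rule in isolation; a sketch, not the kernel API:

struct device_node;
struct device {
        struct device *parent;
        struct device_node *of_node;
};

static struct device_node *node_for_match(const struct device *dev)
{
        if (dev->of_node)
                return dev->of_node;
        return dev->parent ? dev->parent->of_node : NULL;
}
]
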
@@ -951,7 +956,7 @@ static void soc_remove_dai(struct snd_soc_dai *dai, int order)
 {
        int err;
 
-       if (!dai || !dai->probed ||
+       if (!dai || !dai->probed || !dai->driver ||
            dai->driver->remove_order != order)
                return;
 
index 2c4c134..20bad75 100644
@@ -70,12 +70,16 @@ static int dapm_up_seq[] = {
        [snd_soc_dapm_clock_supply] = 1,
        [snd_soc_dapm_supply] = 2,
        [snd_soc_dapm_micbias] = 3,
+       [snd_soc_dapm_vmid] = 3,
        [snd_soc_dapm_dai_link] = 2,
        [snd_soc_dapm_dai_in] = 4,
        [snd_soc_dapm_dai_out] = 4,
        [snd_soc_dapm_aif_in] = 4,
        [snd_soc_dapm_aif_out] = 4,
        [snd_soc_dapm_mic] = 5,
+       [snd_soc_dapm_siggen] = 5,
+       [snd_soc_dapm_input] = 5,
+       [snd_soc_dapm_output] = 5,
        [snd_soc_dapm_mux] = 6,
        [snd_soc_dapm_demux] = 6,
        [snd_soc_dapm_dac] = 7,
@@ -83,11 +87,19 @@ static int dapm_up_seq[] = {
        [snd_soc_dapm_mixer] = 8,
        [snd_soc_dapm_mixer_named_ctl] = 8,
        [snd_soc_dapm_pga] = 9,
+       [snd_soc_dapm_buffer] = 9,
+       [snd_soc_dapm_scheduler] = 9,
+       [snd_soc_dapm_effect] = 9,
+       [snd_soc_dapm_src] = 9,
+       [snd_soc_dapm_asrc] = 9,
+       [snd_soc_dapm_encoder] = 9,
+       [snd_soc_dapm_decoder] = 9,
        [snd_soc_dapm_adc] = 10,
        [snd_soc_dapm_out_drv] = 11,
        [snd_soc_dapm_hp] = 11,
        [snd_soc_dapm_spk] = 11,
        [snd_soc_dapm_line] = 11,
+       [snd_soc_dapm_sink] = 11,
        [snd_soc_dapm_kcontrol] = 12,
        [snd_soc_dapm_post] = 13,
 };
@@ -100,13 +112,25 @@ static int dapm_down_seq[] = {
        [snd_soc_dapm_spk] = 3,
        [snd_soc_dapm_line] = 3,
        [snd_soc_dapm_out_drv] = 3,
+       [snd_soc_dapm_sink] = 3,
        [snd_soc_dapm_pga] = 4,
+       [snd_soc_dapm_buffer] = 4,
+       [snd_soc_dapm_scheduler] = 4,
+       [snd_soc_dapm_effect] = 4,
+       [snd_soc_dapm_src] = 4,
+       [snd_soc_dapm_asrc] = 4,
+       [snd_soc_dapm_encoder] = 4,
+       [snd_soc_dapm_decoder] = 4,
        [snd_soc_dapm_switch] = 5,
        [snd_soc_dapm_mixer_named_ctl] = 5,
        [snd_soc_dapm_mixer] = 5,
        [snd_soc_dapm_dac] = 6,
        [snd_soc_dapm_mic] = 7,
+       [snd_soc_dapm_siggen] = 7,
+       [snd_soc_dapm_input] = 7,
+       [snd_soc_dapm_output] = 7,
        [snd_soc_dapm_micbias] = 8,
+       [snd_soc_dapm_vmid] = 8,
        [snd_soc_dapm_mux] = 9,
        [snd_soc_dapm_demux] = 9,
        [snd_soc_dapm_aif_in] = 10,
index 045ef13..fc79ec6 100644 (file)
@@ -502,6 +502,7 @@ static void remove_dai(struct snd_soc_component *comp,
 {
        struct snd_soc_dai_driver *dai_drv =
                container_of(dobj, struct snd_soc_dai_driver, dobj);
+       struct snd_soc_dai *dai;
 
        if (pass != SOC_TPLG_PASS_PCM_DAI)
                return;
@@ -509,6 +510,10 @@ static void remove_dai(struct snd_soc_component *comp,
        if (dobj->ops && dobj->ops->dai_unload)
                dobj->ops->dai_unload(comp, dobj);
 
+       list_for_each_entry(dai, &comp->dai_list, list)
+               if (dai->driver == dai_drv)
+                       dai->driver = NULL;
+
        kfree(dai_drv->name);
        list_del(&dobj->list);
        kfree(dai_drv);
index 3828471..db114f3 100644 (file)
@@ -314,6 +314,9 @@ static int search_roland_implicit_fb(struct usb_device *dev, int ifnum,
        return 0;
 }
 
+/* Set up an implicit feedback endpoint from a quirk. Returns 0 if no quirk
+ * applies, 1 if a quirk was found.
+ */
 static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs,
                                         struct usb_device *dev,
                                         struct usb_interface_descriptor *altsd,
@@ -384,7 +387,7 @@ add_sync_ep:
 
        subs->data_endpoint->sync_master = subs->sync_endpoint;
 
-       return 0;
+       return 1;
 }
 
 static int set_sync_endpoint(struct snd_usb_substream *subs,
@@ -423,6 +426,10 @@ static int set_sync_endpoint(struct snd_usb_substream *subs,
        if (err < 0)
                return err;
 
+       /* endpoint set by quirk */
+       if (err > 0)
+               return 0;
+
        if (altsd->bNumEndpoints < 2)
                return 0;
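
The helper now reports three states: negative on error, 0 when no quirk applies, and positive once a quirk has installed the sync endpoint, which the caller above turns into an early successful return. A minimal standalone sketch of that calling convention (quirk_probe() and set_sync_endpoint_sketch() are hypothetical stand-ins, not symbols from the patch):

#include <stdio.h>

/* Stand-in for the quirk helper: <0 error, 0 no quirk, >0 quirk matched. */
static int quirk_probe(void)
{
	return 1;	/* pretend a quirk matched */
}

static int set_sync_endpoint_sketch(void)
{
	int err = quirk_probe();

	if (err < 0)
		return err;	/* hard failure, propagate */
	if (err > 0)
		return 0;	/* endpoint set by quirk, skip generic probing */

	/* generic sync-endpoint discovery would run here */
	return 0;
}

int main(void)
{
	printf("result: %d\n", set_sync_endpoint_sketch());
	return 0;
}
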
 
index ebbadb3..7e65fe8 100644 (file)
@@ -1492,6 +1492,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
 
+       case USB_ID(0x10cb, 0x0103): /* The Bit Opus #3; with fp->dsd_raw */
        case USB_ID(0x152a, 0x85de): /* SMSL D1 DAC */
        case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */
        case USB_ID(0x0d8c, 0x0316): /* Hegel HD12 DSD */
@@ -1566,6 +1567,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case 0x20b1:  /* XMOS based devices */
        case 0x152a:  /* Thesycon devices */
        case 0x25ce:  /* Mytek devices */
+       case 0x2ab6:  /* T+A devices */
                if (fp->dsd_raw)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
                break;
index 8974834..f7261fa 100644 (file)
@@ -297,10 +297,8 @@ char *get_fdinfo(int fd, const char *key)
        snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
 
        fdi = fopen(path, "r");
-       if (!fdi) {
-               p_err("can't open fdinfo: %s", strerror(errno));
+       if (!fdi)
                return NULL;
-       }
 
        while ((n = getline(&line, &line_n, fdi)) > 0) {
                char *value;
@@ -313,7 +311,6 @@ char *get_fdinfo(int fd, const char *key)
 
                value = strchr(line, '\t');
                if (!value || !value[1]) {
-                       p_err("malformed fdinfo!?");
                        free(line);
                        return NULL;
                }
@@ -326,7 +323,6 @@ char *get_fdinfo(int fd, const char *key)
                return line;
        }
 
-       p_err("key '%s' not found in fdinfo", key);
        free(line);
        fclose(fdi);
        return NULL;
index 2160a8e..e0c650d 100644 (file)
@@ -358,6 +358,20 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val,
        return argv + i;
 }
 
+/* on per-CPU maps we must copy the provided value to all value instances */
+static void fill_per_cpu_value(struct bpf_map_info *info, void *value)
+{
+       unsigned int i, n, step;
+
+       if (!map_is_per_cpu(info->type))
+               return;
+
+       n = get_possible_cpus();
+       step = round_up(info->value_size, 8);
+       for (i = 1; i < n; i++)
+               memcpy(value + i * step, value, info->value_size);
+}
+
 static int parse_elem(char **argv, struct bpf_map_info *info,
                      void *key, void *value, __u32 key_size, __u32 value_size,
                      __u32 *flags, __u32 **value_fd)
@@ -440,6 +454,8 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
                        argv = parse_bytes(argv, "value", value, value_size);
                        if (!argv)
                                return -1;
+
+                       fill_per_cpu_value(info, value);
                }
 
                return parse_elem(argv, info, key, NULL, key_size, value_size,
@@ -511,10 +527,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
                                jsonw_uint_field(json_wtr, "owner_prog_type",
                                                 prog_type);
                }
-               if (atoi(owner_jited))
-                       jsonw_bool_field(json_wtr, "owner_jited", true);
-               else
-                       jsonw_bool_field(json_wtr, "owner_jited", false);
+               if (owner_jited)
+                       jsonw_bool_field(json_wtr, "owner_jited",
+                                        !!atoi(owner_jited));
 
                free(owner_prog_type);
                free(owner_jited);
@@ -567,7 +582,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
                char *owner_prog_type = get_fdinfo(fd, "owner_prog_type");
                char *owner_jited = get_fdinfo(fd, "owner_jited");
 
-               printf("\n\t");
+               if (owner_prog_type || owner_jited)
+                       printf("\n\t");
                if (owner_prog_type) {
                        unsigned int prog_type = atoi(owner_prog_type);
 
@@ -577,10 +593,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
                        else
                                printf("owner_prog_type %d  ", prog_type);
                }
-               if (atoi(owner_jited))
-                       printf("owner jited");
-               else
-                       printf("owner not jited");
+               if (owner_jited)
+                       printf("owner%s jited",
+                              atoi(owner_jited) ? "" : " not");
 
                free(owner_prog_type);
                free(owner_jited);
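
fill_per_cpu_value() above relies on the kernel's per-CPU value layout: a lookup returns one value slot per possible CPU, each rounded up to 8 bytes. A hedged userspace sketch of reading such a map, assuming libbpf's bpf_map_lookup_elem() and a u32 value type; the map fd, key, and CPU count would come from the surrounding tool:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <bpf/bpf.h>

/* Dump one element of a per-CPU map with a u32 value type: the kernel
 * fills n_cpus slots, each padded to an 8-byte boundary. */
static int dump_percpu_u32(int map_fd, uint32_t key, unsigned int n_cpus)
{
	const size_t step = 8;	/* round_up(sizeof(uint32_t), 8) */
	char *values = calloc(n_cpus, step);
	unsigned int i;

	if (!values)
		return -1;
	if (bpf_map_lookup_elem(map_fd, &key, values) == 0)
		for (i = 0; i < n_cpus; i++)
			printf("cpu %u: %u\n", i,
			       *(uint32_t *)(values + i * step));
	free(values);
	return 0;
}
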
index 0640e9b..33ed080 100644 (file)
@@ -78,13 +78,14 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 
 static int prog_fd_by_tag(unsigned char *tag)
 {
-       struct bpf_prog_info info = {};
-       __u32 len = sizeof(info);
        unsigned int id = 0;
        int err;
        int fd;
 
        while (true) {
+               struct bpf_prog_info info = {};
+               __u32 len = sizeof(info);
+
                err = bpf_prog_get_next_id(id, &id);
                if (err) {
                        p_err("%s", strerror(errno));
index 3040830..8454566 100644 (file)
@@ -330,7 +330,7 @@ static const struct option longopts[] = {
 
 int main(int argc, char **argv)
 {
-       unsigned long long num_loops = 2;
+       long long num_loops = 2;
        unsigned long timedelay = 1000000;
        unsigned long buf_len = 128;
 
index fd92ce8..57aaeaf 100644 (file)
@@ -15,6 +15,8 @@
 #include "../../arch/ia64/include/uapi/asm/bitsperlong.h"
 #elif defined(__riscv)
 #include "../../arch/riscv/include/uapi/asm/bitsperlong.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/uapi/asm/bitsperlong.h"
 #else
 #include <asm-generic/bitsperlong.h>
 #endif
index f6052e7..a55cb8b 100644 (file)
@@ -268,7 +268,7 @@ struct sockaddr_in {
 #define        IN_MULTICAST(a)         IN_CLASSD(a)
 #define        IN_MULTICAST_NET        0xe0000000
 
-#define        IN_BADCLASS(a)          ((((long int) (a) ) == 0xffffffff)
+#define        IN_BADCLASS(a)          (((long int) (a) ) == (long int)0xffffffff)
 #define        IN_EXPERIMENTAL(a)      IN_BADCLASS((a))
 
 #define        IN_CLASSE(a)            ((((long int) (a)) & 0xf0000000) == 0xf0000000)
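
The previous IN_BADCLASS definition had an unbalanced parenthesis and compared against an unsigned literal; the fixed macro casts both sides to long int, so it behaves identically on 32-bit and 64-bit targets. A quick self-check under that fixed definition:

#include <assert.h>

#define IN_BADCLASS(a)	(((long int) (a)) == (long int)0xffffffff)

int main(void)
{
	assert(IN_BADCLASS(0xffffffffu));	/* 255.255.255.255 */
	assert(!IN_BADCLASS(0xfffffffeu));	/* 255.255.255.254 */
	return 0;
}
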
index 6e89a5d..653c4f9 100644 (file)
@@ -13,8 +13,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_BPF 13
-
 struct tc_act_bpf {
        tc_gen;
 };
index 095aebd..e6150f2 100644 (file)
@@ -19,8 +19,11 @@ C2C stands for Cache To Cache.
 The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
 you to track down the cacheline contentions.
 
-The tool is based on x86's load latency and precise store facility events
-provided by Intel CPUs. These events provide:
+On x86, the tool is based on load latency and precise store facility events
+provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling
+with a thresholding feature.
+
+These events provide:
   - memory address of the access
   - type of the access (load and store details)
   - latency (in cycles) of the load access
@@ -46,7 +49,7 @@ RECORD OPTIONS
 
 -l::
 --ldlat::
-       Configure mem-loads latency.
+       Configure mem-loads latency. (x86 only)
 
 -k::
 --all-kernel::
@@ -119,11 +122,16 @@ Following perf record options are configured by default:
   -W,-d,--phys-data,--sample-cpu
 
 Unless specified otherwise with '-e' option, following events are monitored by
-default:
+default on x86:
 
   cpu/mem-loads,ldlat=30/P
   cpu/mem-stores/P
 
+and the following on PowerPC:
+
+  cpu/mem-loads/
+  cpu/mem-stores/
+
 User can pass any 'perf record' option behind '--' mark, like (to enable
 callchains and system wide monitoring):
 
index f8d2167..199ea0f 100644 (file)
@@ -82,7 +82,7 @@ RECORD OPTIONS
        Be more verbose (show counter open errors, etc)
 
 --ldlat <n>::
-       Specify desired latency for loads event.
+       Specify desired latency for loads event. (x86 only)
 
 In addition, for report all perf report options are valid, and for record
 all perf record options.
index 2e65953..ba98bd0 100644 (file)
@@ -2,6 +2,7 @@ libperf-y += header.o
 libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
+libperf-y += mem-events.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c
new file mode 100644 (file)
index 0000000..d08311f
--- /dev/null
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "mem-events.h"
+
+/* PowerPC does not support 'ldlat' parameter. */
+char *perf_mem_events__name(int i)
+{
+       if (i == PERF_MEM_EVENTS__LOAD)
+               return (char *) "cpu/mem-loads/";
+
+       return (char *) "cpu/mem-stores/";
+}
index d079f36..ac221f1 100644 (file)
@@ -1681,13 +1681,8 @@ static void perf_sample__fprint_metric(struct perf_script *script,
                .force_header = false,
        };
        struct perf_evsel *ev2;
-       static bool init;
        u64 val;
 
-       if (!init) {
-               perf_stat__init_shadow_stats();
-               init = true;
-       }
        if (!evsel->stats)
                perf_evlist__alloc_stats(script->session->evlist, false);
        if (evsel_script(evsel->leader)->gnum++ == 0)
@@ -1794,7 +1789,7 @@ static void process_event(struct perf_script *script,
                return;
        }
 
-       if (PRINT_FIELD(TRACE)) {
+       if (PRINT_FIELD(TRACE) && sample->raw_data) {
                event_format__fprintf(evsel->tp_format, sample->cpu,
                                      sample->raw_data, sample->raw_size, fp);
        }
@@ -2359,6 +2354,8 @@ static int __cmd_script(struct perf_script *script)
 
        signal(SIGINT, sig_handler);
 
+       perf_stat__init_shadow_stats();
+
        /* override event processing functions */
        if (script->show_task_events) {
                script->tool.comm = process_comm_event;
index ed45831..b36061c 100644 (file)
@@ -2514,19 +2514,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
 
 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 {
-       struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
+       bool found = false;
+       struct perf_evsel *evsel, *tmp;
+       struct parse_events_error err = { .idx = 0, };
+       int ret = parse_events(evlist, "probe:vfs_getname*", &err);
 
-       if (IS_ERR(evsel))
+       if (ret)
                return false;
 
-       if (perf_evsel__field(evsel, "pathname") == NULL) {
+       evlist__for_each_entry_safe(evlist, evsel, tmp) {
+               if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
+                       continue;
+
+               if (perf_evsel__field(evsel, "pathname")) {
+                       evsel->handler = trace__vfs_getname;
+                       found = true;
+                       continue;
+               }
+
+               list_del_init(&evsel->node);
+               evsel->evlist = NULL;
                perf_evsel__delete(evsel);
-               return false;
        }
 
-       evsel->handler = trace__vfs_getname;
-       perf_evlist__add(evlist, evsel);
-       return true;
+       return found;
 }
 
 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
index 44090a9..e952127 100644 (file)
@@ -1,6 +1,8 @@
 #! /usr/bin/python
 # SPDX-License-Identifier: GPL-2.0
 
+from __future__ import print_function
+
 import os
 import sys
 import glob
@@ -8,7 +10,11 @@ import optparse
 import tempfile
 import logging
 import shutil
-import ConfigParser
+
+try:
+    import configparser
+except ImportError:
+    import ConfigParser as configparser
 
 def data_equal(a, b):
     # Allow multiple values in assignment separated by '|'
@@ -100,20 +106,20 @@ class Event(dict):
     def equal(self, other):
         for t in Event.terms:
             log.debug("      [%s] %s %s" % (t, self[t], other[t]));
-            if not self.has_key(t) or not other.has_key(t):
+            if t not in self or t not in other:
                 return False
             if not data_equal(self[t], other[t]):
                 return False
         return True
 
     def optional(self):
-        if self.has_key('optional') and self['optional'] == '1':
+        if 'optional' in self and self['optional'] == '1':
             return True
         return False
 
     def diff(self, other):
         for t in Event.terms:
-            if not self.has_key(t) or not other.has_key(t):
+            if t not in self or t not in other:
                 continue
             if not data_equal(self[t], other[t]):
                 log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
@@ -134,7 +140,7 @@ class Event(dict):
 #   - expected values assignments
 class Test(object):
     def __init__(self, path, options):
-        parser = ConfigParser.SafeConfigParser()
+        parser = configparser.SafeConfigParser()
         parser.read(path)
 
         log.warning("running '%s'" % path)
@@ -193,7 +199,7 @@ class Test(object):
         return True
 
     def load_events(self, path, events):
-        parser_event = ConfigParser.SafeConfigParser()
+        parser_event = configparser.SafeConfigParser()
         parser_event.read(path)
 
         # The event record section header contains 'event' word,
@@ -207,7 +213,7 @@ class Test(object):
             # Read parent event if there's any
             if (':' in section):
                 base = section[section.index(':') + 1:]
-                parser_base = ConfigParser.SafeConfigParser()
+                parser_base = configparser.SafeConfigParser()
                 parser_base.read(self.test_dir + '/' + base)
                 base_items = parser_base.items('event')
 
@@ -322,9 +328,9 @@ def run_tests(options):
     for f in glob.glob(options.test_dir + '/' + options.test):
         try:
             Test(f, options).run()
-        except Unsup, obj:
+        except Unsup as obj:
             log.warning("unsupp  %s" % obj.getMsg())
-        except Notest, obj:
+        except Notest as obj:
             log.warning("skipped %s" % obj.getMsg())
 
 def setup_log(verbose):
@@ -363,7 +369,7 @@ def main():
     parser.add_option("-p", "--perf",
                       action="store", type="string", dest="perf")
     parser.add_option("-v", "--verbose",
-                      action="count", dest="verbose")
+                      default=0, action="count", dest="verbose")
 
     options, args = parser.parse_args()
     if args:
@@ -373,7 +379,7 @@ def main():
     setup_log(options.verbose)
 
     if not options.test_dir:
-        print 'FAILED no -d option specified'
+        print('FAILED no -d option specified')
         sys.exit(-1)
 
     if not options.test:
@@ -382,8 +388,8 @@ def main():
     try:
         run_tests(options)
 
-    except Fail, obj:
-        print "FAILED %s" % obj.getMsg();
+    except Fail as obj:
+        print("FAILED %s" % obj.getMsg())
         sys.exit(-1)
 
     sys.exit(0)
index 5f8501c..5cbba70 100644 (file)
@@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
                return -1;
        }
 
-       is_signed = !!(field->flags | TEP_FIELD_IS_SIGNED);
+       is_signed = !!(field->flags & TEP_FIELD_IS_SIGNED);
        if (should_be_signed && !is_signed) {
                pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n",
                         evsel->name, name, is_signed, should_be_signed);
index 1d00e5e..82e16bf 100644 (file)
@@ -224,20 +224,24 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
        return ret;
 }
 
-static int disasm__cmp(struct annotation_line *a, struct annotation_line *b)
+static double disasm__cmp(struct annotation_line *a, struct annotation_line *b,
+                                                 int percent_type)
 {
        int i;
 
        for (i = 0; i < a->data_nr; i++) {
-               if (a->data[i].percent == b->data[i].percent)
+               if (a->data[i].percent[percent_type] == b->data[i].percent[percent_type])
                        continue;
-               return a->data[i].percent < b->data[i].percent;
+               return a->data[i].percent[percent_type] -
+                          b->data[i].percent[percent_type];
        }
        return 0;
 }
 
-static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al)
+static void disasm_rb_tree__insert(struct annotate_browser *browser,
+                               struct annotation_line *al)
 {
+       struct rb_root *root = &browser->entries;
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct annotation_line *l;
@@ -246,7 +250,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line
                parent = *p;
                l = rb_entry(parent, struct annotation_line, rb_node);
 
-               if (disasm__cmp(al, l))
+               if (disasm__cmp(al, l, browser->opts->percent_type) < 0)
                        p = &(*p)->rb_left;
                else
                        p = &(*p)->rb_right;
@@ -329,7 +333,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
                        RB_CLEAR_NODE(&pos->al.rb_node);
                        continue;
                }
-               disasm_rb_tree__insert(&browser->entries, &pos->al);
+               disasm_rb_tree__insert(browser, &pos->al);
        }
        pthread_mutex_unlock(&notes->lock);
 
index 8951250..39c0004 100644 (file)
@@ -160,7 +160,7 @@ getBPFObjectFromModule(llvm::Module *Module)
        }
        PM.run(*Module);
 
-       return std::move(Buffer);
+       return Buffer;
 }
 
 }
index 1ccbd33..383674f 100644 (file)
@@ -134,7 +134,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list)
        if (!cpu_list)
                return cpu_map__read_all_cpu_map();
 
-       if (!isdigit(*cpu_list))
+       /*
+        * must handle the case of empty cpumap to cover
+        * TOPOLOGY header for NUMA nodes with no CPU
+        * (e.g., because of CPU hotplug)
+        */
+       if (!isdigit(*cpu_list) && *cpu_list != '\0')
                goto out;
 
        while (isdigit(*cpu_list)) {
@@ -181,8 +186,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list)
 
        if (nr_cpus > 0)
                cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
-       else
+       else if (*cpu_list != '\0')
                cpus = cpu_map__default_new();
+       else
+               cpus = cpu_map__dummy_new();
 invalid:
        free(tmp_cpus);
 out:
index 93f74d8..42c3e5a 100644 (file)
@@ -28,7 +28,7 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
 static char mem_loads_name[100];
 static bool mem_loads_name__init;
 
-char *perf_mem_events__name(int i)
+char * __weak perf_mem_events__name(int i)
 {
        if (i == PERF_MEM_EVENTS__LOAD) {
                if (!mem_loads_name__init) {
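
Marking the generic perf_mem_events__name() __weak lets the PowerPC definition added earlier override it at link time, with no #ifdef in the generic code. A minimal sketch of the weak-symbol pattern (the function name and string below are illustrative, not the exact perf defaults):

#include <stdio.h>

/* Weak default: any object file that provides a strong definition of
 * mem_load_event() replaces this one at link time. */
__attribute__((weak)) const char *mem_load_event(void)
{
	return "cpu/mem-loads,ldlat=30/P";	/* illustrative default */
}

int main(void)
{
	/* Prints the weak default unless a strong override is linked in. */
	printf("%s\n", mem_load_event());
	return 0;
}
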
index 8975895..ea523d3 100644 (file)
@@ -391,8 +391,10 @@ void ordered_events__free(struct ordered_events *oe)
         * Current buffer might not have all the events allocated
         * yet, we need to free only allocated ones ...
         */
-       list_del(&oe->buffer->list);
-       ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+       if (oe->buffer) {
+               list_del(&oe->buffer->list);
+               ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+       }
 
        /* ... and continue with the rest */
        list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) {
index 63f758c..64d1f36 100644 (file)
@@ -17,6 +17,8 @@ if cc == "clang":
             vars[var] = sub("-mcet", "", vars[var])
         if not clang_has_option("-fcf-protection"):
             vars[var] = sub("-fcf-protection", "", vars[var])
+        if not clang_has_option("-fstack-clash-protection"):
+            vars[var] = sub("-fstack-clash-protection", "", vars[var])
 
 from distutils.core import setup, Extension
 
index 66a84d5..dca7dfa 100644 (file)
 #define EM_AARCH64     183  /* ARM 64 bit */
 #endif
 
+#ifndef ELF32_ST_VISIBILITY
+#define ELF32_ST_VISIBILITY(o) ((o) & 0x03)
+#endif
+
+/* For ELF64 the definitions are the same.  */
+#ifndef ELF64_ST_VISIBILITY
+#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY (o)
+#endif
+
+/* How to extract information held in the st_other field.  */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(val)        ELF64_ST_VISIBILITY (val)
+#endif
+
 typedef Elf64_Nhdr GElf_Nhdr;
 
 #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
@@ -87,6 +101,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym)
        return GELF_ST_TYPE(sym->st_info);
 }
 
+static inline uint8_t elf_sym__visibility(const GElf_Sym *sym)
+{
+       return GELF_ST_VISIBILITY(sym->st_other);
+}
+
 #ifndef STT_GNU_IFUNC
 #define STT_GNU_IFUNC 10
 #endif
@@ -111,7 +130,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym)
        return elf_sym__type(sym) == STT_NOTYPE &&
                sym->st_name != 0 &&
                sym->st_shndx != SHN_UNDEF &&
-               sym->st_shndx != SHN_ABS;
+               sym->st_shndx != SHN_ABS &&
+               elf_sym__visibility(sym) != STV_HIDDEN &&
+               elf_sym__visibility(sym) != STV_INTERNAL;
 }
 
 static bool elf_sym__filter(GElf_Sym *sym)
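
elf_sym__visibility() extracts the two low bits of st_other, which is all the (G)ELF_ST_VISIBILITY macros encode; STV_HIDDEN and STV_INTERNAL symbols are then excluded from labels. A small standalone check using the standard <elf.h> constants:

#include <assert.h>
#include <elf.h>

/* Same extraction as ELF32/ELF64_ST_VISIBILITY: low two bits of st_other. */
#define ST_VISIBILITY(o)	((o) & 0x03)

int main(void)
{
	assert(ST_VISIBILITY(STV_DEFAULT)  == STV_DEFAULT);
	assert(ST_VISIBILITY(STV_HIDDEN)   == STV_HIDDEN);
	assert(ST_VISIBILITY(STV_INTERNAL) == STV_INTERNAL);
	return 0;
}
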
index 1a2bd15..400ee81 100644 (file)
@@ -10,6 +10,7 @@ TARGETS += drivers/dma-buf
 TARGETS += efivarfs
 TARGETS += exec
 TARGETS += filesystems
+TARGETS += filesystems/binderfs
 TARGETS += firmware
 TARGETS += ftrace
 TARGETS += futex
index 1973470..a29206e 100644 (file)
@@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void)
        unsigned int start, end, possible_cpus = 0;
        char buff[128];
        FILE *fp;
-       int n;
+       int len, n, i, j = 0;
 
        fp = fopen(fcpu, "r");
        if (!fp) {
@@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void)
                exit(1);
        }
 
-       while (fgets(buff, sizeof(buff), fp)) {
-               n = sscanf(buff, "%u-%u", &start, &end);
-               if (n == 0) {
-                       printf("Failed to retrieve # possible CPUs!\n");
-                       exit(1);
-               } else if (n == 1) {
-                       end = start;
+       if (!fgets(buff, sizeof(buff), fp)) {
+               printf("Failed to read %s!\n", fcpu);
+               exit(1);
+       }
+
+       len = strlen(buff);
+       for (i = 0; i <= len; i++) {
+               if (buff[i] == ',' || buff[i] == '\0') {
+                       buff[i] = '\0';
+                       n = sscanf(&buff[j], "%u-%u", &start, &end);
+                       if (n <= 0) {
+                               printf("Failed to retrieve # possible CPUs!\n");
+                               exit(1);
+                       } else if (n == 1) {
+                               end = start;
+                       }
+                       possible_cpus += end - start + 1;
+                       j = i + 1;
                }
-               possible_cpus = start == 0 ? end + 1 : 0;
-               break;
        }
+
        fclose(fp);
 
        return possible_cpus;
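
The rewritten loop splits the possible-CPU mask on commas and sums each start-end range, so a mask such as "0-3,5,7-8" now yields 7 CPUs instead of stopping after the first token. A self-contained sketch of the same parsing (the helper name is hypothetical):

#include <stdio.h>
#include <string.h>

/* Count CPUs in a sysfs-style mask such as "0-3,5,7-8\n". */
static unsigned int count_possible_cpus(char *buff)
{
	unsigned int start, end, possible = 0;
	int i, j = 0, n, len = strlen(buff);

	for (i = 0; i <= len; i++) {
		if (buff[i] == ',' || buff[i] == '\n' || buff[i] == '\0') {
			buff[i] = '\0';
			n = sscanf(&buff[j], "%u-%u", &start, &end);
			if (n <= 0)
				break;		/* empty token, stop */
			if (n == 1)
				end = start;	/* single CPU, not a range */
			possible += end - start + 1;
			j = i + 1;
		}
	}
	return possible;
}

int main(void)
{
	char mask[] = "0-3,5,7-8\n";

	printf("%u possible CPUs\n", count_possible_cpus(mask));	/* 7 */
	return 0;
}
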
index bbcacba..02d3143 100644 (file)
@@ -1902,13 +1902,12 @@ static struct btf_raw_test raw_tests[] = {
 },
 
 {
-       .descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)",
+       .descr = "func proto (TYPEDEF=>FUNC_PROTO)",
        .raw_types = {
                BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
                BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),               /* [2] */
-               BTF_CONST_ENC(4),                               /* [3] */
-               BTF_TYPEDEF_ENC(NAME_TBD, 5),                   /* [4] */
-               BTF_FUNC_PROTO_ENC(0, 2),                       /* [5] */
+               BTF_TYPEDEF_ENC(NAME_TBD, 4),                   /* [3] */
+               BTF_FUNC_PROTO_ENC(0, 2),                       /* [4] */
                        BTF_FUNC_PROTO_ARG_ENC(0, 1),
                        BTF_FUNC_PROTO_ARG_ENC(0, 2),
                BTF_END_RAW,
@@ -1922,8 +1921,6 @@ static struct btf_raw_test raw_tests[] = {
        .key_type_id = 1,
        .value_type_id = 1,
        .max_entries = 4,
-       .btf_load_err = true,
-       .err_str = "Invalid type_id",
 },
 
 {
index bab13dd..0d26b5e 100755 (executable)
@@ -37,6 +37,10 @@ prerequisite()
                exit $ksft_skip
        fi
 
+       present_cpus=`cat $SYSFS/devices/system/cpu/present`
+       present_max=${present_cpus##*-}
+       echo "present_cpus = $present_cpus present_max = $present_max"
+
        echo -e "\t Cpus in online state: $online_cpus"
 
        offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
@@ -151,6 +155,8 @@ online_cpus=0
 online_max=0
 offline_cpus=0
 offline_max=0
+present_cpus=0
+present_max=0
 
 while getopts e:ahp: opt; do
        case $opt in
@@ -190,9 +196,10 @@ if [ $allcpus -eq 0 ]; then
        online_cpu_expect_success $online_max
 
        if [[ $offline_cpus -gt 0 ]]; then
-               echo -e "\t offline to online to offline: cpu $offline_max"
-               online_cpu_expect_success $offline_max
-               offline_cpu_expect_success $offline_max
+               echo -e "\t offline to online to offline: cpu $present_max"
+               online_cpu_expect_success $present_max
+               offline_cpu_expect_success $present_max
+               online_cpu $present_max
        fi
        exit 0
 else
index f1922bf..4d5b880 100755 (executable)
@@ -9,11 +9,11 @@ lib_dir=$(dirname $0)/../../../../net/forwarding
 
 ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
        multiple_masks_test ctcam_edge_cases_test delta_simple_test \
-       delta_two_masks_one_key_test bloom_simple_test \
-       bloom_complex_test bloom_delta_test"
+       delta_two_masks_one_key_test delta_simple_rehash_test \
+       bloom_simple_test bloom_complex_test bloom_delta_test"
 NUM_NETIFS=2
 source $lib_dir/tc_common.sh
-source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
 
 tcflags="skip_hw"
 
@@ -494,6 +494,77 @@ delta_two_masks_one_key_test()
        log_test "delta two masks one key test ($tcflags)"
 }
 
+delta_simple_rehash_test()
+{
+       RET=0
+
+       if [[ "$tcflags" != "skip_sw" ]]; then
+               return 0;
+       fi
+
+       devlink dev param set $DEVLINK_DEV \
+               name acl_region_rehash_interval cmode runtime value 0
+       check_err $? "Failed to set ACL region rehash interval"
+
+       tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7
+       tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+       check_fail $? "Rehash trace was hit even when rehash should be disabled"
+
+       devlink dev param set $DEVLINK_DEV \
+               name acl_region_rehash_interval cmode runtime value 3000
+       check_err $? "Failed to set ACL region rehash interval"
+
+       sleep 1
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.1.0/25 action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+       tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags dst_ip 192.0.3.0/24 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 103 1
+       check_fail $? "Matched a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tp_record_all mlxsw:* 3
+       tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+       check_err $? "Rehash trace was not hit"
+       tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+       check_err $? "Migrate trace was not hit"
+       tp_record_all mlxsw:* 3
+       tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+       check_err $? "Rehash trace was not hit"
+       tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+       check_fail $? "Migrate trace was hit when no migration should happen"
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched a wrong filter after rehash"
+
+       tc_check_packets "dev $h2 ingress" 103 1
+       check_fail $? "Matched a wrong filter after rehash"
+
+       tc_check_packets "dev $h2 ingress" 102 2
+       check_err $? "Did not match on correct filter after rehash"
+
+       tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "delta simple rehash test ($tcflags)"
+}
+
 bloom_simple_test()
 {
        # Bloom filter requires that the eRP table is used. This test
index a0a80e1..e7ffc79 100755 (executable)
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
 NUM_NETIFS=6
-source ../../../../net/forwarding/lib.sh
 source ../../../../net/forwarding/tc_common.sh
 source devlink_lib_spectrum.sh
 
diff --git a/tools/testing/selftests/filesystems/binderfs/.gitignore b/tools/testing/selftests/filesystems/binderfs/.gitignore
new file mode 100644 (file)
index 0000000..8a5d9bf
--- /dev/null
@@ -0,0 +1 @@
+binderfs_test
diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile
new file mode 100644 (file)
index 0000000..58cb659
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := binderfs_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
new file mode 100644 (file)
index 0000000..8c2ed96
--- /dev/null
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/android/binder.h>
+#include <linux/android/binderfs.h>
+#include "../../kselftest.h"
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+       ssize_t ret;
+again:
+       ret = write(fd, buf, count);
+       if (ret < 0 && errno == EINTR)
+               goto again;
+
+       return ret;
+}
+
+static void write_to_file(const char *filename, const void *buf, size_t count,
+                         int allowed_errno)
+{
+       int fd, saved_errno;
+       ssize_t ret;
+
+       fd = open(filename, O_WRONLY | O_CLOEXEC);
+       if (fd < 0)
+               ksft_exit_fail_msg("%s - Failed to open file %s\n",
+                                  strerror(errno), filename);
+
+       ret = write_nointr(fd, buf, count);
+       if (ret < 0) {
+               if (allowed_errno && (errno == allowed_errno)) {
+                       close(fd);
+                       return;
+               }
+
+               goto on_error;
+       }
+
+       if ((size_t)ret != count)
+               goto on_error;
+
+       close(fd);
+       return;
+
+on_error:
+       saved_errno = errno;
+       close(fd);
+       errno = saved_errno;
+
+       if (ret < 0)
+               ksft_exit_fail_msg("%s - Failed to write to file %s\n",
+                                  strerror(errno), filename);
+
+       ksft_exit_fail_msg("Failed to write to file %s\n", filename);
+}
+
+static void change_to_userns(void)
+{
+       int ret;
+       uid_t uid;
+       gid_t gid;
+       /* {g,u}id_map files only allow a max of 4096 bytes written to them */
+       char idmap[4096];
+
+       uid = getuid();
+       gid = getgid();
+
+       ret = unshare(CLONE_NEWUSER);
+       if (ret < 0)
+               ksft_exit_fail_msg("%s - Failed to unshare user namespace\n",
+                                  strerror(errno));
+
+       write_to_file("/proc/self/setgroups", "deny", strlen("deny"), ENOENT);
+
+       ret = snprintf(idmap, sizeof(idmap), "0 %d 1", uid);
+       if (ret < 0 || (size_t)ret >= sizeof(idmap))
+               ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n",
+                                  strerror(errno));
+
+       write_to_file("/proc/self/uid_map", idmap, strlen(idmap), 0);
+
+       ret = snprintf(idmap, sizeof(idmap), "0 %d 1", gid);
+       if (ret < 0 || (size_t)ret >= sizeof(idmap))
+               ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n",
+                                  strerror(errno));
+
+       write_to_file("/proc/self/gid_map", idmap, strlen(idmap), 0);
+
+       ret = setgid(0);
+       if (ret)
+               ksft_exit_fail_msg("%s - Failed to setgid(0)\n",
+                                  strerror(errno));
+
+       ret = setuid(0);
+       if (ret)
+               ksft_exit_fail_msg("%s - Failed to setgid(0)\n",
+                                  strerror(errno));
+}
+
+static void change_to_mountns(void)
+{
+       int ret;
+
+       ret = unshare(CLONE_NEWNS);
+       if (ret < 0)
+               ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n",
+                                  strerror(errno));
+
+       ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
+       if (ret < 0)
+               ksft_exit_fail_msg("%s - Failed to mount / as private\n",
+                                  strerror(errno));
+}
+
+static void rmdir_protect_errno(const char *dir)
+{
+       int saved_errno = errno;
+       (void)rmdir(dir);
+       errno = saved_errno;
+}
+
+static void __do_binderfs_test(void)
+{
+       int fd, ret, saved_errno;
+       size_t len;
+       ssize_t wret;
+       bool keep = false;
+       struct binderfs_device device = { 0 };
+       struct binder_version version = { 0 };
+
+       change_to_mountns();
+
+       ret = mkdir("/dev/binderfs", 0755);
+       if (ret < 0) {
+               if (errno != EEXIST)
+                       ksft_exit_fail_msg(
+                               "%s - Failed to create binderfs mountpoint\n",
+                               strerror(errno));
+
+               keep = true;
+       }
+
+       ret = mount(NULL, "/dev/binderfs", "binder", 0, 0);
+       if (ret < 0) {
+               if (errno != ENODEV)
+                       ksft_exit_fail_msg("%s - Failed to mount binderfs\n",
+                                          strerror(errno));
+
+               keep ? : rmdir_protect_errno("/dev/binderfs");
+               ksft_exit_skip(
+                       "The Android binderfs filesystem is not available\n");
+       }
+
+       /* binderfs mount test passed */
+       ksft_inc_pass_cnt();
+
+       memcpy(device.name, "my-binder", strlen("my-binder"));
+
+       fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC);
+       if (fd < 0)
+               ksft_exit_fail_msg(
+                       "%s - Failed to open binder-control device\n",
+                       strerror(errno));
+
+       ret = ioctl(fd, BINDER_CTL_ADD, &device);
+       saved_errno = errno;
+       close(fd);
+       errno = saved_errno;
+       if (ret < 0) {
+               keep ? : rmdir_protect_errno("/dev/binderfs");
+               ksft_exit_fail_msg(
+                       "%s - Failed to allocate new binder device\n",
+                       strerror(errno));
+       }
+
+       ksft_print_msg(
+               "Allocated new binder device with major %d, minor %d, and name %s\n",
+               device.major, device.minor, device.name);
+
+       /* binder device allocation test passed */
+       ksft_inc_pass_cnt();
+
+       fd = open("/dev/binderfs/my-binder", O_CLOEXEC | O_RDONLY);
+       if (fd < 0) {
+               keep ? : rmdir_protect_errno("/dev/binderfs");
+               ksft_exit_fail_msg("%s - Failed to open my-binder device\n",
+                                  strerror(errno));
+       }
+
+       ret = ioctl(fd, BINDER_VERSION, &version);
+       saved_errno = errno;
+       close(fd);
+       errno = saved_errno;
+       if (ret < 0) {
+               keep ? : rmdir_protect_errno("/dev/binderfs");
+               ksft_exit_fail_msg(
+                       "%s - Failed to open perform BINDER_VERSION request\n",
+                       strerror(errno));
+       }
+
+       ksft_print_msg("Detected binder version: %d\n",
+                      version.protocol_version);
+
+       /* binder transaction with binderfs binder device passed */
+       ksft_inc_pass_cnt();
+
+       ret = unlink("/dev/binderfs/my-binder");
+       if (ret < 0) {
+               keep ? : rmdir_protect_errno("/dev/binderfs");
+               ksft_exit_fail_msg("%s - Failed to delete binder device\n",
+                                  strerror(errno));
+       }
+
+       /* binder device removal passed */
+       ksft_inc_pass_cnt();
+
+       ret = unlink("/dev/binderfs/binder-control");
+       if (!ret) {
+               keep ? : rmdir_protect_errno("/dev/binderfs");
+               ksft_exit_fail_msg("Managed to delete binder-control device\n");
+       } else if (errno != EPERM) {
+               keep ? : rmdir_protect_errno("/dev/binderfs");
+               ksft_exit_fail_msg(
+                       "%s - Failed to delete binder-control device but exited with unexpected error code\n",
+                       strerror(errno));
+       }
+
+       /* binder-control device removal failed as expected */
+       ksft_inc_xfail_cnt();
+
+on_error:
+       ret = umount2("/dev/binderfs", MNT_DETACH);
+       keep ?: rmdir_protect_errno("/dev/binderfs");
+       if (ret < 0)
+               ksft_exit_fail_msg("%s - Failed to unmount binderfs\n",
+                                  strerror(errno));
+
+       /* binderfs unmount test passed */
+       ksft_inc_pass_cnt();
+}
+
+static void binderfs_test_privileged()
+{
+       if (geteuid() != 0)
+               ksft_print_msg(
+                       "Tests are not run as root. Skipping privileged tests\n");
+       else
+               __do_binderfs_test();
+}
+
+static void binderfs_test_unprivileged()
+{
+       change_to_userns();
+       __do_binderfs_test();
+}
+
+int main(int argc, char *argv[])
+{
+       binderfs_test_privileged();
+       binderfs_test_unprivileged();
+       ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config
new file mode 100644 (file)
index 0000000..02dd6cc
--- /dev/null
@@ -0,0 +1,3 @@
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDERFS=y
+CONFIG_ANDROID_BINDER_IPC=y
index f4ba8eb..ad06489 100644 (file)
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 TEST_PROGS := ir_loopback.sh
 TEST_GEN_PROGS_EXTENDED := ir_loopback
+APIDIR := ../../../include/uapi
+CFLAGS += -Wall -O2 -I$(APIDIR)
 
 include ../lib.mk
index f8f3e90..1e6d14d 100644 (file)
@@ -21,6 +21,6 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
+$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
 $(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
index 5cd2aed..da96eff 100644 (file)
@@ -10,3 +10,5 @@ CONFIG_NET_CLS_FLOWER=m
 CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_ACT_GACT=m
 CONFIG_VETH=m
+CONFIG_NAMESPACES=y
+CONFIG_NET_NS=y
index 47ed6ce..c9ff2b4 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for netfilter selftests
 
-TEST_PROGS := nft_trans_stress.sh
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
new file mode 100755 (executable)
index 0000000..8ec7668
--- /dev/null
@@ -0,0 +1,762 @@
+#!/bin/bash
+#
+# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without nft tool"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth0 netns ns0 type veth peer name eth0 netns ns1
+ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
+
+ip -net ns0 link set lo up
+ip -net ns0 link set veth0 up
+ip -net ns0 addr add 10.0.1.1/24 dev veth0
+ip -net ns0 addr add dead:1::1/64 dev veth0
+
+ip -net ns0 link set veth1 up
+ip -net ns0 addr add 10.0.2.1/24 dev veth1
+ip -net ns0 addr add dead:2::1/64 dev veth1
+
+for i in 1 2; do
+  ip -net ns$i link set lo up
+  ip -net ns$i link set eth0 up
+  ip -net ns$i addr add 10.0.$i.99/24 dev eth0
+  ip -net ns$i route add default via 10.0.$i.1
+  ip -net ns$i addr add dead:$i::99/64 dev eth0
+  ip -net ns$i route add default via dead:$i::1
+done
+
+bad_counter()
+{
+       local ns=$1
+       local counter=$2
+       local expect=$3
+
+       echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2
+       ip netns exec $ns nft list counter inet filter $counter 1>&2
+}
+
+check_counters()
+{
+       ns=$1
+       local lret=0
+
+       cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84")
+       if [ $? -ne 0 ]; then
+               bad_counter $ns ns0in "packets 1 bytes 84"
+               lret=1
+       fi
+       cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84")
+       if [ $? -ne 0 ]; then
+               bad_counter $ns ns0out "packets 1 bytes 84"
+               lret=1
+       fi
+
+       expect="packets 1 bytes 104"
+       cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect")
+       if [ $? -ne 0 ]; then
+               bad_counter $ns ns0in6 "$expect"
+               lret=1
+       fi
+       cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect")
+       if [ $? -ne 0 ]; then
+               bad_counter $ns ns0out6 "$expect"
+               lret=1
+       fi
+
+       return $lret
+}
+
+check_ns0_counters()
+{
+       local ns=$1
+       local lret=0
+
+       cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
+       if [ $? -ne 0 ]; then
+               bad_counter ns0 ns0in "packets 0 bytes 0"
+               lret=1
+       fi
+
+       cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
+       if [ $? -ne 0 ]; then
+               bad_counter ns0 ns0in6 "packets 0 bytes 0"
+               lret=1
+       fi
+
+       cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
+       if [ $? -ne 0 ]; then
+               bad_counter ns0 ns0out "packets 0 bytes 0"
+               lret=1
+       fi
+       cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
+       if [ $? -ne 0 ]; then
+               bad_counter ns0 ns0out6 "packets 0 bytes 0"
+               lret=1
+       fi
+
+       for dir in "in" "out" ; do
+               expect="packets 1 bytes 84"
+               cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 $ns$dir "$expect"
+                       lret=1
+               fi
+
+               expect="packets 1 bytes 104"
+               cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 ${ns}${dir}6 "$expect"
+                       lret=1
+               fi
+       done
+
+       return $lret
+}
+
+reset_counters()
+{
+       for i in 0 1 2;do
+               ip netns exec ns$i nft reset counters inet > /dev/null
+       done
+}
+
+test_local_dnat6()
+{
+       local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+       chain output {
+               type nat hook output priority 0; policy accept;
+               ip6 daddr dead:1::99 dnat to dead:2::99
+       }
+}
+EOF
+       if [ $? -ne 0 ]; then
+               echo "SKIP: Could not add add ip6 dnat hook"
+               return $ksft_skip
+       fi
+
+       # ping netns1, expect rewrite to netns2
+       ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null
+       if [ $? -ne 0 ]; then
+               lret=1
+               echo "ERROR: ping6 failed"
+               return $lret
+       fi
+
+       expect="packets 0 bytes 0"
+       for dir in "in6" "out6" ; do
+               cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 ns1$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       expect="packets 1 bytes 104"
+       for dir in "in6" "out6" ; do
+               cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 ns2$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # expect 0 count in ns1
+       expect="packets 0 bytes 0"
+       for dir in "in6" "out6" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns0$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # expect 1 packet in ns2
+       expect="packets 1 bytes 104"
+       for dir in "in6" "out6" ; do
+               cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns2 ns0$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2"
+       ip netns exec ns0 nft flush chain ip6 nat output
+
+       return $lret
+}
+
+test_local_dnat()
+{
+       local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+       chain output {
+               type nat hook output priority 0; policy accept;
+               ip daddr 10.0.1.99 dnat to 10.0.2.99
+       }
+}
+EOF
+       # ping netns1, expect rewrite to netns2
+       ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+       if [ $? -ne 0 ]; then
+               lret=1
+               echo "ERROR: ping failed"
+               return $lret
+       fi
+
+       expect="packets 0 bytes 0"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 ns1$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       expect="packets 1 bytes 84"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 ns2$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # expect 0 count in ns1
+       expect="packets 0 bytes 0"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns0$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # expect 1 packet in ns2
+       expect="packets 1 bytes 84"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns2 ns0$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2"
+
+       ip netns exec ns0 nft flush chain ip nat output
+
+       reset_counters
+       ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+       if [ $? -ne 0 ]; then
+               lret=1
+               echo "ERROR: ping failed"
+               return $lret
+       fi
+
+       expect="packets 1 bytes 84"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 ns1$dir "$expect"
+                       lret=1
+               fi
+       done
+       expect="packets 0 bytes 0"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 ns2$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # expect 1 count in ns1
+       expect="packets 1 bytes 84"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns0$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # expect 0 packet in ns2
+       expect="packets 0 bytes 0"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns2 ns0$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush"
+
+       return $lret
+}
+
+
+test_masquerade6()
+{
+       local lret=0
+
+       ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+       ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 via ipv6"
+               return 1
+       fi
+
+       expect="packets 1 bytes 104"
+       for dir in "in6" "out6" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns2$dir "$expect"
+                       lret=1
+               fi
+
+               cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns2 ns1$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       reset_counters
+
+# add masquerading rule
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+       chain postrouting {
+               type nat hook postrouting priority 0; policy accept;
+               meta oif veth0 masquerade
+       }
+}
+EOF
+       ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+               lret=1
+       fi
+
+       # ns1 should have seen packets from ns0, due to masquerade
+       expect="packets 1 bytes 104"
+       for dir in "in6" "out6" ; do
+
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns0$dir "$expect"
+                       lret=1
+               fi
+
+               cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns2 ns1$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # ns1 should not have seen packets from ns2, due to masquerade
+       expect="packets 0 bytes 0"
+       for dir in "in6" "out6" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns2$dir "$expect"
+                       lret=1
+               fi
+
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns2$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       ip netns exec ns0 nft flush chain ip6 nat postrouting
+       if [ $? -ne 0 ]; then
+               echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
+               lret=1
+       fi
+
+       test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+
+       return $lret
+}
+
+test_masquerade()
+{
+       local lret=0
+
+       ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+       ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+       ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2"
+               lret=1
+       fi
+
+       expect="packets 1 bytes 84"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns2$dir "$expect"
+                       lret=1
+               fi
+
+               cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns2 ns1$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       reset_counters
+
+# add masquerading rule
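+# same masquerade logic as the IPv6 variant above, applied to the ip family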
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+       chain postrouting {
+               type nat hook postrouting priority 0; policy accept;
+               meta oif veth0 masquerade
+       }
+}
+EOF
+       ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+               lret=1
+       fi
+
+       # ns1 should have seen packets from ns0, due to masquerade
+       expect="packets 1 bytes 84"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns0$dir "$expect"
+                       lret=1
+               fi
+
+               cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns2 ns1$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # ns1 should not have seen packets from ns2, due to masquerade
+       expect="packets 0 bytes 0"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns2$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       ip netns exec ns0 nft flush chain ip nat postrouting
+       if [ $? -ne 0 ]; then
+               echo "ERROR: Could not flush nat postrouting" 1>&2
+               lret=1
+       fi
+
+       test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+
+       return $lret
+}
+
+test_redirect6()
+{
+       local lret=0
+
+       ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+       ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 via ipv6"
+               lret=1
+       fi
+
+       expect="packets 1 bytes 104"
+       for dir in "in6" "out6" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns2$dir "$expect"
+                       lret=1
+               fi
+
+               cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns2 ns1$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       reset_counters
+
+# add redirect rule
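+# redirect is a DNAT to the address of the interface the packet arrived on,
+# so ns2's echo requests are answered by ns0 itself and never reach ns1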
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+       chain prerouting {
+               type nat hook prerouting priority 0; policy accept;
+               meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
+       }
+}
+EOF
+       ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect"
+               lret=1
+       fi
+
+       # ns1 should have seen no packets from ns2, due to redirection
+       expect="packets 0 bytes 0"
+       for dir in "in6" "out6" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns2$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # ns0 should have seen packets from ns2, due to the redirect rule
+       expect="packets 1 bytes 104"
+       for dir in "in6" "out6" ; do
+               cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 ns2$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       ip netns exec ns0 nft delete table ip6 nat
+       if [ $? -ne 0 ]; then
+               echo "ERROR: Could not delete ip6 nat table" 1>&2
+               lret=1
+       fi
+
+       test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2"
+
+       return $lret
+}
+
+test_redirect()
+{
+       local lret=0
+
+       ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+       ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+       ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2"
+               lret=1
+       fi
+
+       expect="packets 1 bytes 84"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns2$dir "$expect"
+                       lret=1
+               fi
+
+               cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns2 ns1$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       reset_counters
+
+# add redirect rule
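+# same redirect logic as the IPv6 variant above, applied to the ip family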
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+       chain prerouting {
+               type nat hook prerouting priority 0; policy accept;
+               meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
+       }
+}
+EOF
+       ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 with active ip redirect"
+               lret=1
+       fi
+
+       # ns1 should have seen no packets from ns2, due to redirection
+       expect="packets 0 bytes 0"
+       for dir in "in" "out" ; do
+
+               cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns1 ns2$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       # ns0 should have seen packets from ns2, due to the redirect rule
+       expect="packets 1 bytes 84"
+       for dir in "in" "out" ; do
+               cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+               if [ $? -ne 0 ]; then
+                       bad_counter ns0 ns2$dir "$expect"
+                       lret=1
+               fi
+       done
+
+       ip netns exec ns0 nft delete table ip nat
+       if [ $? -ne 0 ]; then
+               echo "ERROR: Could not delete nat table" 1>&2
+               lret=1
+       fi
+
+       test $lret -eq 0 && echo "PASS: IP redirection for ns2"
+
+       return $lret
+}
+
+
+# Add identical counter rulesets to all three namespaces: the input/output
+# chains look up the peer address in a map and bump the matching named
+# counter, so e.g. "ip netns exec ns0 ping -c 1 -q 10.0.$i.99" shows up in
+# ns0's ns${i}out counter and in the target namespace's ns0in counter.
+for i in 0 1 2; do
+ip netns exec ns$i nft -f - <<EOF
+table inet filter {
+       counter ns0in {}
+       counter ns1in {}
+       counter ns2in {}
+
+       counter ns0out {}
+       counter ns1out {}
+       counter ns2out {}
+
+       counter ns0in6 {}
+       counter ns1in6 {}
+       counter ns2in6 {}
+
+       counter ns0out6 {}
+       counter ns1out6 {}
+       counter ns2out6 {}
+
+       map nsincounter {
+               type ipv4_addr : counter
+               elements = { 10.0.1.1 : "ns0in",
+                            10.0.2.1 : "ns0in",
+                            10.0.1.99 : "ns1in",
+                            10.0.2.99 : "ns2in" }
+       }
+
+       map nsincounter6 {
+               type ipv6_addr : counter
+               elements = { dead:1::1 : "ns0in6",
+                            dead:2::1 : "ns0in6",
+                            dead:1::99 : "ns1in6",
+                            dead:2::99 : "ns2in6" }
+       }
+
+       map nsoutcounter {
+               type ipv4_addr : counter
+               elements = { 10.0.1.1 : "ns0out",
+                            10.0.2.1 : "ns0out",
+                            10.0.1.99 : "ns1out",
+                            10.0.2.99 : "ns2out" }
+       }
+
+       map nsoutcounter6 {
+               type ipv6_addr : counter
+               elements = { dead:1::1 : "ns0out6",
+                            dead:2::1 : "ns0out6",
+                            dead:1::99 : "ns1out6",
+                            dead:2::99 : "ns2out6" }
+       }
+
+       chain input {
+               type filter hook input priority 0; policy accept;
+               counter name ip saddr map @nsincounter
+               icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6
+       }
+       chain output {
+               type filter hook output priority 0; policy accept;
+               counter name ip daddr map @nsoutcounter
+               icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6
+       }
+}
+EOF
+done
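+# a named counter can be inspected manually, e.g.:
+#   ip netns exec ns1 nft list counter inet filter ns2in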
+
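+# settle time before the connectivity checks, presumably to let IPv6
+# duplicate address detection finish on the veth links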
+sleep 3
+# test basic connectivity
+for i in 1 2; do
+  ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null
+  if [ $? -ne 0 ];then
+       echo "ERROR: Could not reach other namespace(s)" 1>&2
+       ret=1
+  fi
+
+  ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null
+  if [ $? -ne 0 ];then
+       echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+       ret=1
+  fi
+  check_counters ns$i
+  if [ $? -ne 0 ]; then
+       ret=1
+  fi
+
+  check_ns0_counters ns$i
+  if [ $? -ne 0 ]; then
+       ret=1
+  fi
+  reset_counters
+done
+
+if [ $ret -eq 0 ];then
+       echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2"
+fi
+
+reset_counters
+test_local_dnat
+test_local_dnat6
+
+reset_counters
+test_masquerade
+test_masquerade6
+
+reset_counters
+test_redirect
+test_redirect6
+
+for i in 0 1 2; do ip netns del ns$i;done
+
+exit $ret
index 9050eee..1de8bd8 100644 (file)
@@ -9,6 +9,3 @@ all: $(TEST_PROGS)
 top_srcdir = ../../../../..
 KSFT_KHDR_INSTALL := 1
 include ../../lib.mk
-
-clean:
-       rm -fr $(TEST_GEN_FILES)
index 82121a8..29bac5e 100644 (file)
@@ -10,4 +10,5 @@
 /proc-uptime-002
 /read
 /self
+/setns-dcache
 /thread-self
index 1c12c34..434d033 100644 (file)
@@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001
 TEST_GEN_PROGS += proc-uptime-002
 TEST_GEN_PROGS += read
 TEST_GEN_PROGS += self
+TEST_GEN_PROGS += setns-dcache
 TEST_GEN_PROGS += thread-self
 
 include ../lib.mk
diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c
new file mode 100644 (file)
index 0000000..60ab197
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that setns(CLONE_NEWNET) points to new /proc/net content even
+ * if old one is in dcache.
+ *
+ * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+
+static pid_t pid = -1;
+
+static void f(void)
+{
+       if (pid > 0) {
+               kill(pid, SIGTERM);
+       }
+}
+
+int main(void)
+{
+       int fd[2];
+       char _ = 0;
+       int nsfd;
+
+       atexit(f);
+
+       /* Check for privileges and syscall availability straight away. */
+       if (unshare(CLONE_NEWNET) == -1) {
+               if (errno == ENOSYS || errno == EPERM) {
+                       return 4;
+               }
+               return 1;
+       }
+       /* Distinguisher between two otherwise empty net namespaces: a unix socket makes the parent's /proc/net/unix non-empty. */
+       if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) {
+               return 1;
+       }
+
+       if (pipe(fd) == -1) {
+               return 1;
+       }
+
+       pid = fork();
+       if (pid == -1) {
+               return 1;
+       }
+
+       if (pid == 0) {
+               if (unshare(CLONE_NEWNET) == -1) {
+                       return 1;
+               }
+
+               if (write(fd[1], &_, 1) != 1) {
+                       return 1;
+               }
+
+               pause();
+
+               return 0;
+       }
+
+       if (read(fd[0], &_, 1) != 1) {
+               return 1;
+       }
+
+       {
+               char buf[64];
+               snprintf(buf, sizeof(buf), "/proc/%d/ns/net", pid);
+               nsfd = open(buf, O_RDONLY);
+               if (nsfd == -1) {
+                       return 1;
+               }
+       }
+
+       /* Reliably pin dentry into dcache. */
+       (void)open("/proc/net/unix", O_RDONLY);
+
+       if (setns(nsfd, CLONE_NEWNET) == -1) {
+               return 1;
+       }
+
+       kill(pid, SIGTERM);
+       pid = 0;
+
+       {
+               char buf[4096];
+               ssize_t rv;
+               int fd;
+
+               fd = open("/proc/net/unix", O_RDONLY);
+               if (fd == -1) {
+                       return 1;
+               }
+
+#define S "Num       RefCount Protocol Flags    Type St Inode Path\n"
+               rv = read(fd, buf, sizeof(buf));
+
+               assert(rv == strlen(S));
+               assert(memcmp(buf, S, strlen(S)) == 0);
+       }
+
+       return 0;
+}
index 496a9a8..7e632b4 100644 (file)
@@ -1608,7 +1608,16 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 #ifdef SYSCALL_NUM_RET_SHARE_REG
 # define EXPECT_SYSCALL_RETURN(val, action)    EXPECT_EQ(-1, action)
 #else
-# define EXPECT_SYSCALL_RETURN(val, action)    EXPECT_EQ(val, action)
+# define EXPECT_SYSCALL_RETURN(val, action)            \
+       do {                                            \
+               errno = 0;                              \
+               if ((val) < 0) {                        \
+                       EXPECT_EQ(-1, action);          \
+                       EXPECT_EQ(-(val), errno);       \
+               } else {                                \
+                       EXPECT_EQ((val), action);       \
+               }                                       \
+       } while (0)
 #endif
 
 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
@@ -1647,7 +1656,7 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
 
 /* Architecture-specific syscall changing routine. */
 void change_syscall(struct __test_metadata *_metadata,
-                   pid_t tracee, int syscall)
+                   pid_t tracee, int syscall, int result)
 {
        int ret;
        ARCH_REGS regs;
@@ -1706,7 +1715,7 @@ void change_syscall(struct __test_metadata *_metadata,
 #ifdef SYSCALL_NUM_RET_SHARE_REG
                TH_LOG("Can't modify syscall return on this architecture");
 #else
-               regs.SYSCALL_RET = EPERM;
+               regs.SYSCALL_RET = result;
 #endif
 
 #ifdef HAVE_GETREGS
@@ -1734,14 +1743,19 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
        case 0x1002:
                /* change getpid to getppid. */
                EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
-               change_syscall(_metadata, tracee, __NR_getppid);
+               change_syscall(_metadata, tracee, __NR_getppid, 0);
                break;
        case 0x1003:
-               /* skip gettid. */
+               /* skip gettid with valid return code. */
                EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
-               change_syscall(_metadata, tracee, -1);
+               change_syscall(_metadata, tracee, -1, 45000);
                break;
        case 0x1004:
+               /* skip openat with error. */
+               EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
+               change_syscall(_metadata, tracee, -1, -ESRCH);
+               break;
+       case 0x1005:
                /* do nothing (allow getppid) */
                EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
                break;
@@ -1774,9 +1788,11 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
        nr = get_syscall(_metadata, tracee);
 
        if (nr == __NR_getpid)
-               change_syscall(_metadata, tracee, __NR_getppid);
+               change_syscall(_metadata, tracee, __NR_getppid, 0);
+       if (nr == __NR_gettid)
+               change_syscall(_metadata, tracee, -1, 45000);
        if (nr == __NR_openat)
-               change_syscall(_metadata, tracee, -1);
+               change_syscall(_metadata, tracee, -1, -ESRCH);
 }
 
 FIXTURE_DATA(TRACE_syscall) {
@@ -1793,8 +1809,10 @@ FIXTURE_SETUP(TRACE_syscall)
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
-               BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+               BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
+               BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
        };
 
@@ -1842,15 +1860,26 @@ TEST_F(TRACE_syscall, ptrace_syscall_redirected)
        EXPECT_NE(self->mypid, syscall(__NR_getpid));
 }
 
-TEST_F(TRACE_syscall, ptrace_syscall_dropped)
+TEST_F(TRACE_syscall, ptrace_syscall_errno)
+{
+       /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+       teardown_trace_fixture(_metadata, self->tracer);
+       self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+                                          true);
+
+       /* Tracer should skip the openat syscall, resulting in ESRCH. */
+       EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_faked)
 {
        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
        teardown_trace_fixture(_metadata, self->tracer);
        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
                                           true);
 
-       /* Tracer should skip the open syscall, resulting in EPERM. */
-       EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat));
+       /* Tracer should skip the gettid syscall, resulting in a fake pid. */
+       EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
 }
 
 TEST_F(TRACE_syscall, syscall_allowed)
@@ -1883,7 +1912,21 @@ TEST_F(TRACE_syscall, syscall_redirected)
        EXPECT_NE(self->mypid, syscall(__NR_getpid));
 }
 
-TEST_F(TRACE_syscall, syscall_dropped)
+TEST_F(TRACE_syscall, syscall_errno)
+{
+       long ret;
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+       ASSERT_EQ(0, ret);
+
+       /* openat has been skipped and an errno value returned. */
+       EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, syscall_faked)
 {
        long ret;
 
@@ -1894,8 +1937,7 @@ TEST_F(TRACE_syscall, syscall_dropped)
        ASSERT_EQ(0, ret);
 
        /* gettid has been skipped and an altered return value stored. */
-       EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
-       EXPECT_NE(self->mytid, syscall(__NR_gettid));
+       EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
 }
 
 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
index c02683c..7656c7c 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -O3 -Wl,-no-as-needed -Wall
-LDFLAGS += -lrt -lpthread -lm
+LDLIBS += -lrt -lpthread -lm
 
 # these are all "safe" tests that don't modify
 # system time or require escalated privileges
index 5ecea81..5858452 100644 (file)
@@ -3000,8 +3000,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
        if (ops->init)
                ops->init(dev);
 
+       kvm_get_kvm(kvm);
        ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
        if (ret < 0) {
+               kvm_put_kvm(kvm);
                mutex_lock(&kvm->lock);
                list_del(&dev->vm_node);
                mutex_unlock(&kvm->lock);
@@ -3009,7 +3011,6 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
                return ret;
        }
 
-       kvm_get_kvm(kvm);
        cd->fd = ret;
        return 0;
 }