OSDN Git Service

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
authorJakub Kicinski <kuba@kernel.org>
Fri, 4 Feb 2022 01:36:16 +0000 (17:36 -0800)
committerJakub Kicinski <kuba@kernel.org>
Fri, 4 Feb 2022 01:36:16 +0000 (17:36 -0800)
No conflicts.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
432 files changed:
Documentation/admin-guide/sysctl/net.rst
Documentation/bpf/btf.rst
Documentation/devicetree/bindings/net/cdns,macb.yaml
Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
Documentation/devicetree/bindings/net/fsl-fman.txt
Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml
Documentation/networking/ethtool-netlink.rst
MAINTAINERS
arch/alpha/include/uapi/asm/socket.h
arch/arm64/boot/dts/xilinx/zynqmp.dtsi
arch/mips/include/uapi/asm/socket.h
arch/parisc/include/uapi/asm/socket.h
arch/sparc/include/uapi/asm/socket.h
drivers/bluetooth/btintel.c
drivers/bluetooth/btintel.h
drivers/bluetooth/btmrvl_debugfs.c
drivers/bluetooth/btmrvl_sdio.c
drivers/bluetooth/btmtk.h
drivers/bluetooth/btmtksdio.c
drivers/bluetooth/btrtl.c
drivers/bluetooth/btusb.c
drivers/bluetooth/hci_h5.c
drivers/bluetooth/hci_ll.c
drivers/bluetooth/hci_serdev.c
drivers/net/bonding/bond_alb.c
drivers/net/dsa/Kconfig
drivers/net/dsa/Makefile
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/microchip/ksz8795.c
drivers/net/dsa/microchip/ksz9477.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/microchip/ksz_common.h
drivers/net/dsa/mt7530.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/dsa/mv88e6xxx/global1.h
drivers/net/dsa/mv88e6xxx/global1_vtu.c
drivers/net/dsa/mv88e6xxx/global2.h
drivers/net/dsa/mv88e6xxx/global2_scratch.c
drivers/net/dsa/mv88e6xxx/port.c
drivers/net/dsa/mv88e6xxx/port.h
drivers/net/dsa/mv88e6xxx/serdes.c
drivers/net/dsa/mv88e6xxx/smi.c
drivers/net/dsa/qca/ar9331.c
drivers/net/dsa/qca8k.c
drivers/net/dsa/qca8k.h
drivers/net/dsa/realtek-smi-core.c [deleted file]
drivers/net/dsa/realtek/Kconfig [new file with mode: 0644]
drivers/net/dsa/realtek/Makefile [new file with mode: 0644]
drivers/net/dsa/realtek/realtek-mdio.c [new file with mode: 0644]
drivers/net/dsa/realtek/realtek-smi.c [new file with mode: 0644]
drivers/net/dsa/realtek/realtek.h [moved from drivers/net/dsa/realtek-smi-core.h with 55% similarity]
drivers/net/dsa/realtek/rtl8365mb.c [moved from drivers/net/dsa/rtl8365mb.c with 75% similarity]
drivers/net/dsa/realtek/rtl8366-core.c [moved from drivers/net/dsa/rtl8366.c with 61% similarity]
drivers/net/dsa/realtek/rtl8366rb.c [moved from drivers/net/dsa/rtl8366rb.c with 78% similarity]
drivers/net/dsa/xrs700x/xrs700x.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/cadence/macb.h
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cavium/thunder/thunder_bgx.c
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/dec/tulip/pnic.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fec_ptp.c
drivers/net/ethernet/freescale/xgmac_mdio.c
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_xsk.c
drivers/net/ethernet/intel/iavf/iavf_main.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_txrx.c
drivers/net/ethernet/intel/ice/ice_xsk.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igbvf/netdev.c
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/intel/ixgb/ixgb_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
drivers/net/ethernet/mediatek/mtk_star_emac.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/core.h
drivers/net/ethernet/mellanox/mlxsw/core_env.c
drivers/net/ethernet/mellanox/mlxsw/core_env.h
drivers/net/ethernet/mellanox/mlxsw/minimal.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/resources.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/microchip/lan743x_ethtool.c
drivers/net/ethernet/microchip/lan966x/Makefile
drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c
drivers/net/ethernet/microchip/lan966x/lan966x_main.c
drivers/net/ethernet/microchip/lan966x/lan966x_main.h
drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c [new file with mode: 0644]
drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
drivers/net/ethernet/microchip/sparx5/sparx5_main.c
drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c
drivers/net/ethernet/microsoft/mana/gdma_main.c
drivers/net/ethernet/microsoft/mana/mana.h
drivers/net/ethernet/microsoft/mana/mana_en.c
drivers/net/ethernet/microsoft/mana/mana_ethtool.c
drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
drivers/net/ethernet/netronome/nfp/nfp_port.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
drivers/net/ethernet/pensando/ionic/ionic.h
drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
drivers/net/ethernet/pensando/ionic/ionic_dev.c
drivers/net/ethernet/pensando/ionic/ionic_dev.h
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/pensando/ionic/ionic_lif.h
drivers/net/ethernet/pensando/ionic/ionic_main.c
drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
drivers/net/ethernet/pensando/ionic/ionic_txrx.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_mcp.h
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/ef100_nic.c
drivers/net/ethernet/sfc/net_driver.h
drivers/net/ethernet/sfc/nic_common.h
drivers/net/ethernet/sfc/rx_common.c
drivers/net/ethernet/sfc/rx_common.h
drivers/net/ethernet/sfc/siena.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/xilinx/xilinx_axienet.h
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/fjes/fjes_main.c
drivers/net/ipa/ipa_data-v3.1.c
drivers/net/ipa/ipa_data-v3.5.1.c
drivers/net/ipa/ipa_data-v4.11.c
drivers/net/ipa/ipa_data-v4.2.c
drivers/net/ipa/ipa_data-v4.5.c
drivers/net/ipa/ipa_data-v4.9.c
drivers/net/ipa/ipa_data.h
drivers/net/ipa/ipa_endpoint.c
drivers/net/mdio/mdio-xgene.c
drivers/net/pcs/pcs-xpcs.c
drivers/net/phy/aquantia_main.c
drivers/net/phy/at803x.c
drivers/net/phy/phy-core.c
drivers/net/usb/asix_devices.c
drivers/nfc/st-nci/vendor_cmds.c
drivers/nfc/st21nfca/vendor_cmds.c
drivers/ptp/ptp_clock.c
drivers/ptp/ptp_sysfs.c
drivers/ptp/ptp_vclock.c
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/btf.h
include/linux/btf_ids.h
include/linux/dsa/tag_qca.h [new file with mode: 0644]
include/linux/ethtool.h
include/linux/filter.h
include/linux/ipv6.h
include/linux/linkmode.h
include/linux/mii.h
include/linux/mlx5/mlx5_ifc.h
include/linux/netlink.h
include/linux/pcs/pcs-xpcs.h
include/linux/phy.h
include/linux/skbuff.h
include/linux/sunrpc/svc_xprt.h
include/linux/sunrpc/xprt.h
include/linux/udp.h
include/net/ax25.h
include/net/bluetooth/hci_core.h
include/net/bluetooth/mgmt.h
include/net/bonding.h
include/net/dsa.h
include/net/inet_connection_sock.h
include/net/inet_timewait_sock.h
include/net/ip.h
include/net/ipv6.h
include/net/netfilter/nf_conntrack_bpf.h [new file with mode: 0644]
include/net/netns/core.h
include/net/netns/ipv4.h
include/net/netns/ipv6.h
include/net/page_pool.h
include/net/pkt_sched.h
include/net/request_sock.h
include/net/sch_generic.h
include/net/sock.h
include/net/tcp.h
include/net/udplite.h
include/net/xdp.h
include/uapi/asm-generic/socket.h
include/uapi/linux/bpf.h
include/uapi/linux/ethtool_netlink.h
include/uapi/linux/socket.h
kernel/bpf/arraymap.c
kernel/bpf/btf.c
kernel/bpf/cgroup.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/devmap.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/ax25/ax25_route.c
net/bluetooth/hci_conn.c
net/bluetooth/hci_core.c
net/bluetooth/hci_event.c
net/bluetooth/hci_sync.c
net/bluetooth/mgmt.c
net/bluetooth/msft.c
net/bpf/test_run.c
net/core/filter.c
net/core/net_namespace.c
net/core/page_pool.c
net/core/sock.c
net/core/sock_map.c
net/core/sysctl_net_core.c
net/core/xdp.c
net/dccp/dccp.h
net/dccp/ipv4.c
net/dccp/ipv6.c
net/dccp/minisocks.c
net/dsa/dsa2.c
net/dsa/dsa_priv.h
net/dsa/slave.c
net/dsa/switch.c
net/dsa/tag_qca.c
net/ethtool/rings.c
net/hsr/hsr_main.h
net/ipv4/bpf_tcp_ca.c
net/ipv4/fib_semantics.c
net/ipv4/icmp.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_timewait_sock.c
net/ipv4/ip_options.c
net/ipv4/ip_output.c
net/ipv4/proc.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_cubic.c
net/ipv4/tcp_dctcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv6/exthdrs.c
net/ipv6/icmp.c
net/ipv6/ip6_offload.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/mptcp/options.c
net/mptcp/pm_netlink.c
net/netfilter/Makefile
net/netfilter/nf_conntrack_bpf.c [new file with mode: 0644]
net/netfilter/nf_conntrack_core.c
net/smc/af_smc.c
net/smc/smc_tx.c
net/smc/smc_tx.h
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/svc_xprt.c
net/sunrpc/xprt.c
net/tipc/msg.h
net/unix/af_unix.c
samples/bpf/xdp1_user.c
samples/bpf/xdp_adjust_tail_user.c
samples/bpf/xdp_fwd_user.c
samples/bpf/xdp_router_ipv4_user.c
samples/bpf/xdp_rxq_info_user.c
samples/bpf/xdp_sample_pkts_user.c
samples/bpf/xdp_sample_user.c
samples/bpf/xdp_tx_iptunnel_user.c
samples/bpf/xdpsock_ctrl_proc.c
samples/bpf/xdpsock_user.c
samples/bpf/xsk_fwd.c
scripts/bpf_doc.py
security/device_cgroup.c
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/link.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/net.c
tools/bpf/bpftool/pids.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/struct_ops.c
tools/bpf/resolve_btfids/Makefile
tools/include/uapi/linux/bpf.h
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/hashmap.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_legacy.h
tools/lib/bpf/netlink.c
tools/perf/util/bpf-loader.c
tools/perf/util/bpf_map.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
tools/testing/selftests/bpf/config
tools/testing/selftests/bpf/prog_tests/bind_perm.c
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_nf.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/flow_dissector.c
tools/testing/selftests/bpf/prog_tests/global_data.c
tools/testing/selftests/bpf/prog_tests/global_data_init.c
tools/testing/selftests/bpf/prog_tests/kfunc_call.c
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
tools/testing/selftests/bpf/prog_tests/tailcalls.c
tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_unix.c
tools/testing/selftests/bpf/progs/bpf_mod_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
tools/testing/selftests/bpf/progs/kfunc_call_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/kfunc_call_test.c
tools/testing/selftests/bpf/progs/ksym_race.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/sample_map_ret0.c
tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
tools/testing/selftests/bpf/progs/sockopt_sk.c
tools/testing/selftests/bpf/progs/test_bpf_nf.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_btf_haskv.c
tools/testing/selftests/bpf/progs/test_btf_newkv.c
tools/testing/selftests/bpf/progs/test_btf_nokv.c
tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_tc_edt.c
tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
tools/testing/selftests/bpf/progs/test_xdp_update_frags.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/verifier/calls.c
tools/testing/selftests/bpf/xdpxceiver.c
tools/testing/selftests/net/fib_rule_tests.sh
tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
tools/testing/selftests/net/forwarding/forwarding.config.sample
tools/testing/selftests/net/forwarding/lib.sh
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/mptcp/pm_nl_ctl.c
tools/testing/selftests/net/timestamping.c

index 4150f74..f86b5e1 100644 (file)
@@ -365,6 +365,15 @@ new netns has been created.
 
 Default : 0  (for compatibility reasons)
 
+txrehash
+--------
+
+Controls default hash rethink behaviour on listening socket when SO_TXREHASH
+option is set to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
+
+If set to 1 (default), hash rethink is performed on listening socket.
+If set to 0, hash rethink is not performed.
+
 2. /proc/sys/net/unix - Parameters for Unix domain sockets
 ----------------------------------------------------------
 
index 1ebf4c5..ab08852 100644 (file)
@@ -565,18 +565,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.::
 In libbpf, the map can be defined with extra annotation like below:
 ::
 
-    struct bpf_map_def SEC("maps") btf_map = {
-        .type = BPF_MAP_TYPE_ARRAY,
-        .key_size = sizeof(int),
-        .value_size = sizeof(struct ipv_counts),
-        .max_entries = 4,
-    };
-    BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+    struct {
+        __uint(type, BPF_MAP_TYPE_ARRAY);
+        __type(key, int);
+        __type(value, struct ipv_counts);
+        __uint(max_entries, 4);
+    } btf_map SEC(".maps");
 
-Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
-value types for the map. During ELF parsing, libbpf is able to extract
-key/value type_id's and assign them to BPF_MAP_CREATE attributes
-automatically.
+During ELF parsing, libbpf is able to extract key/value type_id's and assign
+them to BPF_MAP_CREATE attributes automatically.
 
 .. _BPF_Prog_Load:
 
@@ -824,13 +821,12 @@ structure has bitfields. For example, for the following map,::
            ___A b1:4;
            enum A b2:4;
       };
-      struct bpf_map_def SEC("maps") tmpmap = {
-           .type = BPF_MAP_TYPE_ARRAY,
-           .key_size = sizeof(__u32),
-           .value_size = sizeof(struct tmp_t),
-           .max_entries = 1,
-      };
-      BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
+      struct {
+           __uint(type, BPF_MAP_TYPE_ARRAY);
+           __type(key, int);
+           __type(value, struct tmp_t);
+           __uint(max_entries, 1);
+      } tmpmap SEC(".maps");
 
 bpftool is able to pretty print like below:
 ::
index 8dd06db..6cd3d85 100644 (file)
@@ -81,6 +81,25 @@ properties:
 
   phy-handle: true
 
+  phys:
+    maxItems: 1
+
+  phy-names:
+    const: sgmii-phy
+    description:
+      Required with ZynqMP SoC when in SGMII mode.
+      Should reference PS-GTR generic PHY device for this controller
+      instance. See ZynqMP example.
+
+  resets:
+    maxItems: 1
+    description:
+      Recommended with ZynqMP, specify reset control for this
+      controller instance with zynqmp-reset driver.
+
+  reset-names:
+    maxItems: 1
+
   fixed-link: true
 
   iommus:
@@ -157,3 +176,40 @@ examples:
                     reset-gpios = <&pioE 6 1>;
             };
     };
+
+  - |
+    #include <dt-bindings/clock/xlnx-zynqmp-clk.h>
+    #include <dt-bindings/power/xlnx-zynqmp-power.h>
+    #include <dt-bindings/reset/xlnx-zynqmp-resets.h>
+    #include <dt-bindings/phy/phy.h>
+
+    bus {
+            #address-cells = <2>;
+            #size-cells = <2>;
+            gem1: ethernet@ff0c0000 {
+                    compatible = "cdns,zynqmp-gem", "cdns,gem";
+                    interrupt-parent = <&gic>;
+                    interrupts = <0 59 4>, <0 59 4>;
+                    reg = <0x0 0xff0c0000 0x0 0x1000>;
+                    clocks = <&zynqmp_clk LPD_LSBUS>, <&zynqmp_clk GEM1_REF>,
+                             <&zynqmp_clk GEM1_TX>, <&zynqmp_clk GEM1_RX>,
+                             <&zynqmp_clk GEM_TSU>;
+                    clock-names = "pclk", "hclk", "tx_clk", "rx_clk", "tsu_clk";
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+                    #stream-id-cells = <1>;
+                    iommus = <&smmu 0x875>;
+                    power-domains = <&zynqmp_firmware PD_ETH_1>;
+                    resets = <&zynqmp_reset ZYNQMP_RESET_GEM1>;
+                    reset-names = "gem1_rst";
+                    status = "okay";
+                    phy-mode = "sgmii";
+                    phy-names = "sgmii-phy";
+                    phys = <&psgtr 1 PHY_TYPE_SGMII 1 1>;
+                    fixed-link {
+                            speed = <1000>;
+                            full-duplex;
+                            pause;
+                    };
+            };
+    };
index 84985f5..1841520 100644 (file)
@@ -42,6 +42,12 @@ properties:
     description:
       Set if the output SYNCLKO frequency should be set to 125MHz instead of 25MHz.
 
+  microchip,synclko-disable:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Set if the output SYNCLKO clock should be disabled. Do not mix with
+      microchip,synclko-125.
+
 required:
   - compatible
   - reg
index 020337f..801efc7 100644 (file)
@@ -388,14 +388,24 @@ PROPERTIES
                Value type: <prop-encoded-array>
                Definition: A standard property.
 
-- bus-frequency
+- clocks
+               Usage: optional
+               Value type: <phandle>
+               Definition: A reference to the input clock of the controller
+               from which the MDC frequency is derived.
+
+- clock-frequency
                Usage: optional
                Value type: <u32>
-               Definition: Specifies the external MDIO bus clock speed to
-               be used, if different from the standard 2.5 MHz.
-               This may be due to the standard speed being unsupported (e.g.
-               due to a hardware problem), or to advertise that all relevant
-               components in the system support a faster speed.
+               Definition: Specifies the external MDC frequency, in Hertz, to
+               be used. Requires that the input clock is specified in the
+               "clocks" property. See also: mdio.yaml.
+
+- suppress-preamble
+               Usage: optional
+               Value type: <boolean>
+               Definition: Disable generation of preamble bits. See also:
+               mdio.yaml.
 
 - interrupts
                Usage: required for external MDIO
index e79e4e1..1381276 100644 (file)
@@ -38,6 +38,7 @@ properties:
       - description: register based extraction
       - description: frame dma based extraction
       - description: analyzer interrupt
+      - description: ptp interrupt
 
   interrupt-names:
     minItems: 1
@@ -45,6 +46,7 @@ properties:
       - const: xtr
       - const: fdma
       - const: ana
+      - const: ptp
 
   resets:
     items:
index 9d98e05..cae28af 100644 (file)
@@ -860,8 +860,16 @@ Kernel response contents:
   ``ETHTOOL_A_RINGS_RX_JUMBO``          u32     size of RX jumbo ring
   ``ETHTOOL_A_RINGS_TX``                u32     size of TX ring
   ``ETHTOOL_A_RINGS_RX_BUF_LEN``        u32     size of buffers on the ring
+  ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT``    u8      TCP header / data split
   ====================================  ======  ===========================
 
+``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with
+page-flipping TCP zero-copy receive (``getsockopt(TCP_ZEROCOPY_RECEIVE)``).
+If enabled the device is configured to place frame headers and data into
+separate buffers. The device configuration must make it possible to receive
+full memory pages of data, for example because MTU is high enough or through
+HW-GRO.
+
 
 RINGS_SET
 =========
index bb83dcb..c7169da 100644 (file)
@@ -16351,8 +16351,7 @@ REALTEK RTL83xx SMI DSA ROUTER CHIPS
 M:     Linus Walleij <linus.walleij@linaro.org>
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/dsa/realtek-smi.txt
-F:     drivers/net/dsa/realtek-smi*
-F:     drivers/net/dsa/rtl83*
+F:     drivers/net/dsa/realtek/*
 
 REALTEK WIRELESS DRIVER (rtlwifi family)
 M:     Ping-Ke Shih <pkshih@realtek.com>
index 284d287..7d81535 100644 (file)
 
 #define SO_RESERVE_MEM         73
 
+#define SO_TXREHASH            74
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index 74e6644..9bec3ba 100644 (file)
                        #stream-id-cells = <1>;
                        iommus = <&smmu 0x874>;
                        power-domains = <&zynqmp_firmware PD_ETH_0>;
+                       resets = <&zynqmp_reset ZYNQMP_RESET_GEM0>;
+                       reset-names = "gem0_rst";
                };
 
                gem1: ethernet@ff0c0000 {
                        #stream-id-cells = <1>;
                        iommus = <&smmu 0x875>;
                        power-domains = <&zynqmp_firmware PD_ETH_1>;
+                       resets = <&zynqmp_reset ZYNQMP_RESET_GEM1>;
+                       reset-names = "gem1_rst";
                };
 
                gem2: ethernet@ff0d0000 {
                        #stream-id-cells = <1>;
                        iommus = <&smmu 0x876>;
                        power-domains = <&zynqmp_firmware PD_ETH_2>;
+                       resets = <&zynqmp_reset ZYNQMP_RESET_GEM2>;
+                       reset-names = "gem2_rst";
                };
 
                gem3: ethernet@ff0e0000 {
                        #stream-id-cells = <1>;
                        iommus = <&smmu 0x877>;
                        power-domains = <&zynqmp_firmware PD_ETH_3>;
+                       resets = <&zynqmp_reset ZYNQMP_RESET_GEM3>;
+                       reset-names = "gem3_rst";
                };
 
                gpio: gpio@ff0a0000 {
index 24e0efb..1d55e57 100644 (file)
 
 #define SO_RESERVE_MEM         73
 
+#define SO_TXREHASH            74
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index 845ddc6..654061e 100644 (file)
 
 #define SO_RESERVE_MEM         0x4047
 
+#define SO_TXREHASH            0x4048
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index 2672dd0..666f81e 100644 (file)
 
 #define SO_RESERVE_MEM           0x0052
 
+#define SO_TXREHASH              0x0053
+
 
 #if !defined(__KERNEL__)
 
index 1a4f8b2..06514ed 100644 (file)
@@ -2428,10 +2428,15 @@ static int btintel_setup_combined(struct hci_dev *hdev)
 
                        /* Apply the device specific HCI quirks
                         *
-                        * WBS for SdP - SdP and Stp have a same hw_varaint but
-                        * different fw_variant
+                        * WBS for SdP - For the Legacy ROM products, only SdP
+                        * supports the WBS. But the version information is not
+                        * enough to use here because the StP2 and SdP have same
+                        * hw_variant and fw_variant. So, this flag is set by
+                        * the transport driver (btusb) based on the HW info
+                        * (idProduct)
                         */
-                       if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22)
+                       if (!btintel_test_flag(hdev,
+                                              INTEL_ROM_LEGACY_NO_WBS_SUPPORT))
                                set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
                                        &hdev->quirks);
 
index c9b24e9..e0060e5 100644 (file)
@@ -152,6 +152,7 @@ enum {
        INTEL_BROKEN_INITIAL_NCMD,
        INTEL_BROKEN_SHUTDOWN_LED,
        INTEL_ROM_LEGACY,
+       INTEL_ROM_LEGACY_NO_WBS_SUPPORT,
 
        __INTEL_NUM_FLAGS,
 };
index c486757..db35b91 100644 (file)
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell Bluetooth driver: debugfs related functions
  *
  * Copyright (C) 2009, Marvell International Ltd.
index 68378b4..b8ef66f 100644 (file)
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell BT-over-SDIO driver: SDIO interface related functions.
  *
  * Copyright (C) 2009, Marvell International Ltd.
index 6e7b0c7..fb76d97 100644 (file)
@@ -7,8 +7,12 @@
 
 #define HCI_WMT_MAX_EVENT_SIZE         64
 
+#define BTMTK_WMT_REG_WRITE 0x1
 #define BTMTK_WMT_REG_READ 0x2
 
+#define MT7921_PINMUX_0 0x70005050
+#define MT7921_PINMUX_1 0x70005054
+
 enum {
        BTMTK_WMT_PATCH_DWNLD = 0x1,
        BTMTK_WMT_TEST = 0x2,
@@ -68,6 +72,37 @@ struct btmtk_tci_sleep {
        u8 time_compensation;
 } __packed;
 
+struct btmtk_wakeon {
+       u8 mode;
+       u8 gpo;
+       u8 active_high;
+       __le16 enable_delay;
+       __le16 wakeup_delay;
+} __packed;
+
+struct btmtk_sco {
+       u8 clock_config;
+       u8 transmit_format_config;
+       u8 channel_format_config;
+       u8 channel_select_config;
+} __packed;
+
+struct reg_read_cmd {
+       u8 type;
+       u8 rsv;
+       u8 num;
+       __le32 addr;
+} __packed;
+
+struct reg_write_cmd {
+       u8 type;
+       u8 rsv;
+       u8 num;
+       __le32 addr;
+       __le32 data;
+       __le32 mask;
+} __packed;
+
 struct btmtk_hci_wmt_params {
        u8 op;
        u8 flag;
index b5ea8d3..8be763a 100644 (file)
 
 #define VERSION "0.1"
 
-#define MTKBTSDIO_AUTOSUSPEND_DELAY    8000
+#define MTKBTSDIO_AUTOSUSPEND_DELAY    1000
 
-static bool enable_autosuspend;
+static bool enable_autosuspend = true;
 
 struct btmtksdio_data {
        const char *fwname;
        u16 chipid;
+       bool lp_mbox_supported;
 };
 
 static const struct btmtksdio_data mt7663_data = {
        .fwname = FIRMWARE_MT7663,
        .chipid = 0x7663,
+       .lp_mbox_supported = false,
 };
 
 static const struct btmtksdio_data mt7668_data = {
        .fwname = FIRMWARE_MT7668,
        .chipid = 0x7668,
+       .lp_mbox_supported = false,
 };
 
 static const struct btmtksdio_data mt7921_data = {
        .fwname = FIRMWARE_MT7961,
        .chipid = 0x7921,
+       .lp_mbox_supported = true,
 };
 
 static const struct sdio_device_id btmtksdio_table[] = {
@@ -87,8 +91,17 @@ MODULE_DEVICE_TABLE(sdio, btmtksdio_table);
 #define RX_DONE_INT            BIT(1)
 #define TX_EMPTY               BIT(2)
 #define TX_FIFO_OVERFLOW       BIT(8)
+#define FW_MAILBOX_INT         BIT(15)
+#define INT_MASK               GENMASK(15, 0)
 #define RX_PKT_LEN             GENMASK(31, 16)
 
+#define MTK_REG_CSICR          0xc0
+#define CSICR_CLR_MBOX_ACK BIT(0)
+#define MTK_REG_PH2DSM0R       0xc4
+#define PH2DSM0R_DRIVER_OWN    BIT(0)
+#define MTK_REG_PD2HRM0R       0xdc
+#define PD2HRM0R_DRV_OWN       BIT(0)
+
 #define MTK_REG_CTDR           0x18
 
 #define MTK_REG_CRDR           0x1c
@@ -100,6 +113,7 @@ MODULE_DEVICE_TABLE(sdio, btmtksdio_table);
 #define BTMTKSDIO_TX_WAIT_VND_EVT      1
 #define BTMTKSDIO_HW_TX_READY          2
 #define BTMTKSDIO_FUNC_ENABLED         3
+#define BTMTKSDIO_PATCH_ENABLED                4
 
 struct mtkbtsdio_hdr {
        __le16  len;
@@ -278,6 +292,78 @@ static u32 btmtksdio_drv_own_query(struct btmtksdio_dev *bdev)
        return sdio_readl(bdev->func, MTK_REG_CHLPCR, NULL);
 }
 
+static u32 btmtksdio_drv_own_query_79xx(struct btmtksdio_dev *bdev)
+{
+       return sdio_readl(bdev->func, MTK_REG_PD2HRM0R, NULL);
+}
+
+static int btmtksdio_fw_pmctrl(struct btmtksdio_dev *bdev)
+{
+       u32 status;
+       int err;
+
+       sdio_claim_host(bdev->func);
+
+       if (bdev->data->lp_mbox_supported &&
+           test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state)) {
+               sdio_writel(bdev->func, CSICR_CLR_MBOX_ACK, MTK_REG_CSICR,
+                           &err);
+               err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev,
+                                        status, !(status & PD2HRM0R_DRV_OWN),
+                                        2000, 1000000);
+               if (err < 0) {
+                       bt_dev_err(bdev->hdev, "mailbox ACK not cleared");
+                       goto out;
+               }
+       }
+
+       /* Return ownership to the device */
+       sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err);
+       if (err < 0)
+               goto out;
+
+       err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
+                                !(status & C_COM_DRV_OWN), 2000, 1000000);
+
+out:
+       sdio_release_host(bdev->func);
+
+       if (err < 0)
+               bt_dev_err(bdev->hdev, "Cannot return ownership to device");
+
+       return err;
+}
+
+static int btmtksdio_drv_pmctrl(struct btmtksdio_dev *bdev)
+{
+       u32 status;
+       int err;
+
+       sdio_claim_host(bdev->func);
+
+       /* Get ownership from the device */
+       sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err);
+       if (err < 0)
+               goto out;
+
+       err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
+                                status & C_COM_DRV_OWN, 2000, 1000000);
+
+       if (!err && bdev->data->lp_mbox_supported &&
+           test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state))
+               err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev,
+                                        status, status & PD2HRM0R_DRV_OWN,
+                                        2000, 1000000);
+
+out:
+       sdio_release_host(bdev->func);
+
+       if (err < 0)
+               bt_dev_err(bdev->hdev, "Cannot get ownership from device");
+
+       return err;
+}
+
 static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb)
 {
        struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
@@ -480,6 +566,13 @@ static void btmtksdio_txrx_work(struct work_struct *work)
                 * FIFO.
                 */
                sdio_writel(bdev->func, int_status, MTK_REG_CHISR, NULL);
+               int_status &= INT_MASK;
+
+               if ((int_status & FW_MAILBOX_INT) &&
+                   bdev->data->chipid == 0x7921) {
+                       sdio_writel(bdev->func, PH2DSM0R_DRIVER_OWN,
+                                   MTK_REG_PH2DSM0R, 0);
+               }
 
                if (int_status & FW_OWN_BACK_INT)
                        bt_dev_dbg(bdev->hdev, "Get fw own back");
@@ -531,7 +624,7 @@ static void btmtksdio_interrupt(struct sdio_func *func)
 static int btmtksdio_open(struct hci_dev *hdev)
 {
        struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
-       u32 status, val;
+       u32 val;
        int err;
 
        sdio_claim_host(bdev->func);
@@ -542,18 +635,10 @@ static int btmtksdio_open(struct hci_dev *hdev)
 
        set_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state);
 
-       /* Get ownership from the device */
-       sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err);
+       err = btmtksdio_drv_pmctrl(bdev);
        if (err < 0)
                goto err_disable_func;
 
-       err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
-                                status & C_COM_DRV_OWN, 2000, 1000000);
-       if (err < 0) {
-               bt_dev_err(bdev->hdev, "Cannot get ownership from device");
-               goto err_disable_func;
-       }
-
        /* Disable interrupt & mask out all interrupt sources */
        sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, &err);
        if (err < 0)
@@ -623,8 +708,6 @@ err_release_host:
 static int btmtksdio_close(struct hci_dev *hdev)
 {
        struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
-       u32 status;
-       int err;
 
        sdio_claim_host(bdev->func);
 
@@ -635,13 +718,7 @@ static int btmtksdio_close(struct hci_dev *hdev)
 
        cancel_work_sync(&bdev->txrx_work);
 
-       /* Return ownership to the device */
-       sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, NULL);
-
-       err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
-                                !(status & C_COM_DRV_OWN), 2000, 1000000);
-       if (err < 0)
-               bt_dev_err(bdev->hdev, "Cannot return ownership to device");
+       btmtksdio_fw_pmctrl(bdev);
 
        clear_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state);
        sdio_disable_func(bdev->func);
@@ -686,6 +763,7 @@ static int btmtksdio_func_query(struct hci_dev *hdev)
 
 static int mt76xx_setup(struct hci_dev *hdev, const char *fwname)
 {
+       struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
        struct btmtk_hci_wmt_params wmt_params;
        struct btmtk_tci_sleep tci_sleep;
        struct sk_buff *skb;
@@ -746,6 +824,8 @@ ignore_setup_fw:
                return err;
        }
 
+       set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state);
+
 ignore_func_on:
        /* Apply the low power environment setup */
        tci_sleep.mode = 0x5;
@@ -768,6 +848,7 @@ ignore_func_on:
 
 static int mt79xx_setup(struct hci_dev *hdev, const char *fwname)
 {
+       struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
        struct btmtk_hci_wmt_params wmt_params;
        u8 param = 0x1;
        int err;
@@ -793,19 +874,15 @@ static int mt79xx_setup(struct hci_dev *hdev, const char *fwname)
 
        hci_set_msft_opcode(hdev, 0xFD30);
        hci_set_aosp_capable(hdev);
+       set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state);
 
        return err;
 }
 
-static int btsdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val)
+static int btmtksdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val)
 {
        struct btmtk_hci_wmt_params wmt_params;
-       struct reg_read_cmd {
-               u8 type;
-               u8 rsv;
-               u8 num;
-               __le32 addr;
-       } __packed reg_read = {
+       struct reg_read_cmd reg_read = {
                .type = 1,
                .num = 1,
        };
@@ -821,7 +898,7 @@ static int btsdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val)
 
        err = mtk_hci_wmt_sync(hdev, &wmt_params);
        if (err < 0) {
-               bt_dev_err(hdev, "Failed to read reg(%d)", err);
+               bt_dev_err(hdev, "Failed to read reg (%d)", err);
                return err;
        }
 
@@ -830,6 +907,66 @@ static int btsdio_mtk_reg_read(struct hci_dev *hdev, u32 reg, u32 *val)
        return err;
 }
 
+static int btmtksdio_mtk_reg_write(struct hci_dev *hdev, u32 reg, u32 val, u32 mask)
+{
+       struct btmtk_hci_wmt_params wmt_params;
+       const struct reg_write_cmd reg_write = {
+               .type = 1,
+               .num = 1,
+               .addr = cpu_to_le32(reg),
+               .data = cpu_to_le32(val),
+               .mask = cpu_to_le32(mask),
+       };
+       int err, status;
+
+       wmt_params.op = BTMTK_WMT_REGISTER;
+       wmt_params.flag = BTMTK_WMT_REG_WRITE;
+       wmt_params.dlen = sizeof(reg_write);
+       wmt_params.data = &reg_write;
+       wmt_params.status = &status;
+
+       err = mtk_hci_wmt_sync(hdev, &wmt_params);
+       if (err < 0)
+               bt_dev_err(hdev, "Failed to write reg (%d)", err);
+
+       return err;
+}
+
+static int btmtksdio_sco_setting(struct hci_dev *hdev)
+{
+       const struct btmtk_sco sco_setting = {
+               .clock_config = 0x49,
+               .channel_format_config = 0x80,
+       };
+       struct sk_buff *skb;
+       u32 val;
+       int err;
+
+       /* Enable SCO over I2S/PCM for MediaTek chipset */
+       skb =  __hci_cmd_sync(hdev, 0xfc72, sizeof(sco_setting),
+                             &sco_setting, HCI_CMD_TIMEOUT);
+       if (IS_ERR(skb))
+               return PTR_ERR(skb);
+
+       kfree_skb(skb);
+
+       err = btmtksdio_mtk_reg_read(hdev, MT7921_PINMUX_0, &val);
+       if (err < 0)
+               return err;
+
+       val |= 0x11000000;
+       err = btmtksdio_mtk_reg_write(hdev, MT7921_PINMUX_0, val, ~0);
+       if (err < 0)
+               return err;
+
+       err = btmtksdio_mtk_reg_read(hdev, MT7921_PINMUX_1, &val);
+       if (err < 0)
+               return err;
+
+       val |= 0x00000101;
+       return btmtksdio_mtk_reg_write(hdev, MT7921_PINMUX_1, val, ~0);
+}
+
 static int btmtksdio_setup(struct hci_dev *hdev)
 {
        struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
@@ -844,13 +981,13 @@ static int btmtksdio_setup(struct hci_dev *hdev)
 
        switch (bdev->data->chipid) {
        case 0x7921:
-               err = btsdio_mtk_reg_read(hdev, 0x70010200, &dev_id);
+               err = btmtksdio_mtk_reg_read(hdev, 0x70010200, &dev_id);
                if (err < 0) {
                        bt_dev_err(hdev, "Failed to get device id (%d)", err);
                        return err;
                }
 
-               err = btsdio_mtk_reg_read(hdev, 0x80021004, &fw_version);
+               err = btmtksdio_mtk_reg_read(hdev, 0x80021004, &fw_version);
                if (err < 0) {
                        bt_dev_err(hdev, "Failed to get fw version (%d)", err);
                        return err;
@@ -862,6 +999,22 @@ static int btmtksdio_setup(struct hci_dev *hdev)
                err = mt79xx_setup(hdev, fwname);
                if (err < 0)
                        return err;
+
+               err = btmtksdio_fw_pmctrl(bdev);
+               if (err < 0)
+                       return err;
+
+               err = btmtksdio_drv_pmctrl(bdev);
+               if (err < 0)
+                       return err;
+
+               /* Enable SCO over I2S/PCM */
+               err = btmtksdio_sco_setting(hdev);
+               if (err < 0) {
+                       bt_dev_err(hdev, "Failed to enable SCO setting (%d)", err);
+                       return err;
+               }
+
                break;
        case 0x7663:
        case 0x7668:
@@ -958,6 +1111,32 @@ static int btmtksdio_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
        return 0;
 }
 
+static bool btmtksdio_sdio_wakeup(struct hci_dev *hdev)
+{
+       struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+       bool may_wakeup = device_may_wakeup(bdev->dev);
+       const struct btmtk_wakeon bt_awake = {
+               .mode = 0x1,
+               .gpo = 0,
+               .active_high = 0x1,
+               .enable_delay = cpu_to_le16(0xc80),
+               .wakeup_delay = cpu_to_le16(0x20),
+       };
+
+       if (may_wakeup && bdev->data->chipid == 0x7921) {
+               struct sk_buff *skb;
+
+               skb =  __hci_cmd_sync(hdev, 0xfc27, sizeof(bt_awake),
+                                     &bt_awake, HCI_CMD_TIMEOUT);
+               if (IS_ERR(skb))
+                       may_wakeup = false;
+
+               kfree_skb(skb);
+       }
+
+       return may_wakeup;
+}
+
 static int btmtksdio_probe(struct sdio_func *func,
                           const struct sdio_device_id *id)
 {
@@ -997,6 +1176,7 @@ static int btmtksdio_probe(struct sdio_func *func,
        hdev->setup    = btmtksdio_setup;
        hdev->shutdown = btmtksdio_shutdown;
        hdev->send     = btmtksdio_send_frame;
+       hdev->wakeup   = btmtksdio_sdio_wakeup;
        hdev->set_bdaddr = btmtk_set_bdaddr;
 
        SET_HCIDEV_DEV(hdev, &func->dev);
@@ -1032,7 +1212,11 @@ static int btmtksdio_probe(struct sdio_func *func,
         */
        pm_runtime_put_noidle(bdev->dev);
 
-       return 0;
+       err = device_init_wakeup(bdev->dev, true);
+       if (err)
+               bt_dev_err(hdev, "failed to initialize device wakeup");
+
+       return err;
 }
 
 static void btmtksdio_remove(struct sdio_func *func)
@@ -1058,7 +1242,6 @@ static int btmtksdio_runtime_suspend(struct device *dev)
 {
        struct sdio_func *func = dev_to_sdio_func(dev);
        struct btmtksdio_dev *bdev;
-       u32 status;
        int err;
 
        bdev = sdio_get_drvdata(func);
@@ -1070,18 +1253,9 @@ static int btmtksdio_runtime_suspend(struct device *dev)
 
        sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER);
 
-       sdio_claim_host(bdev->func);
+       err = btmtksdio_fw_pmctrl(bdev);
 
-       sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err);
-       if (err < 0)
-               goto out;
-
-       err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
-                                !(status & C_COM_DRV_OWN), 2000, 1000000);
-out:
-       bt_dev_info(bdev->hdev, "status (%d) return ownership to device", err);
-
-       sdio_release_host(bdev->func);
+       bt_dev_dbg(bdev->hdev, "status (%d) return ownership to device", err);
 
        return err;
 }
@@ -1090,7 +1264,6 @@ static int btmtksdio_runtime_resume(struct device *dev)
 {
        struct sdio_func *func = dev_to_sdio_func(dev);
        struct btmtksdio_dev *bdev;
-       u32 status;
        int err;
 
        bdev = sdio_get_drvdata(func);
@@ -1100,18 +1273,9 @@ static int btmtksdio_runtime_resume(struct device *dev)
        if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state))
                return 0;
 
-       sdio_claim_host(bdev->func);
+       err = btmtksdio_drv_pmctrl(bdev);
 
-       sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err);
-       if (err < 0)
-               goto out;
-
-       err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status,
-                                status & C_COM_DRV_OWN, 2000, 1000000);
-out:
-       bt_dev_info(bdev->hdev, "status (%d) get ownership from device", err);
-
-       sdio_release_host(bdev->func);
+       bt_dev_dbg(bdev->hdev, "status (%d) get ownership from device", err);
 
        return err;
 }
index c2bdd1e..c2030f7 100644 (file)
@@ -149,6 +149,14 @@ static const struct id_table ic_id_table[] = {
          .cfg_name = "rtl_bt/rtl8761bu_config" },
 
        /* 8822C with UART interface */
+       { IC_INFO(RTL_ROM_LMP_8822B, 0xc, 0x8, HCI_UART),
+         .config_needed = true,
+         .has_rom_version = true,
+         .has_msft_ext = true,
+         .fw_name  = "rtl_bt/rtl8822cs_fw.bin",
+         .cfg_name = "rtl_bt/rtl8822cs_config" },
+
+       /* 8822C with UART interface */
        { IC_INFO(RTL_ROM_LMP_8822B, 0xc, 0xa, HCI_UART),
          .config_needed = true,
          .has_rom_version = true,
index c30d131..aefa0ee 100644 (file)
@@ -62,6 +62,7 @@ static struct usb_driver btusb_driver;
 #define BTUSB_QCA_WCN6855      0x1000000
 #define BTUSB_INTEL_BROKEN_SHUTDOWN_LED        0x2000000
 #define BTUSB_INTEL_BROKEN_INITIAL_NCMD 0x4000000
+#define BTUSB_INTEL_NO_WBS_SUPPORT     0x8000000
 
 static const struct usb_device_id btusb_table[] = {
        /* Generic Bluetooth USB device */
@@ -385,9 +386,11 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED },
        { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
        { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED |
+                                                    BTUSB_INTEL_NO_WBS_SUPPORT |
                                                     BTUSB_INTEL_BROKEN_INITIAL_NCMD |
                                                     BTUSB_INTEL_BROKEN_SHUTDOWN_LED },
        { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED |
+                                                    BTUSB_INTEL_NO_WBS_SUPPORT |
                                                     BTUSB_INTEL_BROKEN_SHUTDOWN_LED },
        { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED },
        { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED |
@@ -405,6 +408,8 @@ static const struct usb_device_id blacklist_table[] = {
                                                     BTUSB_WIDEBAND_SPEECH },
 
        /* Realtek 8852AE Bluetooth devices */
+       { USB_DEVICE(0x0bda, 0x2852), .driver_info = BTUSB_REALTEK |
+                                                    BTUSB_WIDEBAND_SPEECH },
        { USB_DEVICE(0x0bda, 0xc852), .driver_info = BTUSB_REALTEK |
                                                     BTUSB_WIDEBAND_SPEECH },
        { USB_DEVICE(0x0bda, 0x385a), .driver_info = BTUSB_REALTEK |
@@ -2057,10 +2062,10 @@ static int btusb_setup_csr(struct hci_dev *hdev)
                 * These controllers are really messed-up.
                 *
                 * 1. Their bulk RX endpoint will never report any data unless
-                * the device was suspended at least once (yes, really).
+                *    the device was suspended at least once (yes, really).
                 * 2. They will not wakeup when autosuspended and receiving data
-                * on their bulk RX endpoint from e.g. a keyboard or mouse
-                * (IOW remote-wakeup support is broken for the bulk endpoint).
+                *    on their bulk RX endpoint from e.g. a keyboard or mouse
+                *    (IOW remote-wakeup support is broken for the bulk endpoint).
                 *
                 * To fix 1. enable runtime-suspend, force-suspend the
                 * HCI and then wake-it up by disabling runtime-suspend.
@@ -3737,6 +3742,9 @@ static int btusb_probe(struct usb_interface *intf,
                hdev->send = btusb_send_frame_intel;
                hdev->cmd_timeout = btusb_intel_cmd_timeout;
 
+               if (id->driver_info & BTUSB_INTEL_NO_WBS_SUPPORT)
+                       btintel_set_flag(hdev, INTEL_ROM_LEGACY_NO_WBS_SUPPORT);
+
                if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD)
                        btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD);
 
index 34286ff..fdf504b 100644 (file)
@@ -966,6 +966,11 @@ static void h5_btrtl_open(struct h5 *h5)
                pm_runtime_enable(&h5->hu->serdev->dev);
        }
 
+       /* The controller needs reset to startup */
+       gpiod_set_value_cansleep(h5->enable_gpio, 0);
+       gpiod_set_value_cansleep(h5->device_wake_gpio, 0);
+       msleep(100);
+
        /* The controller needs up to 500ms to wakeup */
        gpiod_set_value_cansleep(h5->enable_gpio, 1);
        gpiod_set_value_cansleep(h5->device_wake_gpio, 1);
index eb1e736..4eb420a 100644 (file)
@@ -509,7 +509,7 @@ static int send_command_from_firmware(struct ll_device *lldev,
        return 0;
 }
 
-/**
+/*
  * download_firmware -
  *     internal function which parses through the .bts firmware
  *     script file intreprets SEND, DELAY actions only as of now
index 3b00d82..4cda890 100644 (file)
@@ -305,6 +305,8 @@ int hci_uart_register_device(struct hci_uart *hu,
        if (err)
                return err;
 
+       percpu_init_rwsem(&hu->proto_lock);
+
        err = p->open(hu);
        if (err)
                goto err_open;
@@ -327,7 +329,6 @@ int hci_uart_register_device(struct hci_uart *hu,
 
        INIT_WORK(&hu->init_ready, hci_uart_init_work);
        INIT_WORK(&hu->write_work, hci_uart_write_work);
-       percpu_init_rwsem(&hu->proto_lock);
 
        /* Only when vendor specific setup callback is provided, consider
         * the manufacturer information valid. This avoids filling in the
index 533e476..c98a4b0 100644 (file)
@@ -1269,6 +1269,27 @@ unwind:
        return res;
 }
 
+/* determine if the packet is NA or NS */
+static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
+{
+       struct ipv6hdr *ip6hdr;
+       struct icmp6hdr *hdr;
+
+       if (!pskb_network_may_pull(skb, sizeof(*ip6hdr)))
+               return true;
+
+       ip6hdr = ipv6_hdr(skb);
+       if (ip6hdr->nexthdr != IPPROTO_ICMPV6)
+               return false;
+
+       if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) + sizeof(*hdr)))
+               return true;
+
+       hdr = icmp6_hdr(skb);
+       return hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
+               hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION;
+}
+
 /************************ exported alb functions ************************/
 
 int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
@@ -1348,8 +1369,11 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
        /* Do not TX balance any multicast or broadcast */
        if (!is_multicast_ether_addr(eth_data->h_dest)) {
                switch (skb->protocol) {
-               case htons(ETH_P_IP):
                case htons(ETH_P_IPV6):
+                       if (alb_determine_nd(skb, bond))
+                               break;
+                       fallthrough;
+               case htons(ETH_P_IP):
                        hash_index = bond_xmit_hash(bond, skb);
                        if (bond->params.tlb_dynamic_lb) {
                                tx_slave = tlb_choose_channel(bond,
@@ -1432,10 +1456,12 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
                        break;
                }
 
-               if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) {
+               if (alb_determine_nd(skb, bond)) {
                        do_tx_balance = false;
                        break;
                }
+
+               /* The IPv6 header is pulled by alb_determine_nd */
                /* Additionally, DAD probes should not be tx-balanced as that
                 * will lead to false positives for duplicate addresses and
                 * prevent address configuration from working.
index c0c9144..8d51c10 100644 (file)
@@ -68,17 +68,7 @@ config NET_DSA_QCA8K
          This enables support for the Qualcomm Atheros QCA8K Ethernet
          switch chips.
 
-config NET_DSA_REALTEK_SMI
-       tristate "Realtek SMI Ethernet switch family support"
-       select NET_DSA_TAG_RTL4_A
-       select NET_DSA_TAG_RTL8_4
-       select FIXED_PHY
-       select IRQ_DOMAIN
-       select REALTEK_PHY
-       select REGMAP
-       help
-         This enables support for the Realtek SMI-based switch
-         chips, currently only RTL8366RB.
+source "drivers/net/dsa/realtek/Kconfig"
 
 config NET_DSA_SMSC_LAN9303
        tristate
index 8da1569..e73838c 100644 (file)
@@ -9,8 +9,6 @@ obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o
 obj-$(CONFIG_NET_DSA_MT7530)   += mt7530.o
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_QCA8K)    += qca8k.o
-obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o
-realtek-smi-objs               := realtek-smi-core.o rtl8366.o rtl8366rb.o rtl8365mb.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o
@@ -23,5 +21,6 @@ obj-y                         += microchip/
 obj-y                          += mv88e6xxx/
 obj-y                          += ocelot/
 obj-y                          += qca/
+obj-y                          += realtek/
 obj-y                          += sja1105/
 obj-y                          += xrs700x/
index 3867f3d..a3b9899 100644 (file)
@@ -2186,7 +2186,7 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
 {
        int ret;
 
-       ret = phy_init_eee(phy, 0);
+       ret = phy_init_eee(phy, false);
        if (ret)
                return 0;
 
index 33499fc..9161ce4 100644 (file)
@@ -709,49 +709,25 @@ static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port)
                       PHY_BRCM_IDDQ_SUSPEND;
 }
 
-static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port,
-                               unsigned long *supported,
-                               struct phylink_link_state *state)
+static void bcm_sf2_sw_get_caps(struct dsa_switch *ds, int port,
+                               struct phylink_config *config)
 {
+       unsigned long *interfaces = config->supported_interfaces;
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
-
-       if (!phy_interface_mode_is_rgmii(state->interface) &&
-           state->interface != PHY_INTERFACE_MODE_MII &&
-           state->interface != PHY_INTERFACE_MODE_REVMII &&
-           state->interface != PHY_INTERFACE_MODE_GMII &&
-           state->interface != PHY_INTERFACE_MODE_INTERNAL &&
-           state->interface != PHY_INTERFACE_MODE_MOCA) {
-               linkmode_zero(supported);
-               if (port != core_readl(priv, CORE_IMP0_PRT_ID))
-                       dev_err(ds->dev,
-                               "Unsupported interface: %d for port %d\n",
-                               state->interface, port);
-               return;
-       }
-
-       /* Allow all the expected bits */
-       phylink_set(mask, Autoneg);
-       phylink_set_port_modes(mask);
-       phylink_set(mask, Pause);
-       phylink_set(mask, Asym_Pause);
 
-       /* With the exclusion of MII and Reverse MII, we support Gigabit,
-        * including Half duplex
-        */
-       if (state->interface != PHY_INTERFACE_MODE_MII &&
-           state->interface != PHY_INTERFACE_MODE_REVMII) {
-               phylink_set(mask, 1000baseT_Full);
-               phylink_set(mask, 1000baseT_Half);
+       if (priv->int_phy_mask & BIT(port)) {
+               __set_bit(PHY_INTERFACE_MODE_INTERNAL, interfaces);
+       } else if (priv->moca_port == port) {
+               __set_bit(PHY_INTERFACE_MODE_MOCA, interfaces);
+       } else {
+               __set_bit(PHY_INTERFACE_MODE_MII, interfaces);
+               __set_bit(PHY_INTERFACE_MODE_REVMII, interfaces);
+               __set_bit(PHY_INTERFACE_MODE_GMII, interfaces);
+               phy_interface_set_rgmii(interfaces);
        }
 
-       phylink_set(mask, 10baseT_Half);
-       phylink_set(mask, 10baseT_Full);
-       phylink_set(mask, 100baseT_Half);
-       phylink_set(mask, 100baseT_Full);
-
-       linkmode_and(supported, supported, mask);
-       linkmode_and(state->advertising, state->advertising, mask);
+       config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+               MAC_10 | MAC_100 | MAC_1000;
 }
 
 static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port,
@@ -1218,7 +1194,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
        .get_sset_count         = bcm_sf2_sw_get_sset_count,
        .get_ethtool_phy_stats  = b53_get_ethtool_phy_stats,
        .get_phy_flags          = bcm_sf2_sw_get_phy_flags,
-       .phylink_validate       = bcm_sf2_sw_validate,
+       .phylink_get_caps       = bcm_sf2_sw_get_caps,
        .phylink_mac_config     = bcm_sf2_sw_mac_config,
        .phylink_mac_link_down  = bcm_sf2_sw_mac_link_down,
        .phylink_mac_link_up    = bcm_sf2_sw_mac_link_up,
index 991b9c6..5dc9899 100644 (file)
@@ -1461,27 +1461,22 @@ static int ksz8_setup(struct dsa_switch *ds)
        return 0;
 }
 
-static void ksz8_validate(struct dsa_switch *ds, int port,
-                         unsigned long *supported,
-                         struct phylink_link_state *state)
+static void ksz8_get_caps(struct dsa_switch *ds, int port,
+                         struct phylink_config *config)
 {
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
        struct ksz_device *dev = ds->priv;
 
        if (port == dev->cpu_port) {
-               if (state->interface != PHY_INTERFACE_MODE_RMII &&
-                   state->interface != PHY_INTERFACE_MODE_MII &&
-                   state->interface != PHY_INTERFACE_MODE_NA)
-                       goto unsupported;
+               __set_bit(PHY_INTERFACE_MODE_RMII,
+                         config->supported_interfaces);
+               __set_bit(PHY_INTERFACE_MODE_MII,
+                         config->supported_interfaces);
        } else {
-               if (state->interface != PHY_INTERFACE_MODE_INTERNAL &&
-                   state->interface != PHY_INTERFACE_MODE_NA)
-                       goto unsupported;
+               __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+                         config->supported_interfaces);
        }
 
-       /* Allow all the expected bits */
-       phylink_set_port_modes(mask);
-       phylink_set(mask, Autoneg);
+       config->mac_capabilities = MAC_10 | MAC_100;
 
        /* Silicon Errata Sheet (DS80000830A):
         * "Port 1 does not respond to received flow control PAUSE frames"
@@ -1489,27 +1484,11 @@ static void ksz8_validate(struct dsa_switch *ds, int port,
         * switches.
         */
        if (!ksz_is_ksz88x3(dev) || port)
-               phylink_set(mask, Pause);
+               config->mac_capabilities |= MAC_SYM_PAUSE;
 
        /* Asym pause is not supported on KSZ8863 and KSZ8873 */
        if (!ksz_is_ksz88x3(dev))
-               phylink_set(mask, Asym_Pause);
-
-       /* 10M and 100M are only supported */
-       phylink_set(mask, 10baseT_Half);
-       phylink_set(mask, 10baseT_Full);
-       phylink_set(mask, 100baseT_Half);
-       phylink_set(mask, 100baseT_Full);
-
-       linkmode_and(supported, supported, mask);
-       linkmode_and(state->advertising, state->advertising, mask);
-
-       return;
-
-unsupported:
-       linkmode_zero(supported);
-       dev_err(ds->dev, "Unsupported interface: %s, port: %d\n",
-               phy_modes(state->interface), port);
+               config->mac_capabilities |= MAC_ASYM_PAUSE;
 }
 
 static const struct dsa_switch_ops ksz8_switch_ops = {
@@ -1518,7 +1497,7 @@ static const struct dsa_switch_ops ksz8_switch_ops = {
        .setup                  = ksz8_setup,
        .phy_read               = ksz_phy_read16,
        .phy_write              = ksz_phy_write16,
-       .phylink_validate       = ksz8_validate,
+       .phylink_get_caps       = ksz8_get_caps,
        .phylink_mac_link_down  = ksz_mac_link_down,
        .port_enable            = ksz_enable_port,
        .get_strings            = ksz8_get_strings,
index 353b5f9..a85d990 100644 (file)
@@ -222,9 +222,12 @@ static int ksz9477_reset_switch(struct ksz_device *dev)
                           (BROADCAST_STORM_VALUE *
                           BROADCAST_STORM_PROT_RATE) / 100);
 
-       if (dev->synclko_125)
-               ksz_write8(dev, REG_SW_GLOBAL_OUTPUT_CTRL__1,
-                          SW_ENABLE_REFCLKO | SW_REFCLKO_IS_125MHZ);
+       data8 = SW_ENABLE_REFCLKO;
+       if (dev->synclko_disable)
+               data8 = 0;
+       else if (dev->synclko_125)
+               data8 = SW_ENABLE_REFCLKO | SW_REFCLKO_IS_125MHZ;
+       ksz_write8(dev, REG_SW_GLOBAL_OUTPUT_CTRL__1, data8);
 
        return 0;
 }
index 55dbda0..7e33ec7 100644 (file)
@@ -434,6 +434,12 @@ int ksz_switch_register(struct ksz_device *dev,
                        }
                dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
                                                         "microchip,synclko-125");
+               dev->synclko_disable = of_property_read_bool(dev->dev->of_node,
+                                                            "microchip,synclko-disable");
+               if (dev->synclko_125 && dev->synclko_disable) {
+                       dev_err(dev->dev, "inconsistent synclko settings\n");
+                       return -EINVAL;
+               }
        }
 
        ret = dsa_register_switch(dev->ds);
index df8ae59..3db63f6 100644 (file)
@@ -75,6 +75,7 @@ struct ksz_device {
        u32 regs_size;
        bool phy_errata_9477;
        bool synclko_125;
+       bool synclko_disable;
 
        struct vlan_table *vlan_cache;
 
index b82512e..bc77a26 100644 (file)
@@ -2846,7 +2846,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
                        mcr |= PMCR_RX_FC_EN;
        }
 
-       if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, 0) >= 0) {
+       if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) {
                switch (speed) {
                case SPEED_1000:
                        mcr |= PMCR_FORCE_EEE1G;
index 58ca684..7670796 100644 (file)
@@ -86,12 +86,16 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val)
 int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
                        u16 mask, u16 val)
 {
+       const unsigned long timeout = jiffies + msecs_to_jiffies(50);
        u16 data;
        int err;
        int i;
 
-       /* There's no bus specific operation to wait for a mask */
-       for (i = 0; i < 16; i++) {
+       /* There's no bus specific operation to wait for a mask. Even
+        * if the initial poll takes longer than 50ms, always do at
+        * least one more attempt.
+        */
+       for (i = 0; time_before(jiffies, timeout) || (i < 2); i++) {
                err = mv88e6xxx_read(chip, addr, reg, &data);
                if (err)
                        return err;
@@ -99,7 +103,10 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
                if ((data & mask) == val)
                        return 0;
 
-               usleep_range(1000, 2000);
+               if (i < 2)
+                       cpu_relax();
+               else
+                       usleep_range(1000, 2000);
        }
 
        dev_err(chip->dev, "Timeout while waiting for switch\n");
@@ -563,133 +570,249 @@ static int mv88e6xxx_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
        return 0;
 }
 
-static void mv88e6065_phylink_validate(struct mv88e6xxx_chip *chip, int port,
-                                      unsigned long *mask,
-                                      struct phylink_link_state *state)
+static const u8 mv88e6185_phy_interface_modes[] = {
+       [MV88E6185_PORT_STS_CMODE_GMII_FD]       = PHY_INTERFACE_MODE_GMII,
+       [MV88E6185_PORT_STS_CMODE_MII_100_FD_PS] = PHY_INTERFACE_MODE_MII,
+       [MV88E6185_PORT_STS_CMODE_MII_100]       = PHY_INTERFACE_MODE_MII,
+       [MV88E6185_PORT_STS_CMODE_MII_10]        = PHY_INTERFACE_MODE_MII,
+       [MV88E6185_PORT_STS_CMODE_SERDES]        = PHY_INTERFACE_MODE_1000BASEX,
+       [MV88E6185_PORT_STS_CMODE_1000BASE_X]    = PHY_INTERFACE_MODE_1000BASEX,
+       [MV88E6185_PORT_STS_CMODE_PHY]           = PHY_INTERFACE_MODE_SGMII,
+};
+
+static void mv88e6185_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+                                      struct phylink_config *config)
 {
-       if (!phy_interface_mode_is_8023z(state->interface)) {
-               /* 10M and 100M are only supported in non-802.3z mode */
-               phylink_set(mask, 10baseT_Half);
-               phylink_set(mask, 10baseT_Full);
-               phylink_set(mask, 100baseT_Half);
-               phylink_set(mask, 100baseT_Full);
-       }
+       u8 cmode = chip->ports[port].cmode;
+
+       if (cmode < ARRAY_SIZE(mv88e6185_phy_interface_modes) &&
+           mv88e6185_phy_interface_modes[cmode])
+               __set_bit(mv88e6185_phy_interface_modes[cmode],
+                         config->supported_interfaces);
+
+       config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+                                  MAC_1000FD;
 }
 
-static void mv88e6185_phylink_validate(struct mv88e6xxx_chip *chip, int port,
-                                      unsigned long *mask,
-                                      struct phylink_link_state *state)
-{
-       /* FIXME: if the port is in 1000Base-X mode, then it only supports
-        * 1000M FD speeds.  In this case, CMODE will indicate 5.
+static const u8 mv88e6xxx_phy_interface_modes[] = {
+       [MV88E6XXX_PORT_STS_CMODE_MII_PHY]      = PHY_INTERFACE_MODE_MII,
+       [MV88E6XXX_PORT_STS_CMODE_MII]          = PHY_INTERFACE_MODE_MII,
+       [MV88E6XXX_PORT_STS_CMODE_GMII]         = PHY_INTERFACE_MODE_GMII,
+       [MV88E6XXX_PORT_STS_CMODE_RMII_PHY]     = PHY_INTERFACE_MODE_RMII,
+       [MV88E6XXX_PORT_STS_CMODE_RMII]         = PHY_INTERFACE_MODE_RMII,
+       [MV88E6XXX_PORT_STS_CMODE_100BASEX]     = PHY_INTERFACE_MODE_100BASEX,
+       [MV88E6XXX_PORT_STS_CMODE_1000BASEX]    = PHY_INTERFACE_MODE_1000BASEX,
+       [MV88E6XXX_PORT_STS_CMODE_SGMII]        = PHY_INTERFACE_MODE_SGMII,
+       /* higher interface modes are not needed here, since ports supporting
+        * them are writable, and so the supported interfaces are filled in the
+        * corresponding .phylink_get_caps() implementation below
         */
-       phylink_set(mask, 1000baseT_Full);
-       phylink_set(mask, 1000baseX_Full);
+};
 
-       mv88e6065_phylink_validate(chip, port, mask, state);
+static void mv88e6xxx_translate_cmode(u8 cmode, unsigned long *supported)
+{
+       if (cmode < ARRAY_SIZE(mv88e6xxx_phy_interface_modes) &&
+           mv88e6xxx_phy_interface_modes[cmode])
+               __set_bit(mv88e6xxx_phy_interface_modes[cmode], supported);
+       else if (cmode == MV88E6XXX_PORT_STS_CMODE_RGMII)
+               phy_interface_set_rgmii(supported);
 }
 
-static void mv88e6341_phylink_validate(struct mv88e6xxx_chip *chip, int port,
-                                      unsigned long *mask,
-                                      struct phylink_link_state *state)
+static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+                                      struct phylink_config *config)
 {
-       if (port >= 5)
-               phylink_set(mask, 2500baseX_Full);
+       unsigned long *supported = config->supported_interfaces;
 
-       /* No ethtool bits for 200Mbps */
-       phylink_set(mask, 1000baseT_Full);
-       phylink_set(mask, 1000baseX_Full);
+       /* Translate the default cmode */
+       mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
 
-       mv88e6065_phylink_validate(chip, port, mask, state);
+       config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100;
 }
 
-static void mv88e6352_phylink_validate(struct mv88e6xxx_chip *chip, int port,
-                                      unsigned long *mask,
-                                      struct phylink_link_state *state)
+static int mv88e6352_get_port4_serdes_cmode(struct mv88e6xxx_chip *chip)
 {
-       /* No ethtool bits for 200Mbps */
-       phylink_set(mask, 1000baseT_Full);
-       phylink_set(mask, 1000baseX_Full);
+       u16 reg, val;
+       int err;
+
+       err = mv88e6xxx_port_read(chip, 4, MV88E6XXX_PORT_STS, &reg);
+       if (err)
+               return err;
+
+       /* If PHY_DETECT is zero, then we are not in auto-media mode */
+       if (!(reg & MV88E6XXX_PORT_STS_PHY_DETECT))
+               return 0xf;
+
+       val = reg & ~MV88E6XXX_PORT_STS_PHY_DETECT;
+       err = mv88e6xxx_port_write(chip, 4, MV88E6XXX_PORT_STS, val);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_port_read(chip, 4, MV88E6XXX_PORT_STS, &val);
+       if (err)
+               return err;
+
+       /* Restore PHY_DETECT value */
+       err = mv88e6xxx_port_write(chip, 4, MV88E6XXX_PORT_STS, reg);
+       if (err)
+               return err;
 
-       mv88e6065_phylink_validate(chip, port, mask, state);
+       return val & MV88E6XXX_PORT_STS_CMODE_MASK;
 }
 
-static void mv88e6390_phylink_validate(struct mv88e6xxx_chip *chip, int port,
-                                      unsigned long *mask,
-                                      struct phylink_link_state *state)
+static void mv88e6352_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+                                      struct phylink_config *config)
 {
-       if (port >= 9) {
-               phylink_set(mask, 2500baseX_Full);
-               phylink_set(mask, 2500baseT_Full);
+       unsigned long *supported = config->supported_interfaces;
+       int err, cmode;
+
+       /* Translate the default cmode */
+       mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+
+       config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+                                  MAC_1000FD;
+
+       /* Port 4 supports automedia if the serdes is associated with it. */
+       if (port == 4) {
+               mv88e6xxx_reg_lock(chip);
+               err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+               if (err < 0)
+                       dev_err(chip->dev, "p%d: failed to read scratch\n",
+                               port);
+               if (err <= 0)
+                       goto unlock;
+
+               cmode = mv88e6352_get_port4_serdes_cmode(chip);
+               if (cmode < 0)
+                       dev_err(chip->dev, "p%d: failed to read serdes cmode\n",
+                               port);
+               else
+                       mv88e6xxx_translate_cmode(cmode, supported);
+unlock:
+               mv88e6xxx_reg_unlock(chip);
        }
+}
+
+static void mv88e6341_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+                                      struct phylink_config *config)
+{
+       unsigned long *supported = config->supported_interfaces;
+
+       /* Translate the default cmode */
+       mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
 
        /* No ethtool bits for 200Mbps */
-       phylink_set(mask, 1000baseT_Full);
-       phylink_set(mask, 1000baseX_Full);
+       config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+                                  MAC_1000FD;
+
+       /* The C_Mode field is programmable on port 5 */
+       if (port == 5) {
+               __set_bit(PHY_INTERFACE_MODE_SGMII, supported);
+               __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported);
+               __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported);
 
-       mv88e6065_phylink_validate(chip, port, mask, state);
+               config->mac_capabilities |= MAC_2500FD;
+       }
 }
 
-static void mv88e6390x_phylink_validate(struct mv88e6xxx_chip *chip, int port,
-                                       unsigned long *mask,
-                                       struct phylink_link_state *state)
+static void mv88e6390_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+                                      struct phylink_config *config)
 {
-       if (port >= 9) {
-               phylink_set(mask, 10000baseT_Full);
-               phylink_set(mask, 10000baseKR_Full);
+       unsigned long *supported = config->supported_interfaces;
+
+       /* Translate the default cmode */
+       mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+
+       /* No ethtool bits for 200Mbps */
+       config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+                                  MAC_1000FD;
+
+       /* The C_Mode field is programmable on ports 9 and 10 */
+       if (port == 9 || port == 10) {
+               __set_bit(PHY_INTERFACE_MODE_SGMII, supported);
+               __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported);
+               __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported);
+
+               config->mac_capabilities |= MAC_2500FD;
        }
+}
+
+static void mv88e6390x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+                                       struct phylink_config *config)
+{
+       unsigned long *supported = config->supported_interfaces;
 
-       mv88e6390_phylink_validate(chip, port, mask, state);
+       mv88e6390_phylink_get_caps(chip, port, config);
+
+       /* For the 6x90X, ports 2-7 can be in automedia mode.
+        * (Note that 6x90 doesn't support RXAUI nor XAUI).
+        *
+        * Port 2 can also support 1000BASE-X in automedia mode if port 9 is
+        * configured for 1000BASE-X, SGMII or 2500BASE-X.
+        * Port 3-4 can also support 1000BASE-X in automedia mode if port 9 is
+        * configured for RXAUI, 1000BASE-X, SGMII or 2500BASE-X.
+        *
+        * Port 5 can also support 1000BASE-X in automedia mode if port 10 is
+        * configured for 1000BASE-X, SGMII or 2500BASE-X.
+        * Port 6-7 can also support 1000BASE-X in automedia mode if port 10 is
+        * configured for RXAUI, 1000BASE-X, SGMII or 2500BASE-X.
+        *
+        * For now, be permissive (as the old code was) and allow 1000BASE-X
+        * on ports 2..7.
+        */
+       if (port >= 2 && port <= 7)
+               __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported);
+
+       /* The C_Mode field can also be programmed for 10G speeds */
+       if (port == 9 || port == 10) {
+               __set_bit(PHY_INTERFACE_MODE_XAUI, supported);
+               __set_bit(PHY_INTERFACE_MODE_RXAUI, supported);
+
+               config->mac_capabilities |= MAC_10000FD;
+       }
 }
 
-static void mv88e6393x_phylink_validate(struct mv88e6xxx_chip *chip, int port,
-                                       unsigned long *mask,
-                                       struct phylink_link_state *state)
+static void mv88e6393x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+                                       struct phylink_config *config)
 {
+       unsigned long *supported = config->supported_interfaces;
        bool is_6191x =
                chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6191X;
 
-       if (((port == 0 || port == 9) && !is_6191x) || port == 10) {
-               phylink_set(mask, 10000baseT_Full);
-               phylink_set(mask, 10000baseKR_Full);
-               phylink_set(mask, 10000baseCR_Full);
-               phylink_set(mask, 10000baseSR_Full);
-               phylink_set(mask, 10000baseLR_Full);
-               phylink_set(mask, 10000baseLRM_Full);
-               phylink_set(mask, 10000baseER_Full);
-               phylink_set(mask, 5000baseT_Full);
-               phylink_set(mask, 2500baseX_Full);
-               phylink_set(mask, 2500baseT_Full);
-       }
+       mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+
+       config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 |
+                                  MAC_1000FD;
 
-       phylink_set(mask, 1000baseT_Full);
-       phylink_set(mask, 1000baseX_Full);
+       /* The C_Mode field can be programmed for ports 0, 9 and 10 */
+       if (port == 0 || port == 9 || port == 10) {
+               __set_bit(PHY_INTERFACE_MODE_SGMII, supported);
+               __set_bit(PHY_INTERFACE_MODE_1000BASEX, supported);
 
-       mv88e6065_phylink_validate(chip, port, mask, state);
+               /* 6191X supports >1G modes only on port 10 */
+               if (!is_6191x || port == 10) {
+                       __set_bit(PHY_INTERFACE_MODE_2500BASEX, supported);
+                       __set_bit(PHY_INTERFACE_MODE_5GBASER, supported);
+                       __set_bit(PHY_INTERFACE_MODE_10GBASER, supported);
+                       /* FIXME: USXGMII is not supported yet */
+                       /* __set_bit(PHY_INTERFACE_MODE_USXGMII, supported); */
+
+                       config->mac_capabilities |= MAC_2500FD | MAC_5000FD |
+                               MAC_10000FD;
+               }
+       }
 }
 
-static void mv88e6xxx_validate(struct dsa_switch *ds, int port,
-                              unsigned long *supported,
-                              struct phylink_link_state *state)
+static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port,
+                              struct phylink_config *config)
 {
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
        struct mv88e6xxx_chip *chip = ds->priv;
 
-       /* Allow all the expected bits */
-       phylink_set(mask, Autoneg);
-       phylink_set(mask, Pause);
-       phylink_set_port_modes(mask);
+       chip->info->ops->phylink_get_caps(chip, port, config);
 
-       if (chip->info->ops->phylink_validate)
-               chip->info->ops->phylink_validate(chip, port, mask, state);
-
-       linkmode_and(supported, supported, mask);
-       linkmode_and(state->advertising, state->advertising, mask);
-
-       /* We can only operate at 2500BaseX or 1000BaseX.  If requested
-        * to advertise both, only report advertising at 2500BaseX.
-        */
-       phylink_helper_basex_speed(state);
+       /* Internal ports need GMII for PHYLIB */
+       if (mv88e6xxx_phy_is_internal(ds, port))
+               __set_bit(PHY_INTERFACE_MODE_GMII,
+                         config->supported_interfaces);
 }
 
 static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port,
@@ -1283,8 +1406,15 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
 
        pvlan = 0;
 
-       /* Frames from user ports can egress any local DSA links and CPU ports,
-        * as well as any local member of their bridge group.
+       /* Frames from standalone user ports can only egress on the
+        * upstream port.
+        */
+       if (!dsa_port_bridge_dev_get(dp))
+               return BIT(dsa_switch_upstream_port(ds));
+
+       /* Frames from bridged user ports can egress any local DSA
+        * links and CPU ports, as well as any local member of their
+        * bridge group.
         */
        dsa_switch_for_each_port(other_dp, ds)
                if (other_dp->type == DSA_PORT_TYPE_CPU ||
@@ -1616,21 +1746,11 @@ static int mv88e6xxx_fid_map_vlan(struct mv88e6xxx_chip *chip,
 
 int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *fid_bitmap)
 {
-       int i, err;
-       u16 fid;
-
        bitmap_zero(fid_bitmap, MV88E6XXX_N_FID);
 
-       /* Set every FID bit used by the (un)bridged ports */
-       for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
-               err = mv88e6xxx_port_get_fid(chip, i, &fid);
-               if (err)
-                       return err;
-
-               set_bit(fid, fid_bitmap);
-       }
-
-       /* Set every FID bit used by the VLAN entries */
+       /* Every FID has an associated VID, so walking the VTU
+        * will discover the full set of FIDs in use.
+        */
        return mv88e6xxx_vtu_walk(chip, mv88e6xxx_fid_map_vlan, fid_bitmap);
 }
 
@@ -1643,10 +1763,7 @@ static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid)
        if (err)
                return err;
 
-       /* The reset value 0x000 is used to indicate that multiple address
-        * databases are not needed. Return the next positive available.
-        */
-       *fid = find_next_zero_bit(fid_bitmap, MV88E6XXX_N_FID, 1);
+       *fid = find_first_zero_bit(fid_bitmap, MV88E6XXX_N_FID);
        if (unlikely(*fid >= mv88e6xxx_num_databases(chip)))
                return -ENOSPC;
 
@@ -2138,6 +2255,9 @@ static int mv88e6xxx_port_vlan_join(struct mv88e6xxx_chip *chip, int port,
        if (!vlan.valid) {
                memset(&vlan, 0, sizeof(vlan));
 
+               if (vid == MV88E6XXX_VID_STANDALONE)
+                       vlan.policy = true;
+
                err = mv88e6xxx_atu_new(chip, &vlan.fid);
                if (err)
                        return err;
@@ -2480,6 +2600,10 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
        if (err)
                goto unlock;
 
+       err = mv88e6xxx_port_set_map_da(chip, port, true);
+       if (err)
+               goto unlock;
+
        err = mv88e6xxx_port_commit_pvid(chip, port);
        if (err)
                goto unlock;
@@ -2514,6 +2638,12 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
            mv88e6xxx_port_vlan_map(chip, port))
                dev_err(ds->dev, "failed to remap in-chip Port VLAN\n");
 
+       err = mv88e6xxx_port_set_map_da(chip, port, false);
+       if (err)
+               dev_err(ds->dev,
+                       "port %d failed to restore map-DA: %pe\n",
+                       port, ERR_PTR(err));
+
        err = mv88e6xxx_port_commit_pvid(chip, port);
        if (err)
                dev_err(ds->dev,
@@ -2911,12 +3041,13 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                return err;
 
        /* Port Control 2: don't force a good FCS, set the MTU size to
-        * 10222 bytes, disable 802.1q tags checking, don't discard tagged or
-        * untagged frames on this port, do a destination address lookup on all
-        * received packets as usual, disable ARP mirroring and don't send a
-        * copy of all transmitted/received frames on this port to the CPU.
+        * 10222 bytes, disable 802.1q tags checking, don't discard
+        * tagged or untagged frames on this port, skip destination
+        * address lookup on user ports, disable ARP mirroring and don't
+        * send a copy of all transmitted/received frames on this port
+        * to the CPU.
         */
-       err = mv88e6xxx_port_set_map_da(chip, port);
+       err = mv88e6xxx_port_set_map_da(chip, port, !dsa_is_user_port(ds, port));
        if (err)
                return err;
 
@@ -2924,8 +3055,44 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
        if (err)
                return err;
 
+       /* On chips that support it, set all downstream DSA ports'
+        * VLAN policy to TRAP. In combination with loading
+        * MV88E6XXX_VID_STANDALONE as a policy entry in the VTU, this
+        * provides a better isolation barrier between standalone
+        * ports, as the ATU is bypassed on any intermediate switches
+        * between the incoming port and the CPU.
+        */
+       if (dsa_is_downstream_port(ds, port) &&
+           chip->info->ops->port_set_policy) {
+               err = chip->info->ops->port_set_policy(chip, port,
+                                               MV88E6XXX_POLICY_MAPPING_VTU,
+                                               MV88E6XXX_POLICY_ACTION_TRAP);
+               if (err)
+                       return err;
+       }
+
+       /* User ports start out in standalone mode and 802.1Q is
+        * therefore disabled. On DSA ports, all valid VIDs are always
+        * loaded in the VTU - therefore, enable 802.1Q in order to take
+        * advantage of VLAN policy on chips that supports it.
+        */
        err = mv88e6xxx_port_set_8021q_mode(chip, port,
-                               MV88E6XXX_PORT_CTL2_8021Q_MODE_DISABLED);
+                               dsa_is_user_port(ds, port) ?
+                               MV88E6XXX_PORT_CTL2_8021Q_MODE_DISABLED :
+                               MV88E6XXX_PORT_CTL2_8021Q_MODE_SECURE);
+       if (err)
+               return err;
+
+       /* Bind MV88E6XXX_VID_STANDALONE to MV88E6XXX_FID_STANDALONE by
+        * virtue of the fact that mv88e6xxx_atu_new() will pick it as
+        * the first free FID. This will be used as the private PVID for
+        * unbridged ports. Shared (DSA and CPU) ports must also be
+        * members of this VID, in order to trap all frames assigned to
+        * it to the CPU.
+        */
+       err = mv88e6xxx_port_vlan_join(chip, port, MV88E6XXX_VID_STANDALONE,
+                                      MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNMODIFIED,
+                                      false);
        if (err)
                return err;
 
@@ -2938,7 +3105,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
         * relying on their port default FID.
         */
        err = mv88e6xxx_port_vlan_join(chip, port, MV88E6XXX_VID_BRIDGED,
-                                      MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNTAGGED,
+                                      MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNMODIFIED,
                                       false);
        if (err)
                return err;
@@ -3577,7 +3744,7 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
        .rmu_disable = mv88e6085_g1_rmu_disable,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
        .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
 };
 
@@ -3611,7 +3778,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
        .reset = mv88e6185_g1_reset,
        .vtu_getnext = mv88e6185_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
        .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
 };
 
@@ -3627,6 +3794,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
        .port_sync_link = mv88e6185_port_sync_link,
        .port_set_speed_duplex = mv88e6185_port_set_speed_duplex,
        .port_tag_remap = mv88e6095_port_tag_remap,
+       .port_set_policy = mv88e6352_port_set_policy,
        .port_set_frame_mode = mv88e6351_port_set_frame_mode,
        .port_set_ucast_flood = mv88e6352_port_set_ucast_flood,
        .port_set_mcast_flood = mv88e6352_port_set_mcast_flood,
@@ -3657,7 +3825,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
        .rmu_disable = mv88e6085_g1_rmu_disable,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
        .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
 };
 
@@ -3694,7 +3862,7 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
        .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
        .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
 };
 
@@ -3735,7 +3903,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
        .reset = mv88e6185_g1_reset,
        .vtu_getnext = mv88e6185_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6141_ops = {
@@ -3799,7 +3967,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
        .serdes_get_stats = mv88e6390_serdes_get_stats,
        .serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
        .serdes_get_regs = mv88e6390_serdes_get_regs,
-       .phylink_validate = mv88e6341_phylink_validate,
+       .phylink_get_caps = mv88e6341_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6161_ops = {
@@ -3841,7 +4009,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .avb_ops = &mv88e6165_avb_ops,
        .ptp_ops = &mv88e6165_ptp_ops,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
        .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
 };
 
@@ -3877,7 +4045,7 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .avb_ops = &mv88e6165_avb_ops,
        .ptp_ops = &mv88e6165_ptp_ops,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6171_ops = {
@@ -3919,7 +4087,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
        .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6172_ops = {
@@ -3974,7 +4142,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
        .serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
        .serdes_get_regs = mv88e6352_serdes_get_regs,
        .gpio_ops = &mv88e6352_gpio_ops,
-       .phylink_validate = mv88e6352_phylink_validate,
+       .phylink_get_caps = mv88e6352_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6175_ops = {
@@ -4016,7 +4184,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
        .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6176_ops = {
@@ -4074,7 +4242,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
        .serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
        .serdes_get_regs = mv88e6352_serdes_get_regs,
        .gpio_ops = &mv88e6352_gpio_ops,
-       .phylink_validate = mv88e6352_phylink_validate,
+       .phylink_get_caps = mv88e6352_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6185_ops = {
@@ -4113,7 +4281,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
        .reset = mv88e6185_g1_reset,
        .vtu_getnext = mv88e6185_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
        .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
 };
 
@@ -4175,7 +4343,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
        .serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
        .serdes_get_regs = mv88e6390_serdes_get_regs,
        .gpio_ops = &mv88e6352_gpio_ops,
-       .phylink_validate = mv88e6390_phylink_validate,
+       .phylink_get_caps = mv88e6390_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6190x_ops = {
@@ -4236,7 +4404,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
        .serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
        .serdes_get_regs = mv88e6390_serdes_get_regs,
        .gpio_ops = &mv88e6352_gpio_ops,
-       .phylink_validate = mv88e6390x_phylink_validate,
+       .phylink_get_caps = mv88e6390x_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6191_ops = {
@@ -4296,7 +4464,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
        .serdes_get_regs = mv88e6390_serdes_get_regs,
        .avb_ops = &mv88e6390_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
-       .phylink_validate = mv88e6390_phylink_validate,
+       .phylink_get_caps = mv88e6390_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6240_ops = {
@@ -4356,7 +4524,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6352_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
-       .phylink_validate = mv88e6352_phylink_validate,
+       .phylink_get_caps = mv88e6352_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6250_ops = {
@@ -4396,7 +4564,7 @@ static const struct mv88e6xxx_ops mv88e6250_ops = {
        .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
        .avb_ops = &mv88e6352_avb_ops,
        .ptp_ops = &mv88e6250_ptp_ops,
-       .phylink_validate = mv88e6065_phylink_validate,
+       .phylink_get_caps = mv88e6250_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6290_ops = {
@@ -4458,7 +4626,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6390_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
-       .phylink_validate = mv88e6390_phylink_validate,
+       .phylink_get_caps = mv88e6390_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6320_ops = {
@@ -4502,7 +4670,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6352_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6321_ops = {
@@ -4544,7 +4712,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6352_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6341_ops = {
@@ -4610,7 +4778,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
        .serdes_get_stats = mv88e6390_serdes_get_stats,
        .serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
        .serdes_get_regs = mv88e6390_serdes_get_regs,
-       .phylink_validate = mv88e6341_phylink_validate,
+       .phylink_get_caps = mv88e6341_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6350_ops = {
@@ -4652,7 +4820,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
        .atu_set_hash = mv88e6165_g1_atu_set_hash,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6351_ops = {
@@ -4696,7 +4864,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .avb_ops = &mv88e6352_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
-       .phylink_validate = mv88e6185_phylink_validate,
+       .phylink_get_caps = mv88e6185_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6352_ops = {
@@ -4759,7 +4927,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .serdes_get_stats = mv88e6352_serdes_get_stats,
        .serdes_get_regs_len = mv88e6352_serdes_get_regs_len,
        .serdes_get_regs = mv88e6352_serdes_get_regs,
-       .phylink_validate = mv88e6352_phylink_validate,
+       .phylink_get_caps = mv88e6352_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6390_ops = {
@@ -4824,7 +4992,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
        .serdes_get_stats = mv88e6390_serdes_get_stats,
        .serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
        .serdes_get_regs = mv88e6390_serdes_get_regs,
-       .phylink_validate = mv88e6390_phylink_validate,
+       .phylink_get_caps = mv88e6390_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6390x_ops = {
@@ -4888,7 +5056,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6390_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
-       .phylink_validate = mv88e6390x_phylink_validate,
+       .phylink_get_caps = mv88e6390x_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_ops mv88e6393x_ops = {
@@ -4952,7 +5120,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = {
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6390_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
-       .phylink_validate = mv88e6393x_phylink_validate,
+       .phylink_get_caps = mv88e6393x_phylink_get_caps,
 };
 
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
@@ -6221,7 +6389,7 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .teardown               = mv88e6xxx_teardown,
        .port_setup             = mv88e6xxx_port_setup,
        .port_teardown          = mv88e6xxx_port_teardown,
-       .phylink_validate       = mv88e6xxx_validate,
+       .phylink_get_caps       = mv88e6xxx_get_caps,
        .phylink_mac_link_state = mv88e6xxx_serdes_pcs_get_state,
        .phylink_mac_config     = mv88e6xxx_mac_config,
        .phylink_mac_an_restart = mv88e6xxx_serdes_pcs_an_restart,
index 8271b8a..12aa637 100644 (file)
@@ -179,6 +179,7 @@ struct mv88e6xxx_vtu_entry {
        u16     fid;
        u8      sid;
        bool    valid;
+       bool    policy;
        u8      member[DSA_MAX_PORTS];
        u8      state[DSA_MAX_PORTS];
 };
@@ -392,6 +393,7 @@ struct mv88e6xxx_chip {
 struct mv88e6xxx_bus_ops {
        int (*read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
        int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
+       int (*init)(struct mv88e6xxx_chip *chip);
 };
 
 struct mv88e6xxx_mdio_bus {
@@ -609,9 +611,8 @@ struct mv88e6xxx_ops {
        const struct mv88e6xxx_ptp_ops *ptp_ops;
 
        /* Phylink */
-       void (*phylink_validate)(struct mv88e6xxx_chip *chip, int port,
-                                unsigned long *mask,
-                                struct phylink_link_state *state);
+       void (*phylink_get_caps)(struct mv88e6xxx_chip *chip, int port,
+                                struct phylink_config *config);
 
        /* Max Frame Size */
        int (*set_max_frame_size)(struct mv88e6xxx_chip *chip, int mtu);
index 4f3dbb0..2c1607c 100644 (file)
@@ -46,6 +46,7 @@
 
 /* Offset 0x02: VTU FID Register */
 #define MV88E6352_G1_VTU_FID           0x02
+#define MV88E6352_G1_VTU_FID_VID_POLICY        0x1000
 #define MV88E6352_G1_VTU_FID_MASK      0x0fff
 
 /* Offset 0x03: VTU SID Register */
index ae12c98..b1bd927 100644 (file)
@@ -27,7 +27,7 @@ static int mv88e6xxx_g1_vtu_fid_read(struct mv88e6xxx_chip *chip,
                return err;
 
        entry->fid = val & MV88E6352_G1_VTU_FID_MASK;
-
+       entry->policy = !!(val & MV88E6352_G1_VTU_FID_VID_POLICY);
        return 0;
 }
 
@@ -36,6 +36,9 @@ static int mv88e6xxx_g1_vtu_fid_write(struct mv88e6xxx_chip *chip,
 {
        u16 val = entry->fid & MV88E6352_G1_VTU_FID_MASK;
 
+       if (entry->policy)
+               val |= MV88E6352_G1_VTU_FID_VID_POLICY;
+
        return mv88e6xxx_g1_write(chip, MV88E6352_G1_VTU_FID, val);
 }
 
index f3e2757..807aeaa 100644 (file)
 #define MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU       BIT(2)
 #define MV88E6352_G2_SCRATCH_CONFIG_DATA2      0x72
 #define MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK 0x3
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA3      0x73
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA3_S_SEL                BIT(1)
 
 #define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO    0
 #define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG    1
@@ -370,6 +372,7 @@ extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
 
 int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
                                      bool external);
+int mv88e6352_g2_scratch_port_has_serdes(struct mv88e6xxx_chip *chip, int port);
 int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip, u16 kind, u16 bin);
 int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip, u16 *stats);
 
index eda7100..a9d6e40 100644 (file)
@@ -289,3 +289,31 @@ int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
 
        return mv88e6xxx_g2_scratch_write(chip, misc_cfg, val);
 }
+
+/**
+ * mv88e6352_g2_scratch_port_has_serdes - indicate if a port can have a serdes
+ * @chip: chip private data
+ * @port: port number to check for serdes
+ *
+ * Indicates whether the port may have a serdes attached according to the
+ * pin strapping. Returns negative error number, 0 if the port is not
+ * configured to have a serdes, and 1 if the port is configured to have a
+ * serdes attached.
+ */
+int mv88e6352_g2_scratch_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
+{
+       u8 config3, p;
+       int err;
+
+       err = mv88e6xxx_g2_scratch_read(chip, MV88E6352_G2_SCRATCH_CONFIG_DATA3,
+                                       &config3);
+       if (err)
+               return err;
+
+       if (config3 & MV88E6352_G2_SCRATCH_CONFIG_DATA3_S_SEL)
+               p = 5;
+       else
+               p = 4;
+
+       return port == p;
+}
index ab41619..ceb4501 100644 (file)
@@ -1278,7 +1278,7 @@ int mv88e6xxx_port_drop_untagged(struct mv88e6xxx_chip *chip, int port,
        return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, new);
 }
 
-int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port)
+int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port, bool map)
 {
        u16 reg;
        int err;
@@ -1287,7 +1287,10 @@ int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port)
        if (err)
                return err;
 
-       reg |= MV88E6XXX_PORT_CTL2_MAP_DA;
+       if (map)
+               reg |= MV88E6XXX_PORT_CTL2_MAP_DA;
+       else
+               reg &= ~MV88E6XXX_PORT_CTL2_MAP_DA;
 
        return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, reg);
 }
index 03382b6..3a13db2 100644 (file)
 #define MV88E6XXX_PORT_STS_TX_PAUSED           0x0020
 #define MV88E6XXX_PORT_STS_FLOW_CTL            0x0010
 #define MV88E6XXX_PORT_STS_CMODE_MASK          0x000f
+#define MV88E6XXX_PORT_STS_CMODE_MII_PHY       0x0001
+#define MV88E6XXX_PORT_STS_CMODE_MII           0x0002
+#define MV88E6XXX_PORT_STS_CMODE_GMII          0x0003
+#define MV88E6XXX_PORT_STS_CMODE_RMII_PHY      0x0004
+#define MV88E6XXX_PORT_STS_CMODE_RMII          0x0005
 #define MV88E6XXX_PORT_STS_CMODE_RGMII         0x0007
 #define MV88E6XXX_PORT_STS_CMODE_100BASEX      0x0008
 #define MV88E6XXX_PORT_STS_CMODE_1000BASEX     0x0009
@@ -425,7 +430,7 @@ int mv88e6185_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
 int mv88e6352_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
 int mv88e6xxx_port_drop_untagged(struct mv88e6xxx_chip *chip, int port,
                                 bool drop_untagged);
-int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port);
+int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port, bool map);
 int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port,
                                     int upstream_port);
 int mv88e6xxx_port_set_mirror(struct mv88e6xxx_chip *chip, int port,
index 2b05ead..6a177bf 100644 (file)
@@ -272,14 +272,6 @@ int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
        return lane;
 }
 
-static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
-{
-       if (mv88e6xxx_serdes_get_lane(chip, port) >= 0)
-               return true;
-
-       return false;
-}
-
 struct mv88e6352_serdes_hw_stat {
        char string[ETH_GSTRING_LEN];
        int sizeof_stat;
@@ -293,20 +285,24 @@ static struct mv88e6352_serdes_hw_stat mv88e6352_serdes_hw_stats[] = {
 
 int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
 {
-       if (mv88e6352_port_has_serdes(chip, port))
-               return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
+       int err;
 
-       return 0;
+       err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+       if (err <= 0)
+               return err;
+
+       return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
 }
 
 int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
                                 int port, uint8_t *data)
 {
        struct mv88e6352_serdes_hw_stat *stat;
-       int i;
+       int err, i;
 
-       if (!mv88e6352_port_has_serdes(chip, port))
-               return 0;
+       err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+       if (err <= 0)
+               return err;
 
        for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
                stat = &mv88e6352_serdes_hw_stats[i];
@@ -348,11 +344,12 @@ int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
 {
        struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port];
        struct mv88e6352_serdes_hw_stat *stat;
+       int i, err;
        u64 value;
-       int i;
 
-       if (!mv88e6352_port_has_serdes(chip, port))
-               return 0;
+       err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+       if (err <= 0)
+               return err;
 
        BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) >
                     ARRAY_SIZE(mv88e6xxx_port->serdes_stats));
@@ -419,8 +416,13 @@ unsigned int mv88e6352_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port)
 
 int mv88e6352_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port)
 {
-       if (!mv88e6352_port_has_serdes(chip, port))
-               return 0;
+       int err;
+
+       mv88e6xxx_reg_lock(chip);
+       err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+       mv88e6xxx_reg_unlock(chip);
+       if (err <= 0)
+               return err;
 
        return 32 * sizeof(u16);
 }
@@ -432,7 +434,8 @@ void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
        int err;
        int i;
 
-       if (!mv88e6352_port_has_serdes(chip, port))
+       err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
+       if (err <= 0)
                return;
 
        for (i = 0 ; i < 32; i++) {
index 282fe08..a990271 100644 (file)
@@ -55,11 +55,15 @@ static int mv88e6xxx_smi_direct_write(struct mv88e6xxx_chip *chip,
 static int mv88e6xxx_smi_direct_wait(struct mv88e6xxx_chip *chip,
                                     int dev, int reg, int bit, int val)
 {
+       const unsigned long timeout = jiffies + msecs_to_jiffies(50);
        u16 data;
        int err;
        int i;
 
-       for (i = 0; i < 16; i++) {
+       /* Even if the initial poll takes longer than 50ms, always do
+        * at least one more attempt.
+        */
+       for (i = 0; time_before(jiffies, timeout) || (i < 2); i++) {
                err = mv88e6xxx_smi_direct_read(chip, dev, reg, &data);
                if (err)
                        return err;
@@ -67,7 +71,10 @@ static int mv88e6xxx_smi_direct_wait(struct mv88e6xxx_chip *chip,
                if (!!(data & BIT(bit)) == !!val)
                        return 0;
 
-               usleep_range(1000, 2000);
+               if (i < 2)
+                       cpu_relax();
+               else
+                       usleep_range(1000, 2000);
        }
 
        return -ETIMEDOUT;
@@ -104,11 +111,6 @@ static int mv88e6xxx_smi_indirect_read(struct mv88e6xxx_chip *chip,
 {
        int err;
 
-       err = mv88e6xxx_smi_direct_wait(chip, chip->sw_addr,
-                                       MV88E6XXX_SMI_CMD, 15, 0);
-       if (err)
-               return err;
-
        err = mv88e6xxx_smi_direct_write(chip, chip->sw_addr,
                                         MV88E6XXX_SMI_CMD,
                                         MV88E6XXX_SMI_CMD_BUSY |
@@ -132,11 +134,6 @@ static int mv88e6xxx_smi_indirect_write(struct mv88e6xxx_chip *chip,
 {
        int err;
 
-       err = mv88e6xxx_smi_direct_wait(chip, chip->sw_addr,
-                                       MV88E6XXX_SMI_CMD, 15, 0);
-       if (err)
-               return err;
-
        err = mv88e6xxx_smi_direct_write(chip, chip->sw_addr,
                                         MV88E6XXX_SMI_DATA, data);
        if (err)
@@ -155,9 +152,20 @@ static int mv88e6xxx_smi_indirect_write(struct mv88e6xxx_chip *chip,
                                         MV88E6XXX_SMI_CMD, 15, 0);
 }
 
+static int mv88e6xxx_smi_indirect_init(struct mv88e6xxx_chip *chip)
+{
+       /* Ensure that the chip starts out in the ready state. As both
+        * reads and writes always ensure this on return, they can
+        * safely depend on the chip not being busy on entry.
+        */
+       return mv88e6xxx_smi_direct_wait(chip, chip->sw_addr,
+                                        MV88E6XXX_SMI_CMD, 15, 0);
+}
+
 static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_indirect_ops = {
        .read = mv88e6xxx_smi_indirect_read,
        .write = mv88e6xxx_smi_indirect_write,
+       .init = mv88e6xxx_smi_indirect_init,
 };
 
 int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
@@ -175,5 +183,8 @@ int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
        chip->bus = bus;
        chip->sw_addr = sw_addr;
 
+       if (chip->smi_ops->init)
+               return chip->smi_ops->init(chip);
+
        return 0;
 }
index da0d7e6..3bda701 100644 (file)
@@ -499,52 +499,27 @@ static enum dsa_tag_protocol ar9331_sw_get_tag_protocol(struct dsa_switch *ds,
        return DSA_TAG_PROTO_AR9331;
 }
 
-static void ar9331_sw_phylink_validate(struct dsa_switch *ds, int port,
-                                      unsigned long *supported,
-                                      struct phylink_link_state *state)
+static void ar9331_sw_phylink_get_caps(struct dsa_switch *ds, int port,
+                                      struct phylink_config *config)
 {
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+       config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+               MAC_10 | MAC_100;
 
        switch (port) {
        case 0:
-               if (state->interface != PHY_INTERFACE_MODE_GMII)
-                       goto unsupported;
-
-               phylink_set(mask, 1000baseT_Full);
-               phylink_set(mask, 1000baseT_Half);
+               __set_bit(PHY_INTERFACE_MODE_GMII,
+                         config->supported_interfaces);
+               config->mac_capabilities |= MAC_1000;
                break;
        case 1:
        case 2:
        case 3:
        case 4:
        case 5:
-               if (state->interface != PHY_INTERFACE_MODE_INTERNAL)
-                       goto unsupported;
+               __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+                         config->supported_interfaces);
                break;
-       default:
-               linkmode_zero(supported);
-               dev_err(ds->dev, "Unsupported port: %i\n", port);
-               return;
        }
-
-       phylink_set_port_modes(mask);
-       phylink_set(mask, Pause);
-       phylink_set(mask, Asym_Pause);
-
-       phylink_set(mask, 10baseT_Half);
-       phylink_set(mask, 10baseT_Full);
-       phylink_set(mask, 100baseT_Half);
-       phylink_set(mask, 100baseT_Full);
-
-       linkmode_and(supported, supported, mask);
-       linkmode_and(state->advertising, state->advertising, mask);
-
-       return;
-
-unsupported:
-       linkmode_zero(supported);
-       dev_err(ds->dev, "Unsupported interface: %d, port: %d\n",
-               state->interface, port);
 }
 
 static void ar9331_sw_phylink_mac_config(struct dsa_switch *ds, int port,
@@ -697,7 +672,7 @@ static const struct dsa_switch_ops ar9331_sw_ops = {
        .get_tag_protocol       = ar9331_sw_get_tag_protocol,
        .setup                  = ar9331_sw_setup,
        .port_disable           = ar9331_sw_port_disable,
-       .phylink_validate       = ar9331_sw_phylink_validate,
+       .phylink_get_caps       = ar9331_sw_phylink_get_caps,
        .phylink_mac_config     = ar9331_sw_phylink_mac_config,
        .phylink_mac_link_down  = ar9331_sw_phylink_mac_link_down,
        .phylink_mac_link_up    = ar9331_sw_phylink_mac_link_up,
index 0396945..83066af 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/phylink.h>
 #include <linux/gpio/consumer.h>
 #include <linux/etherdevice.h>
+#include <linux/dsa/tag_qca.h>
 
 #include "qca8k.h"
 
@@ -74,12 +75,6 @@ static const struct qca8k_mib_desc ar8327_mib[] = {
        MIB_DESC(1, 0xac, "TXUnicast"),
 };
 
-/* The 32bit switch registers are accessed indirectly. To achieve this we need
- * to set the page of the register. Track the last page that was set to reduce
- * mdio writes
- */
-static u16 qca8k_current_page = 0xffff;
-
 static void
 qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page)
 {
@@ -94,6 +89,44 @@ qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page)
 }
 
 static int
+qca8k_set_lo(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 lo)
+{
+       u16 *cached_lo = &priv->mdio_cache.lo;
+       struct mii_bus *bus = priv->bus;
+       int ret;
+
+       if (lo == *cached_lo)
+               return 0;
+
+       ret = bus->write(bus, phy_id, regnum, lo);
+       if (ret < 0)
+               dev_err_ratelimited(&bus->dev,
+                                   "failed to write qca8k 32bit lo register\n");
+
+       *cached_lo = lo;
+       return 0;
+}
+
+static int
+qca8k_set_hi(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 hi)
+{
+       u16 *cached_hi = &priv->mdio_cache.hi;
+       struct mii_bus *bus = priv->bus;
+       int ret;
+
+       if (hi == *cached_hi)
+               return 0;
+
+       ret = bus->write(bus, phy_id, regnum, hi);
+       if (ret < 0)
+               dev_err_ratelimited(&bus->dev,
+                                   "failed to write qca8k 32bit hi register\n");
+
+       *cached_hi = hi;
+       return 0;
+}
+
+static int
 qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val)
 {
        int ret;
@@ -116,7 +149,7 @@ qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val)
 }
 
 static void
-qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
+qca8k_mii_write32(struct qca8k_priv *priv, int phy_id, u32 regnum, u32 val)
 {
        u16 lo, hi;
        int ret;
@@ -124,20 +157,19 @@ qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
        lo = val & 0xffff;
        hi = (u16)(val >> 16);
 
-       ret = bus->write(bus, phy_id, regnum, lo);
+       ret = qca8k_set_lo(priv, phy_id, regnum, lo);
        if (ret >= 0)
-               ret = bus->write(bus, phy_id, regnum + 1, hi);
-       if (ret < 0)
-               dev_err_ratelimited(&bus->dev,
-                                   "failed to write qca8k 32bit register\n");
+               ret = qca8k_set_hi(priv, phy_id, regnum + 1, hi);
 }
 
 static int
-qca8k_set_page(struct mii_bus *bus, u16 page)
+qca8k_set_page(struct qca8k_priv *priv, u16 page)
 {
+       u16 *cached_page = &priv->mdio_cache.page;
+       struct mii_bus *bus = priv->bus;
        int ret;
 
-       if (page == qca8k_current_page)
+       if (page == *cached_page)
                return 0;
 
        ret = bus->write(bus, 0x18, 0, page);
@@ -147,7 +179,7 @@ qca8k_set_page(struct mii_bus *bus, u16 page)
                return ret;
        }
 
-       qca8k_current_page = page;
+       *cached_page = page;
        usleep_range(1000, 2000);
        return 0;
 }
@@ -170,6 +202,252 @@ qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val)
        return regmap_update_bits(priv->regmap, reg, mask, write_val);
 }
 
+static void qca8k_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb)
+{
+       struct qca8k_mgmt_eth_data *mgmt_eth_data;
+       struct qca8k_priv *priv = ds->priv;
+       struct qca_mgmt_ethhdr *mgmt_ethhdr;
+       u8 len, cmd;
+
+       mgmt_ethhdr = (struct qca_mgmt_ethhdr *)skb_mac_header(skb);
+       mgmt_eth_data = &priv->mgmt_eth_data;
+
+       cmd = FIELD_GET(QCA_HDR_MGMT_CMD, mgmt_ethhdr->command);
+       len = FIELD_GET(QCA_HDR_MGMT_LENGTH, mgmt_ethhdr->command);
+
+       /* Make sure the seq match the requested packet */
+       if (mgmt_ethhdr->seq == mgmt_eth_data->seq)
+               mgmt_eth_data->ack = true;
+
+       if (cmd == MDIO_READ) {
+               mgmt_eth_data->data[0] = mgmt_ethhdr->mdio_data;
+
+               /* Get the rest of the 12 byte of data.
+                * The read/write function will extract the requested data.
+                */
+               if (len > QCA_HDR_MGMT_DATA1_LEN)
+                       memcpy(mgmt_eth_data->data + 1, skb->data,
+                              QCA_HDR_MGMT_DATA2_LEN);
+       }
+
+       complete(&mgmt_eth_data->rw_done);
+}
+
+static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 *val,
+                                              int priority, unsigned int len)
+{
+       struct qca_mgmt_ethhdr *mgmt_ethhdr;
+       unsigned int real_len;
+       struct sk_buff *skb;
+       u32 *data2;
+       u16 hdr;
+
+       skb = dev_alloc_skb(QCA_HDR_MGMT_PKT_LEN);
+       if (!skb)
+               return NULL;
+
+       /* Max value for len reg is 15 (0xf) but the switch actually return 16 byte
+        * Actually for some reason the steps are:
+        * 0: nothing
+        * 1-4: first 4 byte
+        * 5-6: first 12 byte
+        * 7-15: all 16 byte
+        */
+       if (len == 16)
+               real_len = 15;
+       else
+               real_len = len;
+
+       skb_reset_mac_header(skb);
+       skb_set_network_header(skb, skb->len);
+
+       mgmt_ethhdr = skb_push(skb, QCA_HDR_MGMT_HEADER_LEN + QCA_HDR_LEN);
+
+       hdr = FIELD_PREP(QCA_HDR_XMIT_VERSION, QCA_HDR_VERSION);
+       hdr |= FIELD_PREP(QCA_HDR_XMIT_PRIORITY, priority);
+       hdr |= QCA_HDR_XMIT_FROM_CPU;
+       hdr |= FIELD_PREP(QCA_HDR_XMIT_DP_BIT, BIT(0));
+       hdr |= FIELD_PREP(QCA_HDR_XMIT_CONTROL, QCA_HDR_XMIT_TYPE_RW_REG);
+
+       mgmt_ethhdr->command = FIELD_PREP(QCA_HDR_MGMT_ADDR, reg);
+       mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_LENGTH, real_len);
+       mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_CMD, cmd);
+       mgmt_ethhdr->command |= FIELD_PREP(QCA_HDR_MGMT_CHECK_CODE,
+                                          QCA_HDR_MGMT_CHECK_CODE_VAL);
+
+       if (cmd == MDIO_WRITE)
+               mgmt_ethhdr->mdio_data = *val;
+
+       mgmt_ethhdr->hdr = htons(hdr);
+
+       data2 = skb_put_zero(skb, QCA_HDR_MGMT_DATA2_LEN + QCA_HDR_MGMT_PADDING_LEN);
+       if (cmd == MDIO_WRITE && len > QCA_HDR_MGMT_DATA1_LEN)
+               memcpy(data2, val + 1, len - QCA_HDR_MGMT_DATA1_LEN);
+
+       return skb;
+}
+
+static void qca8k_mdio_header_fill_seq_num(struct sk_buff *skb, u32 seq_num)
+{
+       struct qca_mgmt_ethhdr *mgmt_ethhdr;
+
+       mgmt_ethhdr = (struct qca_mgmt_ethhdr *)skb->data;
+       mgmt_ethhdr->seq = FIELD_PREP(QCA_HDR_MGMT_SEQ_NUM, seq_num);
+}
+
+static int qca8k_read_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len)
+{
+       struct qca8k_mgmt_eth_data *mgmt_eth_data = &priv->mgmt_eth_data;
+       struct sk_buff *skb;
+       bool ack;
+       int ret;
+
+       skb = qca8k_alloc_mdio_header(MDIO_READ, reg, NULL,
+                                     QCA8K_ETHERNET_MDIO_PRIORITY, len);
+       if (!skb)
+               return -ENOMEM;
+
+       mutex_lock(&mgmt_eth_data->mutex);
+
+       /* Check mgmt_master if is operational */
+       if (!priv->mgmt_master) {
+               kfree_skb(skb);
+               mutex_unlock(&mgmt_eth_data->mutex);
+               return -EINVAL;
+       }
+
+       skb->dev = priv->mgmt_master;
+
+       reinit_completion(&mgmt_eth_data->rw_done);
+
+       /* Increment seq_num and set it in the mdio pkt */
+       mgmt_eth_data->seq++;
+       qca8k_mdio_header_fill_seq_num(skb, mgmt_eth_data->seq);
+       mgmt_eth_data->ack = false;
+
+       dev_queue_xmit(skb);
+
+       ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+                                         msecs_to_jiffies(QCA8K_ETHERNET_TIMEOUT));
+
+       *val = mgmt_eth_data->data[0];
+       if (len > QCA_HDR_MGMT_DATA1_LEN)
+               memcpy(val + 1, mgmt_eth_data->data + 1, len - QCA_HDR_MGMT_DATA1_LEN);
+
+       ack = mgmt_eth_data->ack;
+
+       mutex_unlock(&mgmt_eth_data->mutex);
+
+       if (ret <= 0)
+               return -ETIMEDOUT;
+
+       if (!ack)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int qca8k_write_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len)
+{
+       struct qca8k_mgmt_eth_data *mgmt_eth_data = &priv->mgmt_eth_data;
+       struct sk_buff *skb;
+       bool ack;
+       int ret;
+
+       skb = qca8k_alloc_mdio_header(MDIO_WRITE, reg, val,
+                                     QCA8K_ETHERNET_MDIO_PRIORITY, len);
+       if (!skb)
+               return -ENOMEM;
+
+       mutex_lock(&mgmt_eth_data->mutex);
+
+       /* Check mgmt_master if is operational */
+       if (!priv->mgmt_master) {
+               kfree_skb(skb);
+               mutex_unlock(&mgmt_eth_data->mutex);
+               return -EINVAL;
+       }
+
+       skb->dev = priv->mgmt_master;
+
+       reinit_completion(&mgmt_eth_data->rw_done);
+
+       /* Increment seq_num and set it in the mdio pkt */
+       mgmt_eth_data->seq++;
+       qca8k_mdio_header_fill_seq_num(skb, mgmt_eth_data->seq);
+       mgmt_eth_data->ack = false;
+
+       dev_queue_xmit(skb);
+
+       ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+                                         msecs_to_jiffies(QCA8K_ETHERNET_TIMEOUT));
+
+       ack = mgmt_eth_data->ack;
+
+       mutex_unlock(&mgmt_eth_data->mutex);
+
+       if (ret <= 0)
+               return -ETIMEDOUT;
+
+       if (!ack)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int
+qca8k_regmap_update_bits_eth(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val)
+{
+       u32 val = 0;
+       int ret;
+
+       ret = qca8k_read_eth(priv, reg, &val, sizeof(val));
+       if (ret)
+               return ret;
+
+       val &= ~mask;
+       val |= write_val;
+
+       return qca8k_write_eth(priv, reg, &val, sizeof(val));
+}
+
+static int
+qca8k_bulk_read(struct qca8k_priv *priv, u32 reg, u32 *val, int len)
+{
+       int i, count = len / sizeof(u32), ret;
+
+       if (priv->mgmt_master && !qca8k_read_eth(priv, reg, val, len))
+               return 0;
+
+       for (i = 0; i < count; i++) {
+               ret = regmap_read(priv->regmap, reg + (i * 4), val + i);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int
+qca8k_bulk_write(struct qca8k_priv *priv, u32 reg, u32 *val, int len)
+{
+       int i, count = len / sizeof(u32), ret;
+       u32 tmp;
+
+       if (priv->mgmt_master && !qca8k_write_eth(priv, reg, val, len))
+               return 0;
+
+       for (i = 0; i < count; i++) {
+               tmp = val[i];
+
+               ret = regmap_write(priv->regmap, reg + (i * 4), tmp);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static int
 qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val)
 {
@@ -178,11 +456,14 @@ qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val)
        u16 r1, r2, page;
        int ret;
 
+       if (!qca8k_read_eth(priv, reg, val, sizeof(val)))
+               return 0;
+
        qca8k_split_addr(reg, &r1, &r2, &page);
 
        mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-       ret = qca8k_set_page(bus, page);
+       ret = qca8k_set_page(priv, page);
        if (ret < 0)
                goto exit;
 
@@ -201,15 +482,18 @@ qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val)
        u16 r1, r2, page;
        int ret;
 
+       if (!qca8k_write_eth(priv, reg, &val, sizeof(val)))
+               return 0;
+
        qca8k_split_addr(reg, &r1, &r2, &page);
 
        mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-       ret = qca8k_set_page(bus, page);
+       ret = qca8k_set_page(priv, page);
        if (ret < 0)
                goto exit;
 
-       qca8k_mii_write32(bus, 0x10 | r2, r1, val);
+       qca8k_mii_write32(priv, 0x10 | r2, r1, val);
 
 exit:
        mutex_unlock(&bus->mdio_lock);
@@ -225,11 +509,14 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_
        u32 val;
        int ret;
 
+       if (!qca8k_regmap_update_bits_eth(priv, reg, mask, write_val))
+               return 0;
+
        qca8k_split_addr(reg, &r1, &r2, &page);
 
        mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-       ret = qca8k_set_page(bus, page);
+       ret = qca8k_set_page(priv, page);
        if (ret < 0)
                goto exit;
 
@@ -239,7 +526,7 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_
 
        val &= ~mask;
        val |= write_val;
-       qca8k_mii_write32(bus, 0x10 | r2, r1, val);
+       qca8k_mii_write32(priv, 0x10 | r2, r1, val);
 
 exit:
        mutex_unlock(&bus->mdio_lock);
@@ -296,17 +583,13 @@ qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
 static int
 qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb)
 {
-       u32 reg[4], val;
-       int i, ret;
+       u32 reg[3];
+       int ret;
 
        /* load the ARL table into an array */
-       for (i = 0; i < 4; i++) {
-               ret = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4), &val);
-               if (ret < 0)
-                       return ret;
-
-               reg[i] = val;
-       }
+       ret = qca8k_bulk_read(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg));
+       if (ret)
+               return ret;
 
        /* vid - 83:72 */
        fdb->vid = FIELD_GET(QCA8K_ATU_VID_MASK, reg[2]);
@@ -330,7 +613,6 @@ qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac,
                u8 aging)
 {
        u32 reg[3] = { 0 };
-       int i;
 
        /* vid - 83:72 */
        reg[2] = FIELD_PREP(QCA8K_ATU_VID_MASK, vid);
@@ -347,8 +629,7 @@ qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac,
        reg[0] |= FIELD_PREP(QCA8K_ATU_ADDR5_MASK, mac[5]);
 
        /* load the array into the ARL table */
-       for (i = 0; i < 3; i++)
-               qca8k_write(priv, QCA8K_REG_ATU_DATA0 + (i * 4), reg[i]);
+       qca8k_bulk_write(priv, QCA8K_REG_ATU_DATA0, reg, sizeof(reg));
 }
 
 static int
@@ -632,7 +913,10 @@ qca8k_mib_init(struct qca8k_priv *priv)
        int ret;
 
        mutex_lock(&priv->reg_mutex);
-       ret = regmap_set_bits(priv->regmap, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY);
+       ret = regmap_update_bits(priv->regmap, QCA8K_REG_MIB,
+                                QCA8K_MIB_FUNC | QCA8K_MIB_BUSY,
+                                FIELD_PREP(QCA8K_MIB_FUNC, QCA8K_MIB_FLUSH) |
+                                QCA8K_MIB_BUSY);
        if (ret)
                goto exit;
 
@@ -666,6 +950,199 @@ qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable)
                regmap_clear_bits(priv->regmap, QCA8K_REG_PORT_STATUS(port), mask);
 }
 
+static int
+qca8k_phy_eth_busy_wait(struct qca8k_mgmt_eth_data *mgmt_eth_data,
+                       struct sk_buff *read_skb, u32 *val)
+{
+       struct sk_buff *skb = skb_copy(read_skb, GFP_KERNEL);
+       bool ack;
+       int ret;
+
+       reinit_completion(&mgmt_eth_data->rw_done);
+
+       /* Increment seq_num and set it in the copy pkt */
+       mgmt_eth_data->seq++;
+       qca8k_mdio_header_fill_seq_num(skb, mgmt_eth_data->seq);
+       mgmt_eth_data->ack = false;
+
+       dev_queue_xmit(skb);
+
+       ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+                                         QCA8K_ETHERNET_TIMEOUT);
+
+       ack = mgmt_eth_data->ack;
+
+       if (ret <= 0)
+               return -ETIMEDOUT;
+
+       if (!ack)
+               return -EINVAL;
+
+       *val = mgmt_eth_data->data[0];
+
+       return 0;
+}
+
+static int
+qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy,
+                     int regnum, u16 data)
+{
+       struct sk_buff *write_skb, *clear_skb, *read_skb;
+       struct qca8k_mgmt_eth_data *mgmt_eth_data;
+       u32 write_val, clear_val = 0, val;
+       struct net_device *mgmt_master;
+       int ret, ret1;
+       bool ack;
+
+       if (regnum >= QCA8K_MDIO_MASTER_MAX_REG)
+               return -EINVAL;
+
+       mgmt_eth_data = &priv->mgmt_eth_data;
+
+       write_val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN |
+                   QCA8K_MDIO_MASTER_PHY_ADDR(phy) |
+                   QCA8K_MDIO_MASTER_REG_ADDR(regnum);
+
+       if (read) {
+               write_val |= QCA8K_MDIO_MASTER_READ;
+       } else {
+               write_val |= QCA8K_MDIO_MASTER_WRITE;
+               write_val |= QCA8K_MDIO_MASTER_DATA(data);
+       }
+
+       /* Prealloc all the needed skb before the lock */
+       write_skb = qca8k_alloc_mdio_header(MDIO_WRITE, QCA8K_MDIO_MASTER_CTRL, &write_val,
+                                           QCA8K_ETHERNET_PHY_PRIORITY, sizeof(write_val));
+       if (!write_skb)
+               return -ENOMEM;
+
+       clear_skb = qca8k_alloc_mdio_header(MDIO_WRITE, QCA8K_MDIO_MASTER_CTRL, &clear_val,
+                                           QCA8K_ETHERNET_PHY_PRIORITY, sizeof(clear_val));
+       if (!clear_skb) {
+               ret = -ENOMEM;
+               goto err_clear_skb;
+       }
+
+       read_skb = qca8k_alloc_mdio_header(MDIO_READ, QCA8K_MDIO_MASTER_CTRL, &clear_val,
+                                          QCA8K_ETHERNET_PHY_PRIORITY, sizeof(clear_val));
+       if (!read_skb) {
+               ret = -ENOMEM;
+               goto err_read_skb;
+       }
+
+       /* Actually start the request:
+        * 1. Send mdio master packet
+        * 2. Busy Wait for mdio master command
+        * 3. Get the data if we are reading
+        * 4. Reset the mdio master (even with error)
+        */
+       mutex_lock(&mgmt_eth_data->mutex);
+
+       /* Check if mgmt_master is operational */
+       mgmt_master = priv->mgmt_master;
+       if (!mgmt_master) {
+               mutex_unlock(&mgmt_eth_data->mutex);
+               ret = -EINVAL;
+               goto err_mgmt_master;
+       }
+
+       read_skb->dev = mgmt_master;
+       clear_skb->dev = mgmt_master;
+       write_skb->dev = mgmt_master;
+
+       reinit_completion(&mgmt_eth_data->rw_done);
+
+       /* Increment seq_num and set it in the write pkt */
+       mgmt_eth_data->seq++;
+       qca8k_mdio_header_fill_seq_num(write_skb, mgmt_eth_data->seq);
+       mgmt_eth_data->ack = false;
+
+       dev_queue_xmit(write_skb);
+
+       ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+                                         QCA8K_ETHERNET_TIMEOUT);
+
+       ack = mgmt_eth_data->ack;
+
+       if (ret <= 0) {
+               ret = -ETIMEDOUT;
+               kfree_skb(read_skb);
+               goto exit;
+       }
+
+       if (!ack) {
+               ret = -EINVAL;
+               kfree_skb(read_skb);
+               goto exit;
+       }
+
+       ret = read_poll_timeout(qca8k_phy_eth_busy_wait, ret1,
+                               !(val & QCA8K_MDIO_MASTER_BUSY), 0,
+                               QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false,
+                               mgmt_eth_data, read_skb, &val);
+
+       if (ret < 0 && ret1 < 0) {
+               ret = ret1;
+               goto exit;
+       }
+
+       if (read) {
+               reinit_completion(&mgmt_eth_data->rw_done);
+
+               /* Increment seq_num and set it in the read pkt */
+               mgmt_eth_data->seq++;
+               qca8k_mdio_header_fill_seq_num(read_skb, mgmt_eth_data->seq);
+               mgmt_eth_data->ack = false;
+
+               dev_queue_xmit(read_skb);
+
+               ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+                                                 QCA8K_ETHERNET_TIMEOUT);
+
+               ack = mgmt_eth_data->ack;
+
+               if (ret <= 0) {
+                       ret = -ETIMEDOUT;
+                       goto exit;
+               }
+
+               if (!ack) {
+                       ret = -EINVAL;
+                       goto exit;
+               }
+
+               ret = mgmt_eth_data->data[0] & QCA8K_MDIO_MASTER_DATA_MASK;
+       } else {
+               kfree_skb(read_skb);
+       }
+exit:
+       reinit_completion(&mgmt_eth_data->rw_done);
+
+       /* Increment seq_num and set it in the clear pkt */
+       mgmt_eth_data->seq++;
+       qca8k_mdio_header_fill_seq_num(clear_skb, mgmt_eth_data->seq);
+       mgmt_eth_data->ack = false;
+
+       dev_queue_xmit(clear_skb);
+
+       wait_for_completion_timeout(&mgmt_eth_data->rw_done,
+                                   QCA8K_ETHERNET_TIMEOUT);
+
+       mutex_unlock(&mgmt_eth_data->mutex);
+
+       return ret;
+
+       /* Error handling before lock */
+err_mgmt_master:
+       kfree_skb(read_skb);
+err_read_skb:
+       kfree_skb(clear_skb);
+err_clear_skb:
+       kfree_skb(write_skb);
+
+       return ret;
+}
+
 static u32
 qca8k_port_to_phy(int port)
 {
@@ -704,8 +1181,9 @@ qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask)
 }
 
 static int
-qca8k_mdio_write(struct mii_bus *bus, int phy, int regnum, u16 data)
+qca8k_mdio_write(struct qca8k_priv *priv, int phy, int regnum, u16 data)
 {
+       struct mii_bus *bus = priv->bus;
        u16 r1, r2, page;
        u32 val;
        int ret;
@@ -722,18 +1200,18 @@ qca8k_mdio_write(struct mii_bus *bus, int phy, int regnum, u16 data)
 
        mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-       ret = qca8k_set_page(bus, page);
+       ret = qca8k_set_page(priv, page);
        if (ret)
                goto exit;
 
-       qca8k_mii_write32(bus, 0x10 | r2, r1, val);
+       qca8k_mii_write32(priv, 0x10 | r2, r1, val);
 
        ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL,
                                   QCA8K_MDIO_MASTER_BUSY);
 
 exit:
        /* even if the busy_wait timeouts try to clear the MASTER_EN */
-       qca8k_mii_write32(bus, 0x10 | r2, r1, 0);
+       qca8k_mii_write32(priv, 0x10 | r2, r1, 0);
 
        mutex_unlock(&bus->mdio_lock);
 
@@ -741,8 +1219,9 @@ exit:
 }
 
 static int
-qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum)
+qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum)
 {
+       struct mii_bus *bus = priv->bus;
        u16 r1, r2, page;
        u32 val;
        int ret;
@@ -758,11 +1237,11 @@ qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum)
 
        mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-       ret = qca8k_set_page(bus, page);
+       ret = qca8k_set_page(priv, page);
        if (ret)
                goto exit;
 
-       qca8k_mii_write32(bus, 0x10 | r2, r1, val);
+       qca8k_mii_write32(priv, 0x10 | r2, r1, val);
 
        ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL,
                                   QCA8K_MDIO_MASTER_BUSY);
@@ -773,7 +1252,7 @@ qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum)
 
 exit:
        /* even if the busy_wait timeouts try to clear the MASTER_EN */
-       qca8k_mii_write32(bus, 0x10 | r2, r1, 0);
+       qca8k_mii_write32(priv, 0x10 | r2, r1, 0);
 
        mutex_unlock(&bus->mdio_lock);
 
@@ -787,24 +1266,35 @@ static int
 qca8k_internal_mdio_write(struct mii_bus *slave_bus, int phy, int regnum, u16 data)
 {
        struct qca8k_priv *priv = slave_bus->priv;
-       struct mii_bus *bus = priv->bus;
+       int ret;
 
-       return qca8k_mdio_write(bus, phy, regnum, data);
+       /* Use mdio Ethernet when available, fallback to legacy one on error */
+       ret = qca8k_phy_eth_command(priv, false, phy, regnum, data);
+       if (!ret)
+               return 0;
+
+       return qca8k_mdio_write(priv, phy, regnum, data);
 }
 
 static int
 qca8k_internal_mdio_read(struct mii_bus *slave_bus, int phy, int regnum)
 {
        struct qca8k_priv *priv = slave_bus->priv;
-       struct mii_bus *bus = priv->bus;
+       int ret;
 
-       return qca8k_mdio_read(bus, phy, regnum);
+       /* Use mdio Ethernet when available, fallback to legacy one on error */
+       ret = qca8k_phy_eth_command(priv, true, phy, regnum, 0);
+       if (ret >= 0)
+               return ret;
+
+       return qca8k_mdio_read(priv, phy, regnum);
 }
 
 static int
 qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data)
 {
        struct qca8k_priv *priv = ds->priv;
+       int ret;
 
        /* Check if the legacy mapping should be used and the
         * port is not correctly mapped to the right PHY in the
@@ -813,7 +1303,12 @@ qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data)
        if (priv->legacy_phy_port_mapping)
                port = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
 
-       return qca8k_mdio_write(priv->bus, port, regnum, data);
+       /* Use mdio Ethernet when available, fallback to legacy one on error */
+       ret = qca8k_phy_eth_command(priv, false, port, regnum, 0);
+       if (!ret)
+               return ret;
+
+       return qca8k_mdio_write(priv, port, regnum, data);
 }
 
 static int
@@ -829,7 +1324,12 @@ qca8k_phy_read(struct dsa_switch *ds, int port, int regnum)
        if (priv->legacy_phy_port_mapping)
                port = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
 
-       ret = qca8k_mdio_read(priv->bus, port, regnum);
+       /* Use mdio Ethernet when available, fallback to legacy one on error */
+       ret = qca8k_phy_eth_command(priv, true, port, regnum, 0);
+       if (ret >= 0)
+               return ret;
+
+       ret = qca8k_mdio_read(priv, port, regnum);
 
        if (ret < 0)
                return 0xffff;
@@ -1531,67 +2031,39 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
        }
 }
 
-static void
-qca8k_phylink_validate(struct dsa_switch *ds, int port,
-                      unsigned long *supported,
-                      struct phylink_link_state *state)
+static void qca8k_phylink_get_caps(struct dsa_switch *ds, int port,
+                                  struct phylink_config *config)
 {
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
-
        switch (port) {
        case 0: /* 1st CPU port */
-               if (state->interface != PHY_INTERFACE_MODE_NA &&
-                   state->interface != PHY_INTERFACE_MODE_RGMII &&
-                   state->interface != PHY_INTERFACE_MODE_RGMII_ID &&
-                   state->interface != PHY_INTERFACE_MODE_RGMII_TXID &&
-                   state->interface != PHY_INTERFACE_MODE_RGMII_RXID &&
-                   state->interface != PHY_INTERFACE_MODE_SGMII)
-                       goto unsupported;
+               phy_interface_set_rgmii(config->supported_interfaces);
+               __set_bit(PHY_INTERFACE_MODE_SGMII,
+                         config->supported_interfaces);
                break;
+
        case 1:
        case 2:
        case 3:
        case 4:
        case 5:
                /* Internal PHY */
-               if (state->interface != PHY_INTERFACE_MODE_NA &&
-                   state->interface != PHY_INTERFACE_MODE_GMII &&
-                   state->interface != PHY_INTERFACE_MODE_INTERNAL)
-                       goto unsupported;
+               __set_bit(PHY_INTERFACE_MODE_GMII,
+                         config->supported_interfaces);
+               __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+                         config->supported_interfaces);
                break;
+
        case 6: /* 2nd CPU port / external PHY */
-               if (state->interface != PHY_INTERFACE_MODE_NA &&
-                   state->interface != PHY_INTERFACE_MODE_RGMII &&
-                   state->interface != PHY_INTERFACE_MODE_RGMII_ID &&
-                   state->interface != PHY_INTERFACE_MODE_RGMII_TXID &&
-                   state->interface != PHY_INTERFACE_MODE_RGMII_RXID &&
-                   state->interface != PHY_INTERFACE_MODE_SGMII &&
-                   state->interface != PHY_INTERFACE_MODE_1000BASEX)
-                       goto unsupported;
+               phy_interface_set_rgmii(config->supported_interfaces);
+               __set_bit(PHY_INTERFACE_MODE_SGMII,
+                         config->supported_interfaces);
+               __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+                         config->supported_interfaces);
                break;
-       default:
-unsupported:
-               linkmode_zero(supported);
-               return;
        }
 
-       phylink_set_port_modes(mask);
-       phylink_set(mask, Autoneg);
-
-       phylink_set(mask, 1000baseT_Full);
-       phylink_set(mask, 10baseT_Half);
-       phylink_set(mask, 10baseT_Full);
-       phylink_set(mask, 100baseT_Half);
-       phylink_set(mask, 100baseT_Full);
-
-       if (state->interface == PHY_INTERFACE_MODE_1000BASEX)
-               phylink_set(mask, 1000baseX_Full);
-
-       phylink_set(mask, Pause);
-       phylink_set(mask, Asym_Pause);
-
-       linkmode_and(supported, supported, mask);
-       linkmode_and(state->advertising, state->advertising, mask);
+       config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+               MAC_10 | MAC_100 | MAC_1000FD;
 }
 
 static int
@@ -1703,6 +2175,97 @@ qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data)
                        ETH_GSTRING_LEN);
 }
 
+static void qca8k_mib_autocast_handler(struct dsa_switch *ds, struct sk_buff *skb)
+{
+       const struct qca8k_match_data *match_data;
+       struct qca8k_mib_eth_data *mib_eth_data;
+       struct qca8k_priv *priv = ds->priv;
+       const struct qca8k_mib_desc *mib;
+       struct mib_ethhdr *mib_ethhdr;
+       int i, mib_len, offset = 0;
+       u64 *data;
+       u8 port;
+
+       mib_ethhdr = (struct mib_ethhdr *)skb_mac_header(skb);
+       mib_eth_data = &priv->mib_eth_data;
+
+       /* The switch autocast every port. Ignore other packet and
+        * parse only the requested one.
+        */
+       port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, ntohs(mib_ethhdr->hdr));
+       if (port != mib_eth_data->req_port)
+               goto exit;
+
+       match_data = device_get_match_data(priv->dev);
+       data = mib_eth_data->data;
+
+       for (i = 0; i < match_data->mib_count; i++) {
+               mib = &ar8327_mib[i];
+
+               /* First 3 mib are present in the skb head */
+               if (i < 3) {
+                       data[i] = mib_ethhdr->data[i];
+                       continue;
+               }
+
+               mib_len = sizeof(uint32_t);
+
+               /* Some mib are 64 bit wide */
+               if (mib->size == 2)
+                       mib_len = sizeof(uint64_t);
+
+               /* Copy the mib value from packet to the */
+               memcpy(data + i, skb->data + offset, mib_len);
+
+               /* Set the offset for the next mib */
+               offset += mib_len;
+       }
+
+exit:
+       /* Complete on receiving all the mib packet */
+       if (refcount_dec_and_test(&mib_eth_data->port_parsed))
+               complete(&mib_eth_data->rw_done);
+}
+
+static int
+qca8k_get_ethtool_stats_eth(struct dsa_switch *ds, int port, u64 *data)
+{
+       struct dsa_port *dp = dsa_to_port(ds, port);
+       struct qca8k_mib_eth_data *mib_eth_data;
+       struct qca8k_priv *priv = ds->priv;
+       int ret;
+
+       mib_eth_data = &priv->mib_eth_data;
+
+       mutex_lock(&mib_eth_data->mutex);
+
+       reinit_completion(&mib_eth_data->rw_done);
+
+       mib_eth_data->req_port = dp->index;
+       mib_eth_data->data = data;
+       refcount_set(&mib_eth_data->port_parsed, QCA8K_NUM_PORTS);
+
+       mutex_lock(&priv->reg_mutex);
+
+       /* Send mib autocast request */
+       ret = regmap_update_bits(priv->regmap, QCA8K_REG_MIB,
+                                QCA8K_MIB_FUNC | QCA8K_MIB_BUSY,
+                                FIELD_PREP(QCA8K_MIB_FUNC, QCA8K_MIB_CAST) |
+                                QCA8K_MIB_BUSY);
+
+       mutex_unlock(&priv->reg_mutex);
+
+       if (ret)
+               goto exit;
+
+       ret = wait_for_completion_timeout(&mib_eth_data->rw_done, QCA8K_ETHERNET_TIMEOUT);
+
+exit:
+       mutex_unlock(&mib_eth_data->mutex);
+
+       return ret;
+}
+
 static void
 qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
                        uint64_t *data)
@@ -1714,6 +2277,10 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
        u32 hi = 0;
        int ret;
 
+       if (priv->mgmt_master &&
+           qca8k_get_ethtool_stats_eth(ds, port, data) > 0)
+               return;
+
        match_data = of_device_get_match_data(priv->dev);
 
        for (i = 0; i < match_data->mib_count; i++) {
@@ -2383,6 +2950,46 @@ qca8k_port_lag_leave(struct dsa_switch *ds, int port,
        return qca8k_lag_refresh_portmap(ds, port, lag, true);
 }
 
+static void
+qca8k_master_change(struct dsa_switch *ds, const struct net_device *master,
+                   bool operational)
+{
+       struct dsa_port *dp = master->dsa_ptr;
+       struct qca8k_priv *priv = ds->priv;
+
+       /* Ethernet MIB/MDIO is only supported for CPU port 0 */
+       if (dp->index != 0)
+               return;
+
+       mutex_lock(&priv->mgmt_eth_data.mutex);
+       mutex_lock(&priv->mib_eth_data.mutex);
+
+       priv->mgmt_master = operational ? (struct net_device *)master : NULL;
+
+       mutex_unlock(&priv->mib_eth_data.mutex);
+       mutex_unlock(&priv->mgmt_eth_data.mutex);
+}
+
+static int qca8k_connect_tag_protocol(struct dsa_switch *ds,
+                                     enum dsa_tag_protocol proto)
+{
+       struct qca_tagger_data *tagger_data;
+
+       switch (proto) {
+       case DSA_TAG_PROTO_QCA:
+               tagger_data = ds->tagger_data;
+
+               tagger_data->rw_reg_ack_handler = qca8k_rw_reg_ack_handler;
+               tagger_data->mib_autocast_handler = qca8k_mib_autocast_handler;
+
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
 static const struct dsa_switch_ops qca8k_switch_ops = {
        .get_tag_protocol       = qca8k_get_tag_protocol,
        .setup                  = qca8k_setup,
@@ -2410,7 +3017,7 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
        .port_vlan_filtering    = qca8k_port_vlan_filtering,
        .port_vlan_add          = qca8k_port_vlan_add,
        .port_vlan_del          = qca8k_port_vlan_del,
-       .phylink_validate       = qca8k_phylink_validate,
+       .phylink_get_caps       = qca8k_phylink_get_caps,
        .phylink_mac_link_state = qca8k_phylink_mac_link_state,
        .phylink_mac_config     = qca8k_phylink_mac_config,
        .phylink_mac_link_down  = qca8k_phylink_mac_link_down,
@@ -2418,6 +3025,8 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
        .get_phy_flags          = qca8k_get_phy_flags,
        .port_lag_join          = qca8k_port_lag_join,
        .port_lag_leave         = qca8k_port_lag_leave,
+       .master_state_change    = qca8k_master_change,
+       .connect_tag_protocol   = qca8k_connect_tag_protocol,
 };
 
 static int qca8k_read_switch_id(struct qca8k_priv *priv)
@@ -2488,6 +3097,10 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
                return PTR_ERR(priv->regmap);
        }
 
+       priv->mdio_cache.page = 0xffff;
+       priv->mdio_cache.lo = 0xffff;
+       priv->mdio_cache.hi = 0xffff;
+
        /* Check the detected switch id */
        ret = qca8k_read_switch_id(priv);
        if (ret)
@@ -2497,6 +3110,12 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
        if (!priv->ds)
                return -ENOMEM;
 
+       mutex_init(&priv->mgmt_eth_data.mutex);
+       init_completion(&priv->mgmt_eth_data.rw_done);
+
+       mutex_init(&priv->mib_eth_data.mutex);
+       init_completion(&priv->mib_eth_data.rw_done);
+
        priv->ds->dev = &mdiodev->dev;
        priv->ds->num_ports = QCA8K_NUM_PORTS;
        priv->ds->priv = priv;
index ab4a417..c3d3c22 100644 (file)
 #include <linux/delay.h>
 #include <linux/regmap.h>
 #include <linux/gpio.h>
+#include <linux/dsa/tag_qca.h>
+
+#define QCA8K_ETHERNET_MDIO_PRIORITY                   7
+#define QCA8K_ETHERNET_PHY_PRIORITY                    6
+#define QCA8K_ETHERNET_TIMEOUT                         100
 
 #define QCA8K_NUM_PORTS                                        7
 #define QCA8K_NUM_CPU_PORTS                            2
@@ -63,7 +68,7 @@
 #define QCA8K_REG_MODULE_EN                            0x030
 #define   QCA8K_MODULE_EN_MIB                          BIT(0)
 #define QCA8K_REG_MIB                                  0x034
-#define   QCA8K_MIB_FLUSH                              BIT(24)
+#define   QCA8K_MIB_FUNC                               GENMASK(26, 24)
 #define   QCA8K_MIB_CPU_KEEP                           BIT(20)
 #define   QCA8K_MIB_BUSY                               BIT(17)
 #define QCA8K_MDIO_MASTER_CTRL                         0x3c
@@ -313,6 +318,12 @@ enum qca8k_vlan_cmd {
        QCA8K_VLAN_READ = 6,
 };
 
+enum qca8k_mib_cmd {
+       QCA8K_MIB_FLUSH = 1,
+       QCA8K_MIB_FLUSH_PORT = 2,
+       QCA8K_MIB_CAST = 3,
+};
+
 struct ar8xxx_port_status {
        int enabled;
 };
@@ -328,6 +339,22 @@ enum {
        QCA8K_CPU_PORT6,
 };
 
+struct qca8k_mgmt_eth_data {
+       struct completion rw_done;
+       struct mutex mutex; /* Enforce one mdio read/write at time */
+       bool ack;
+       u32 seq;
+       u32 data[4];
+};
+
+struct qca8k_mib_eth_data {
+       struct completion rw_done;
+       struct mutex mutex; /* Process one command at time */
+       refcount_t port_parsed; /* Counter to track parsed port */
+       u8 req_port;
+       u64 *data; /* pointer to ethtool data */
+};
+
 struct qca8k_ports_config {
        bool sgmii_rx_clk_falling_edge;
        bool sgmii_tx_clk_falling_edge;
@@ -336,6 +363,19 @@ struct qca8k_ports_config {
        u8 rgmii_tx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */
 };
 
+struct qca8k_mdio_cache {
+/* The 32bit switch registers are accessed indirectly. To achieve this we need
+ * to set the page of the register. Track the last page that was set to reduce
+ * mdio writes
+ */
+       u16 page;
+/* lo and hi can also be cached and from Documentation we can skip one
+ * extra mdio write if lo or hi is didn't change.
+ */
+       u16 lo;
+       u16 hi;
+};
+
 struct qca8k_priv {
        u8 switch_id;
        u8 switch_revision;
@@ -353,6 +393,10 @@ struct qca8k_priv {
        struct dsa_switch_ops ops;
        struct gpio_desc *reset_gpio;
        unsigned int port_mtu[QCA8K_NUM_PORTS];
+       struct net_device *mgmt_master; /* Track if mdio/mib Ethernet is available */
+       struct qca8k_mgmt_eth_data mgmt_eth_data;
+       struct qca8k_mib_eth_data mib_eth_data;
+       struct qca8k_mdio_cache mdio_cache;
 };
 
 struct qca8k_mib_desc {
diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek-smi-core.c
deleted file mode 100644 (file)
index aae46ad..0000000
+++ /dev/null
@@ -1,523 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/* Realtek Simple Management Interface (SMI) driver
- * It can be discussed how "simple" this interface is.
- *
- * The SMI protocol piggy-backs the MDIO MDC and MDIO signals levels
- * but the protocol is not MDIO at all. Instead it is a Realtek
- * pecularity that need to bit-bang the lines in a special way to
- * communicate with the switch.
- *
- * ASICs we intend to support with this driver:
- *
- * RTL8366   - The original version, apparently
- * RTL8369   - Similar enough to have the same datsheet as RTL8366
- * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite
- *             different register layout from the other two
- * RTL8366S  - Is this "RTL8366 super"?
- * RTL8367   - Has an OpenWRT driver as well
- * RTL8368S  - Seems to be an alternative name for RTL8366RB
- * RTL8370   - Also uses SMI
- *
- * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
- * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com>
- * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv>
- * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com>
- * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_mdio.h>
-#include <linux/delay.h>
-#include <linux/gpio/consumer.h>
-#include <linux/platform_device.h>
-#include <linux/regmap.h>
-#include <linux/bitops.h>
-#include <linux/if_bridge.h>
-
-#include "realtek-smi-core.h"
-
-#define REALTEK_SMI_ACK_RETRY_COUNT            5
-#define REALTEK_SMI_HW_STOP_DELAY              25      /* msecs */
-#define REALTEK_SMI_HW_START_DELAY             100     /* msecs */
-
-static inline void realtek_smi_clk_delay(struct realtek_smi *smi)
-{
-       ndelay(smi->clk_delay);
-}
-
-static void realtek_smi_start(struct realtek_smi *smi)
-{
-       /* Set GPIO pins to output mode, with initial state:
-        * SCK = 0, SDA = 1
-        */
-       gpiod_direction_output(smi->mdc, 0);
-       gpiod_direction_output(smi->mdio, 1);
-       realtek_smi_clk_delay(smi);
-
-       /* CLK 1: 0 -> 1, 1 -> 0 */
-       gpiod_set_value(smi->mdc, 1);
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdc, 0);
-       realtek_smi_clk_delay(smi);
-
-       /* CLK 2: */
-       gpiod_set_value(smi->mdc, 1);
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdio, 0);
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdc, 0);
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdio, 1);
-}
-
-static void realtek_smi_stop(struct realtek_smi *smi)
-{
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdio, 0);
-       gpiod_set_value(smi->mdc, 1);
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdio, 1);
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdc, 1);
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdc, 0);
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdc, 1);
-
-       /* Add a click */
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdc, 0);
-       realtek_smi_clk_delay(smi);
-       gpiod_set_value(smi->mdc, 1);
-
-       /* Set GPIO pins to input mode */
-       gpiod_direction_input(smi->mdio);
-       gpiod_direction_input(smi->mdc);
-}
-
-static void realtek_smi_write_bits(struct realtek_smi *smi, u32 data, u32 len)
-{
-       for (; len > 0; len--) {
-               realtek_smi_clk_delay(smi);
-
-               /* Prepare data */
-               gpiod_set_value(smi->mdio, !!(data & (1 << (len - 1))));
-               realtek_smi_clk_delay(smi);
-
-               /* Clocking */
-               gpiod_set_value(smi->mdc, 1);
-               realtek_smi_clk_delay(smi);
-               gpiod_set_value(smi->mdc, 0);
-       }
-}
-
-static void realtek_smi_read_bits(struct realtek_smi *smi, u32 len, u32 *data)
-{
-       gpiod_direction_input(smi->mdio);
-
-       for (*data = 0; len > 0; len--) {
-               u32 u;
-
-               realtek_smi_clk_delay(smi);
-
-               /* Clocking */
-               gpiod_set_value(smi->mdc, 1);
-               realtek_smi_clk_delay(smi);
-               u = !!gpiod_get_value(smi->mdio);
-               gpiod_set_value(smi->mdc, 0);
-
-               *data |= (u << (len - 1));
-       }
-
-       gpiod_direction_output(smi->mdio, 0);
-}
-
-static int realtek_smi_wait_for_ack(struct realtek_smi *smi)
-{
-       int retry_cnt;
-
-       retry_cnt = 0;
-       do {
-               u32 ack;
-
-               realtek_smi_read_bits(smi, 1, &ack);
-               if (ack == 0)
-                       break;
-
-               if (++retry_cnt > REALTEK_SMI_ACK_RETRY_COUNT) {
-                       dev_err(smi->dev, "ACK timeout\n");
-                       return -ETIMEDOUT;
-               }
-       } while (1);
-
-       return 0;
-}
-
-static int realtek_smi_write_byte(struct realtek_smi *smi, u8 data)
-{
-       realtek_smi_write_bits(smi, data, 8);
-       return realtek_smi_wait_for_ack(smi);
-}
-
-static int realtek_smi_write_byte_noack(struct realtek_smi *smi, u8 data)
-{
-       realtek_smi_write_bits(smi, data, 8);
-       return 0;
-}
-
-static int realtek_smi_read_byte0(struct realtek_smi *smi, u8 *data)
-{
-       u32 t;
-
-       /* Read data */
-       realtek_smi_read_bits(smi, 8, &t);
-       *data = (t & 0xff);
-
-       /* Send an ACK */
-       realtek_smi_write_bits(smi, 0x00, 1);
-
-       return 0;
-}
-
-static int realtek_smi_read_byte1(struct realtek_smi *smi, u8 *data)
-{
-       u32 t;
-
-       /* Read data */
-       realtek_smi_read_bits(smi, 8, &t);
-       *data = (t & 0xff);
-
-       /* Send an ACK */
-       realtek_smi_write_bits(smi, 0x01, 1);
-
-       return 0;
-}
-
-static int realtek_smi_read_reg(struct realtek_smi *smi, u32 addr, u32 *data)
-{
-       unsigned long flags;
-       u8 lo = 0;
-       u8 hi = 0;
-       int ret;
-
-       spin_lock_irqsave(&smi->lock, flags);
-
-       realtek_smi_start(smi);
-
-       /* Send READ command */
-       ret = realtek_smi_write_byte(smi, smi->cmd_read);
-       if (ret)
-               goto out;
-
-       /* Set ADDR[7:0] */
-       ret = realtek_smi_write_byte(smi, addr & 0xff);
-       if (ret)
-               goto out;
-
-       /* Set ADDR[15:8] */
-       ret = realtek_smi_write_byte(smi, addr >> 8);
-       if (ret)
-               goto out;
-
-       /* Read DATA[7:0] */
-       realtek_smi_read_byte0(smi, &lo);
-       /* Read DATA[15:8] */
-       realtek_smi_read_byte1(smi, &hi);
-
-       *data = ((u32)lo) | (((u32)hi) << 8);
-
-       ret = 0;
-
- out:
-       realtek_smi_stop(smi);
-       spin_unlock_irqrestore(&smi->lock, flags);
-
-       return ret;
-}
-
-static int realtek_smi_write_reg(struct realtek_smi *smi,
-                                u32 addr, u32 data, bool ack)
-{
-       unsigned long flags;
-       int ret;
-
-       spin_lock_irqsave(&smi->lock, flags);
-
-       realtek_smi_start(smi);
-
-       /* Send WRITE command */
-       ret = realtek_smi_write_byte(smi, smi->cmd_write);
-       if (ret)
-               goto out;
-
-       /* Set ADDR[7:0] */
-       ret = realtek_smi_write_byte(smi, addr & 0xff);
-       if (ret)
-               goto out;
-
-       /* Set ADDR[15:8] */
-       ret = realtek_smi_write_byte(smi, addr >> 8);
-       if (ret)
-               goto out;
-
-       /* Write DATA[7:0] */
-       ret = realtek_smi_write_byte(smi, data & 0xff);
-       if (ret)
-               goto out;
-
-       /* Write DATA[15:8] */
-       if (ack)
-               ret = realtek_smi_write_byte(smi, data >> 8);
-       else
-               ret = realtek_smi_write_byte_noack(smi, data >> 8);
-       if (ret)
-               goto out;
-
-       ret = 0;
-
- out:
-       realtek_smi_stop(smi);
-       spin_unlock_irqrestore(&smi->lock, flags);
-
-       return ret;
-}
-
-/* There is one single case when we need to use this accessor and that
- * is when issueing soft reset. Since the device reset as soon as we write
- * that bit, no ACK will come back for natural reasons.
- */
-int realtek_smi_write_reg_noack(struct realtek_smi *smi, u32 addr,
-                               u32 data)
-{
-       return realtek_smi_write_reg(smi, addr, data, false);
-}
-EXPORT_SYMBOL_GPL(realtek_smi_write_reg_noack);
-
-/* Regmap accessors */
-
-static int realtek_smi_write(void *ctx, u32 reg, u32 val)
-{
-       struct realtek_smi *smi = ctx;
-
-       return realtek_smi_write_reg(smi, reg, val, true);
-}
-
-static int realtek_smi_read(void *ctx, u32 reg, u32 *val)
-{
-       struct realtek_smi *smi = ctx;
-
-       return realtek_smi_read_reg(smi, reg, val);
-}
-
-static const struct regmap_config realtek_smi_mdio_regmap_config = {
-       .reg_bits = 10, /* A4..A0 R4..R0 */
-       .val_bits = 16,
-       .reg_stride = 1,
-       /* PHY regs are at 0x8000 */
-       .max_register = 0xffff,
-       .reg_format_endian = REGMAP_ENDIAN_BIG,
-       .reg_read = realtek_smi_read,
-       .reg_write = realtek_smi_write,
-       .cache_type = REGCACHE_NONE,
-};
-
-static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum)
-{
-       struct realtek_smi *smi = bus->priv;
-
-       return smi->ops->phy_read(smi, addr, regnum);
-}
-
-static int realtek_smi_mdio_write(struct mii_bus *bus, int addr, int regnum,
-                                 u16 val)
-{
-       struct realtek_smi *smi = bus->priv;
-
-       return smi->ops->phy_write(smi, addr, regnum, val);
-}
-
-int realtek_smi_setup_mdio(struct realtek_smi *smi)
-{
-       struct device_node *mdio_np;
-       int ret;
-
-       mdio_np = of_get_compatible_child(smi->dev->of_node, "realtek,smi-mdio");
-       if (!mdio_np) {
-               dev_err(smi->dev, "no MDIO bus node\n");
-               return -ENODEV;
-       }
-
-       smi->slave_mii_bus = devm_mdiobus_alloc(smi->dev);
-       if (!smi->slave_mii_bus) {
-               ret = -ENOMEM;
-               goto err_put_node;
-       }
-       smi->slave_mii_bus->priv = smi;
-       smi->slave_mii_bus->name = "SMI slave MII";
-       smi->slave_mii_bus->read = realtek_smi_mdio_read;
-       smi->slave_mii_bus->write = realtek_smi_mdio_write;
-       snprintf(smi->slave_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d",
-                smi->ds->index);
-       smi->slave_mii_bus->dev.of_node = mdio_np;
-       smi->slave_mii_bus->parent = smi->dev;
-       smi->ds->slave_mii_bus = smi->slave_mii_bus;
-
-       ret = devm_of_mdiobus_register(smi->dev, smi->slave_mii_bus, mdio_np);
-       if (ret) {
-               dev_err(smi->dev, "unable to register MDIO bus %s\n",
-                       smi->slave_mii_bus->id);
-               goto err_put_node;
-       }
-
-       return 0;
-
-err_put_node:
-       of_node_put(mdio_np);
-
-       return ret;
-}
-
-static int realtek_smi_probe(struct platform_device *pdev)
-{
-       const struct realtek_smi_variant *var;
-       struct device *dev = &pdev->dev;
-       struct realtek_smi *smi;
-       struct device_node *np;
-       int ret;
-
-       var = of_device_get_match_data(dev);
-       np = dev->of_node;
-
-       smi = devm_kzalloc(dev, sizeof(*smi) + var->chip_data_sz, GFP_KERNEL);
-       if (!smi)
-               return -ENOMEM;
-       smi->chip_data = (void *)smi + sizeof(*smi);
-       smi->map = devm_regmap_init(dev, NULL, smi,
-                                   &realtek_smi_mdio_regmap_config);
-       if (IS_ERR(smi->map)) {
-               ret = PTR_ERR(smi->map);
-               dev_err(dev, "regmap init failed: %d\n", ret);
-               return ret;
-       }
-
-       /* Link forward and backward */
-       smi->dev = dev;
-       smi->clk_delay = var->clk_delay;
-       smi->cmd_read = var->cmd_read;
-       smi->cmd_write = var->cmd_write;
-       smi->ops = var->ops;
-
-       dev_set_drvdata(dev, smi);
-       spin_lock_init(&smi->lock);
-
-       /* TODO: if power is software controlled, set up any regulators here */
-
-       /* Assert then deassert RESET */
-       smi->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
-       if (IS_ERR(smi->reset)) {
-               dev_err(dev, "failed to get RESET GPIO\n");
-               return PTR_ERR(smi->reset);
-       }
-       msleep(REALTEK_SMI_HW_STOP_DELAY);
-       gpiod_set_value(smi->reset, 0);
-       msleep(REALTEK_SMI_HW_START_DELAY);
-       dev_info(dev, "deasserted RESET\n");
-
-       /* Fetch MDIO pins */
-       smi->mdc = devm_gpiod_get_optional(dev, "mdc", GPIOD_OUT_LOW);
-       if (IS_ERR(smi->mdc))
-               return PTR_ERR(smi->mdc);
-       smi->mdio = devm_gpiod_get_optional(dev, "mdio", GPIOD_OUT_LOW);
-       if (IS_ERR(smi->mdio))
-               return PTR_ERR(smi->mdio);
-
-       smi->leds_disabled = of_property_read_bool(np, "realtek,disable-leds");
-
-       ret = smi->ops->detect(smi);
-       if (ret) {
-               dev_err(dev, "unable to detect switch\n");
-               return ret;
-       }
-
-       smi->ds = devm_kzalloc(dev, sizeof(*smi->ds), GFP_KERNEL);
-       if (!smi->ds)
-               return -ENOMEM;
-
-       smi->ds->dev = dev;
-       smi->ds->num_ports = smi->num_ports;
-       smi->ds->priv = smi;
-
-       smi->ds->ops = var->ds_ops;
-       ret = dsa_register_switch(smi->ds);
-       if (ret) {
-               dev_err_probe(dev, ret, "unable to register switch\n");
-               return ret;
-       }
-       return 0;
-}
-
-static int realtek_smi_remove(struct platform_device *pdev)
-{
-       struct realtek_smi *smi = platform_get_drvdata(pdev);
-
-       if (!smi)
-               return 0;
-
-       dsa_unregister_switch(smi->ds);
-       if (smi->slave_mii_bus)
-               of_node_put(smi->slave_mii_bus->dev.of_node);
-       gpiod_set_value(smi->reset, 1);
-
-       platform_set_drvdata(pdev, NULL);
-
-       return 0;
-}
-
-static void realtek_smi_shutdown(struct platform_device *pdev)
-{
-       struct realtek_smi *smi = platform_get_drvdata(pdev);
-
-       if (!smi)
-               return;
-
-       dsa_switch_shutdown(smi->ds);
-
-       platform_set_drvdata(pdev, NULL);
-}
-
-static const struct of_device_id realtek_smi_of_match[] = {
-       {
-               .compatible = "realtek,rtl8366rb",
-               .data = &rtl8366rb_variant,
-       },
-       {
-               /* FIXME: add support for RTL8366S and more */
-               .compatible = "realtek,rtl8366s",
-               .data = NULL,
-       },
-       {
-               .compatible = "realtek,rtl8365mb",
-               .data = &rtl8365mb_variant,
-       },
-       { /* sentinel */ },
-};
-MODULE_DEVICE_TABLE(of, realtek_smi_of_match);
-
-static struct platform_driver realtek_smi_driver = {
-       .driver = {
-               .name = "realtek-smi",
-               .of_match_table = of_match_ptr(realtek_smi_of_match),
-       },
-       .probe  = realtek_smi_probe,
-       .remove = realtek_smi_remove,
-       .shutdown = realtek_smi_shutdown,
-};
-module_platform_driver(realtek_smi_driver);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig
new file mode 100644 (file)
index 0000000..5242698
--- /dev/null
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menuconfig NET_DSA_REALTEK
+       tristate "Realtek Ethernet switch family support"
+       depends on NET_DSA
+       select FIXED_PHY
+       select IRQ_DOMAIN
+       select REALTEK_PHY
+       select REGMAP
+       help
+         Select to enable support for Realtek Ethernet switch chips.
+
+config NET_DSA_REALTEK_MDIO
+       tristate "Realtek MDIO connected switch driver"
+       depends on NET_DSA_REALTEK
+       default y
+       help
+         Select to enable support for registering switches configured
+         through MDIO.
+
+config NET_DSA_REALTEK_SMI
+       tristate "Realtek SMI connected switch driver"
+       depends on NET_DSA_REALTEK
+       default y
+       help
+         Select to enable support for registering switches connected
+         through SMI.
+
+config NET_DSA_REALTEK_RTL8365MB
+       tristate "Realtek RTL8365MB switch subdriver"
+       default y
+       depends on NET_DSA_REALTEK
+       depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO
+       select NET_DSA_TAG_RTL8_4
+       help
+         Select to enable support for Realtek RTL8365MB-VC and RTL8367S.
+
+config NET_DSA_REALTEK_RTL8366RB
+       tristate "Realtek RTL8366RB switch subdriver"
+       default y
+       depends on NET_DSA_REALTEK
+       depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO
+       select NET_DSA_TAG_RTL4_A
+       help
+         Select to enable support for Realtek RTL8366RB
diff --git a/drivers/net/dsa/realtek/Makefile b/drivers/net/dsa/realtek/Makefile
new file mode 100644 (file)
index 0000000..0aab572
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_NET_DSA_REALTEK_MDIO)     += realtek-mdio.o
+obj-$(CONFIG_NET_DSA_REALTEK_SMI)      += realtek-smi.o
+obj-$(CONFIG_NET_DSA_REALTEK_RTL8366RB) += rtl8366.o
+rtl8366-objs                           := rtl8366-core.o rtl8366rb.o
+obj-$(CONFIG_NET_DSA_REALTEK_RTL8365MB) += rtl8365mb.o
diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c
new file mode 100644 (file)
index 0000000..0c5f2bd
--- /dev/null
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Realtek MDIO interface driver
+ *
+ * ASICs we intend to support with this driver:
+ *
+ * RTL8366   - The original version, apparently
+ * RTL8369   - Similar enough to have the same datasheet as RTL8366
+ * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite
+ *             different register layout from the other two
+ * RTL8366S  - Is this "RTL8366 super"?
+ * RTL8367   - Has an OpenWRT driver as well
+ * RTL8368S  - Seems to be an alternative name for RTL8366RB
+ * RTL8370   - Also uses SMI
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com>
+ * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv>
+ * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com>
+ * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+
+#include "realtek.h"
+
+/* Read/write via mdiobus */
+#define REALTEK_MDIO_CTRL0_REG         31
+#define REALTEK_MDIO_START_REG         29
+#define REALTEK_MDIO_CTRL1_REG         21
+#define REALTEK_MDIO_ADDRESS_REG       23
+#define REALTEK_MDIO_DATA_WRITE_REG    24
+#define REALTEK_MDIO_DATA_READ_REG     25
+
+#define REALTEK_MDIO_START_OP          0xFFFF
+#define REALTEK_MDIO_ADDR_OP           0x000E
+#define REALTEK_MDIO_READ_OP           0x0001
+#define REALTEK_MDIO_WRITE_OP          0x0003
+
+static int realtek_mdio_write(void *ctx, u32 reg, u32 val)
+{
+       struct realtek_priv *priv = ctx;
+       struct mii_bus *bus = priv->bus;
+       int ret;
+
+       mutex_lock(&bus->mdio_lock);
+
+       ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL0_REG, REALTEK_MDIO_ADDR_OP);
+       if (ret)
+               goto out_unlock;
+
+       ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_ADDRESS_REG, reg);
+       if (ret)
+               goto out_unlock;
+
+       ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_DATA_WRITE_REG, val);
+       if (ret)
+               goto out_unlock;
+
+       ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL1_REG, REALTEK_MDIO_WRITE_OP);
+
+out_unlock:
+       mutex_unlock(&bus->mdio_lock);
+
+       return ret;
+}
+
+static int realtek_mdio_read(void *ctx, u32 reg, u32 *val)
+{
+       struct realtek_priv *priv = ctx;
+       struct mii_bus *bus = priv->bus;
+       int ret;
+
+       mutex_lock(&bus->mdio_lock);
+
+       ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL0_REG, REALTEK_MDIO_ADDR_OP);
+       if (ret)
+               goto out_unlock;
+
+       ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_ADDRESS_REG, reg);
+       if (ret)
+               goto out_unlock;
+
+       ret = bus->write(bus, priv->mdio_addr, REALTEK_MDIO_CTRL1_REG, REALTEK_MDIO_READ_OP);
+       if (ret)
+               goto out_unlock;
+
+       ret = bus->read(bus, priv->mdio_addr, REALTEK_MDIO_DATA_READ_REG);
+       if (ret >= 0) {
+               *val = ret;
+               ret = 0;
+       }
+
+out_unlock:
+       mutex_unlock(&bus->mdio_lock);
+
+       return ret;
+}
+
+static const struct regmap_config realtek_mdio_regmap_config = {
+       .reg_bits = 10, /* A4..A0 R4..R0 */
+       .val_bits = 16,
+       .reg_stride = 1,
+       /* PHY regs are at 0x8000 */
+       .max_register = 0xffff,
+       .reg_format_endian = REGMAP_ENDIAN_BIG,
+       .reg_read = realtek_mdio_read,
+       .reg_write = realtek_mdio_write,
+       .cache_type = REGCACHE_NONE,
+};
+
+static int realtek_mdio_probe(struct mdio_device *mdiodev)
+{
+       struct realtek_priv *priv;
+       struct device *dev = &mdiodev->dev;
+       const struct realtek_variant *var;
+       int ret;
+       struct device_node *np;
+
+       var = of_device_get_match_data(dev);
+       if (!var)
+               return -EINVAL;
+
+       priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->map = devm_regmap_init(dev, NULL, priv, &realtek_mdio_regmap_config);
+       if (IS_ERR(priv->map)) {
+               ret = PTR_ERR(priv->map);
+               dev_err(dev, "regmap init failed: %d\n", ret);
+               return ret;
+       }
+
+       priv->mdio_addr = mdiodev->addr;
+       priv->bus = mdiodev->bus;
+       priv->dev = &mdiodev->dev;
+       priv->chip_data = (void *)priv + sizeof(*priv);
+
+       priv->clk_delay = var->clk_delay;
+       priv->cmd_read = var->cmd_read;
+       priv->cmd_write = var->cmd_write;
+       priv->ops = var->ops;
+
+       priv->write_reg_noack = realtek_mdio_write;
+
+       np = dev->of_node;
+
+       dev_set_drvdata(dev, priv);
+
+       /* TODO: if power is software controlled, set up any regulators here */
+       priv->leds_disabled = of_property_read_bool(np, "realtek,disable-leds");
+
+       ret = priv->ops->detect(priv);
+       if (ret) {
+               dev_err(dev, "unable to detect switch\n");
+               return ret;
+       }
+
+       priv->ds = devm_kzalloc(dev, sizeof(*priv->ds), GFP_KERNEL);
+       if (!priv->ds)
+               return -ENOMEM;
+
+       priv->ds->dev = dev;
+       priv->ds->num_ports = priv->num_ports;
+       priv->ds->priv = priv;
+       priv->ds->ops = var->ds_ops_mdio;
+
+       ret = dsa_register_switch(priv->ds);
+       if (ret) {
+               dev_err(priv->dev, "unable to register switch ret = %d\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static void realtek_mdio_remove(struct mdio_device *mdiodev)
+{
+       struct realtek_priv *priv = dev_get_drvdata(&mdiodev->dev);
+
+       if (!priv)
+               return;
+
+       dsa_unregister_switch(priv->ds);
+
+       dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static void realtek_mdio_shutdown(struct mdio_device *mdiodev)
+{
+       struct realtek_priv *priv = dev_get_drvdata(&mdiodev->dev);
+
+       if (!priv)
+               return;
+
+       dsa_switch_shutdown(priv->ds);
+
+       dev_set_drvdata(&mdiodev->dev, NULL);
+}
+
+static const struct of_device_id realtek_mdio_of_match[] = {
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB)
+       { .compatible = "realtek,rtl8366rb", .data = &rtl8366rb_variant, },
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB)
+       { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, },
+       { .compatible = "realtek,rtl8367s", .data = &rtl8365mb_variant, },
+#endif
+       { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, realtek_mdio_of_match);
+
+static struct mdio_driver realtek_mdio_driver = {
+       .mdiodrv.driver = {
+               .name = "realtek-mdio",
+               .of_match_table = of_match_ptr(realtek_mdio_of_match),
+       },
+       .probe  = realtek_mdio_probe,
+       .remove = realtek_mdio_remove,
+       .shutdown = realtek_mdio_shutdown,
+};
+
+mdio_module_driver(realtek_mdio_driver);
+
+MODULE_AUTHOR("Luiz Angelo Daros de Luca <luizluca@gmail.com>");
+MODULE_DESCRIPTION("Driver for Realtek ethernet switch connected via MDIO interface");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c
new file mode 100644 (file)
index 0000000..946fbbd
--- /dev/null
@@ -0,0 +1,535 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Realtek Simple Management Interface (SMI) driver
+ * It can be discussed how "simple" this interface is.
+ *
+ * The SMI protocol piggy-backs the MDIO MDC and MDIO signals levels
+ * but the protocol is not MDIO at all. Instead it is a Realtek
+ * peculiarity that needs to bit-bang the lines in a special way to
+ * communicate with the switch.
+ *
+ * ASICs we intend to support with this driver:
+ *
+ * RTL8366   - The original version, apparently
+ * RTL8369   - Similar enough to have the same datasheet as RTL8366
+ * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite
+ *             different register layout from the other two
+ * RTL8366S  - Is this "RTL8366 super"?
+ * RTL8367   - Has an OpenWRT driver as well
+ * RTL8368S  - Seems to be an alternative name for RTL8366RB
+ * RTL8370   - Also uses SMI
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com>
+ * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv>
+ * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com>
+ * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/bitops.h>
+#include <linux/if_bridge.h>
+
+#include "realtek.h"
+
+#define REALTEK_SMI_ACK_RETRY_COUNT            5
+#define REALTEK_SMI_HW_STOP_DELAY              25      /* msecs */
+#define REALTEK_SMI_HW_START_DELAY             100     /* msecs */
+
+static inline void realtek_smi_clk_delay(struct realtek_priv *priv)
+{
+       ndelay(priv->clk_delay);
+}
+
+static void realtek_smi_start(struct realtek_priv *priv)
+{
+       /* Set GPIO pins to output mode, with initial state:
+        * SCK = 0, SDA = 1
+        */
+       gpiod_direction_output(priv->mdc, 0);
+       gpiod_direction_output(priv->mdio, 1);
+       realtek_smi_clk_delay(priv);
+
+       /* CLK 1: 0 -> 1, 1 -> 0 */
+       gpiod_set_value(priv->mdc, 1);
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdc, 0);
+       realtek_smi_clk_delay(priv);
+
+       /* CLK 2: */
+       gpiod_set_value(priv->mdc, 1);
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdio, 0);
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdc, 0);
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdio, 1);
+}
+
+static void realtek_smi_stop(struct realtek_priv *priv)
+{
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdio, 0);
+       gpiod_set_value(priv->mdc, 1);
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdio, 1);
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdc, 1);
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdc, 0);
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdc, 1);
+
+       /* Add a click */
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdc, 0);
+       realtek_smi_clk_delay(priv);
+       gpiod_set_value(priv->mdc, 1);
+
+       /* Set GPIO pins to input mode */
+       gpiod_direction_input(priv->mdio);
+       gpiod_direction_input(priv->mdc);
+}
+
+static void realtek_smi_write_bits(struct realtek_priv *priv, u32 data, u32 len)
+{
+       for (; len > 0; len--) {
+               realtek_smi_clk_delay(priv);
+
+               /* Prepare data */
+               gpiod_set_value(priv->mdio, !!(data & (1 << (len - 1))));
+               realtek_smi_clk_delay(priv);
+
+               /* Clocking */
+               gpiod_set_value(priv->mdc, 1);
+               realtek_smi_clk_delay(priv);
+               gpiod_set_value(priv->mdc, 0);
+       }
+}
+
+static void realtek_smi_read_bits(struct realtek_priv *priv, u32 len, u32 *data)
+{
+       gpiod_direction_input(priv->mdio);
+
+       for (*data = 0; len > 0; len--) {
+               u32 u;
+
+               realtek_smi_clk_delay(priv);
+
+               /* Clocking */
+               gpiod_set_value(priv->mdc, 1);
+               realtek_smi_clk_delay(priv);
+               u = !!gpiod_get_value(priv->mdio);
+               gpiod_set_value(priv->mdc, 0);
+
+               *data |= (u << (len - 1));
+       }
+
+       gpiod_direction_output(priv->mdio, 0);
+}
+
+static int realtek_smi_wait_for_ack(struct realtek_priv *priv)
+{
+       int retry_cnt;
+
+       retry_cnt = 0;
+       do {
+               u32 ack;
+
+               realtek_smi_read_bits(priv, 1, &ack);
+               if (ack == 0)
+                       break;
+
+               if (++retry_cnt > REALTEK_SMI_ACK_RETRY_COUNT) {
+                       dev_err(priv->dev, "ACK timeout\n");
+                       return -ETIMEDOUT;
+               }
+       } while (1);
+
+       return 0;
+}
+
+static int realtek_smi_write_byte(struct realtek_priv *priv, u8 data)
+{
+       realtek_smi_write_bits(priv, data, 8);
+       return realtek_smi_wait_for_ack(priv);
+}
+
+static int realtek_smi_write_byte_noack(struct realtek_priv *priv, u8 data)
+{
+       realtek_smi_write_bits(priv, data, 8);
+       return 0;
+}
+
+static int realtek_smi_read_byte0(struct realtek_priv *priv, u8 *data)
+{
+       u32 t;
+
+       /* Read data */
+       realtek_smi_read_bits(priv, 8, &t);
+       *data = (t & 0xff);
+
+       /* Send an ACK */
+       realtek_smi_write_bits(priv, 0x00, 1);
+
+       return 0;
+}
+
+static int realtek_smi_read_byte1(struct realtek_priv *priv, u8 *data)
+{
+       u32 t;
+
+       /* Read data */
+       realtek_smi_read_bits(priv, 8, &t);
+       *data = (t & 0xff);
+
+       /* Send an ACK */
+       realtek_smi_write_bits(priv, 0x01, 1);
+
+       return 0;
+}
+
+static int realtek_smi_read_reg(struct realtek_priv *priv, u32 addr, u32 *data)
+{
+       unsigned long flags;
+       u8 lo = 0;
+       u8 hi = 0;
+       int ret;
+
+       spin_lock_irqsave(&priv->lock, flags);
+
+       realtek_smi_start(priv);
+
+       /* Send READ command */
+       ret = realtek_smi_write_byte(priv, priv->cmd_read);
+       if (ret)
+               goto out;
+
+       /* Set ADDR[7:0] */
+       ret = realtek_smi_write_byte(priv, addr & 0xff);
+       if (ret)
+               goto out;
+
+       /* Set ADDR[15:8] */
+       ret = realtek_smi_write_byte(priv, addr >> 8);
+       if (ret)
+               goto out;
+
+       /* Read DATA[7:0] */
+       realtek_smi_read_byte0(priv, &lo);
+       /* Read DATA[15:8] */
+       realtek_smi_read_byte1(priv, &hi);
+
+       *data = ((u32)lo) | (((u32)hi) << 8);
+
+       ret = 0;
+
+ out:
+       realtek_smi_stop(priv);
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       return ret;
+}
+
+static int realtek_smi_write_reg(struct realtek_priv *priv,
+                                u32 addr, u32 data, bool ack)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&priv->lock, flags);
+
+       realtek_smi_start(priv);
+
+       /* Send WRITE command */
+       ret = realtek_smi_write_byte(priv, priv->cmd_write);
+       if (ret)
+               goto out;
+
+       /* Set ADDR[7:0] */
+       ret = realtek_smi_write_byte(priv, addr & 0xff);
+       if (ret)
+               goto out;
+
+       /* Set ADDR[15:8] */
+       ret = realtek_smi_write_byte(priv, addr >> 8);
+       if (ret)
+               goto out;
+
+       /* Write DATA[7:0] */
+       ret = realtek_smi_write_byte(priv, data & 0xff);
+       if (ret)
+               goto out;
+
+       /* Write DATA[15:8] */
+       if (ack)
+               ret = realtek_smi_write_byte(priv, data >> 8);
+       else
+               ret = realtek_smi_write_byte_noack(priv, data >> 8);
+       if (ret)
+               goto out;
+
+       ret = 0;
+
+ out:
+       realtek_smi_stop(priv);
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       return ret;
+}
+
+/* There is one single case when we need to use this accessor and that
+ * is when issuing a soft reset. Since the device resets as soon as we write
+ * that bit, no ACK will come back for natural reasons.
+ */
+static int realtek_smi_write_reg_noack(void *ctx, u32 reg, u32 val)
+{
+       return realtek_smi_write_reg(ctx, reg, val, false);
+}
+
+/* Regmap accessors */
+
+static int realtek_smi_write(void *ctx, u32 reg, u32 val)
+{
+       struct realtek_priv *priv = ctx;
+
+       return realtek_smi_write_reg(priv, reg, val, true);
+}
+
+static int realtek_smi_read(void *ctx, u32 reg, u32 *val)
+{
+       struct realtek_priv *priv = ctx;
+
+       return realtek_smi_read_reg(priv, reg, val);
+}
+
+static const struct regmap_config realtek_smi_mdio_regmap_config = {
+       .reg_bits = 10, /* A4..A0 R4..R0 */
+       .val_bits = 16,
+       .reg_stride = 1,
+       /* PHY regs are at 0x8000 */
+       .max_register = 0xffff,
+       .reg_format_endian = REGMAP_ENDIAN_BIG,
+       .reg_read = realtek_smi_read,
+       .reg_write = realtek_smi_write,
+       .cache_type = REGCACHE_NONE,
+};
+
+static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum)
+{
+       struct realtek_priv *priv = bus->priv;
+
+       return priv->ops->phy_read(priv, addr, regnum);
+}
+
+static int realtek_smi_mdio_write(struct mii_bus *bus, int addr, int regnum,
+                                 u16 val)
+{
+       struct realtek_priv *priv = bus->priv;
+
+       return priv->ops->phy_write(priv, addr, regnum, val);
+}
+
+static int realtek_smi_setup_mdio(struct dsa_switch *ds)
+{
+       struct realtek_priv *priv =  ds->priv;
+       struct device_node *mdio_np;
+       int ret;
+
+       mdio_np = of_get_compatible_child(priv->dev->of_node, "realtek,smi-mdio");
+       if (!mdio_np) {
+               dev_err(priv->dev, "no MDIO bus node\n");
+               return -ENODEV;
+       }
+
+       priv->slave_mii_bus = devm_mdiobus_alloc(priv->dev);
+       if (!priv->slave_mii_bus) {
+               ret = -ENOMEM;
+               goto err_put_node;
+       }
+       priv->slave_mii_bus->priv = priv;
+       priv->slave_mii_bus->name = "SMI slave MII";
+       priv->slave_mii_bus->read = realtek_smi_mdio_read;
+       priv->slave_mii_bus->write = realtek_smi_mdio_write;
+       snprintf(priv->slave_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d",
+                ds->index);
+       priv->slave_mii_bus->dev.of_node = mdio_np;
+       priv->slave_mii_bus->parent = priv->dev;
+       ds->slave_mii_bus = priv->slave_mii_bus;
+
+       ret = devm_of_mdiobus_register(priv->dev, priv->slave_mii_bus, mdio_np);
+       if (ret) {
+               dev_err(priv->dev, "unable to register MDIO bus %s\n",
+                       priv->slave_mii_bus->id);
+               goto err_put_node;
+       }
+
+       return 0;
+
+err_put_node:
+       of_node_put(mdio_np);
+
+       return ret;
+}
+
+static int realtek_smi_probe(struct platform_device *pdev)
+{
+       const struct realtek_variant *var;
+       struct device *dev = &pdev->dev;
+       struct realtek_priv *priv;
+       struct device_node *np;
+       int ret;
+
+       var = of_device_get_match_data(dev);
+       np = dev->of_node;
+
+       priv = devm_kzalloc(dev, sizeof(*priv) + var->chip_data_sz, GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+       priv->chip_data = (void *)priv + sizeof(*priv);
+       priv->map = devm_regmap_init(dev, NULL, priv,
+                                    &realtek_smi_mdio_regmap_config);
+       if (IS_ERR(priv->map)) {
+               ret = PTR_ERR(priv->map);
+               dev_err(dev, "regmap init failed: %d\n", ret);
+               return ret;
+       }
+
+       /* Link forward and backward */
+       priv->dev = dev;
+       priv->clk_delay = var->clk_delay;
+       priv->cmd_read = var->cmd_read;
+       priv->cmd_write = var->cmd_write;
+       priv->ops = var->ops;
+
+       priv->setup_interface = realtek_smi_setup_mdio;
+       priv->write_reg_noack = realtek_smi_write_reg_noack;
+
+       dev_set_drvdata(dev, priv);
+       spin_lock_init(&priv->lock);
+
+       /* TODO: if power is software controlled, set up any regulators here */
+
+       /* Assert then deassert RESET */
+       priv->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(priv->reset)) {
+               dev_err(dev, "failed to get RESET GPIO\n");
+               return PTR_ERR(priv->reset);
+       }
+       msleep(REALTEK_SMI_HW_STOP_DELAY);
+       gpiod_set_value(priv->reset, 0);
+       msleep(REALTEK_SMI_HW_START_DELAY);
+       dev_info(dev, "deasserted RESET\n");
+
+       /* Fetch MDIO pins */
+       priv->mdc = devm_gpiod_get_optional(dev, "mdc", GPIOD_OUT_LOW);
+       if (IS_ERR(priv->mdc))
+               return PTR_ERR(priv->mdc);
+       priv->mdio = devm_gpiod_get_optional(dev, "mdio", GPIOD_OUT_LOW);
+       if (IS_ERR(priv->mdio))
+               return PTR_ERR(priv->mdio);
+
+       priv->leds_disabled = of_property_read_bool(np, "realtek,disable-leds");
+
+       ret = priv->ops->detect(priv);
+       if (ret) {
+               dev_err(dev, "unable to detect switch\n");
+               return ret;
+       }
+
+       priv->ds = devm_kzalloc(dev, sizeof(*priv->ds), GFP_KERNEL);
+       if (!priv->ds)
+               return -ENOMEM;
+
+       priv->ds->dev = dev;
+       priv->ds->num_ports = priv->num_ports;
+       priv->ds->priv = priv;
+
+       priv->ds->ops = var->ds_ops_smi;
+       ret = dsa_register_switch(priv->ds);
+       if (ret) {
+               dev_err_probe(dev, ret, "unable to register switch\n");
+               return ret;
+       }
+       return 0;
+}
+
+static int realtek_smi_remove(struct platform_device *pdev)
+{
+       struct realtek_priv *priv = platform_get_drvdata(pdev);
+
+       if (!priv)
+               return 0;
+
+       dsa_unregister_switch(priv->ds);
+       if (priv->slave_mii_bus)
+               of_node_put(priv->slave_mii_bus->dev.of_node);
+       gpiod_set_value(priv->reset, 1);
+
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+static void realtek_smi_shutdown(struct platform_device *pdev)
+{
+       struct realtek_priv *priv = platform_get_drvdata(pdev);
+
+       if (!priv)
+               return;
+
+       dsa_switch_shutdown(priv->ds);
+
+       platform_set_drvdata(pdev, NULL);
+}
+
+static const struct of_device_id realtek_smi_of_match[] = {
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB)
+       {
+               .compatible = "realtek,rtl8366rb",
+               .data = &rtl8366rb_variant,
+       },
+#endif
+       {
+               /* FIXME: add support for RTL8366S and more */
+               .compatible = "realtek,rtl8366s",
+               .data = NULL,
+       },
+#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB)
+       {
+               .compatible = "realtek,rtl8365mb",
+               .data = &rtl8365mb_variant,
+       },
+       {
+               .compatible = "realtek,rtl8367s",
+               .data = &rtl8365mb_variant,
+       },
+#endif
+       { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, realtek_smi_of_match);
+
+static struct platform_driver realtek_smi_driver = {
+       .driver = {
+               .name = "realtek-smi",
+               .of_match_table = of_match_ptr(realtek_smi_of_match),
+       },
+       .probe  = realtek_smi_probe,
+       .remove = realtek_smi_remove,
+       .shutdown = realtek_smi_shutdown,
+};
+module_platform_driver(realtek_smi_driver);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+MODULE_DESCRIPTION("Driver for Realtek ethernet switch connected via SMI interface");
+MODULE_LICENSE("GPL");
similarity index 55%
rename from drivers/net/dsa/realtek-smi-core.h
rename to drivers/net/dsa/realtek/realtek.h
index 5bfa53e..ed5abf6 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/gpio/consumer.h>
 #include <net/dsa.h>
 
-struct realtek_smi_ops;
+struct realtek_ops;
 struct dentry;
 struct inode;
 struct file;
@@ -25,7 +25,7 @@ struct rtl8366_mib_counter {
        const char      *name;
 };
 
-/**
+/*
  * struct rtl8366_vlan_mc - Virtual LAN member configuration
  */
 struct rtl8366_vlan_mc {
@@ -43,13 +43,15 @@ struct rtl8366_vlan_4k {
        u8      fid;
 };
 
-struct realtek_smi {
+struct realtek_priv {
        struct device           *dev;
        struct gpio_desc        *reset;
        struct gpio_desc        *mdc;
        struct gpio_desc        *mdio;
        struct regmap           *map;
        struct mii_bus          *slave_mii_bus;
+       struct mii_bus          *bus;
+       int                     mdio_addr;
 
        unsigned int            clk_delay;
        u8                      cmd_read;
@@ -65,7 +67,9 @@ struct realtek_smi {
        unsigned int            num_mib_counters;
        struct rtl8366_mib_counter *mib_counters;
 
-       const struct realtek_smi_ops *ops;
+       const struct realtek_ops *ops;
+       int                     (*setup_interface)(struct dsa_switch *ds);
+       int                     (*write_reg_noack)(void *ctx, u32 addr, u32 data);
 
        int                     vlan_enabled;
        int                     vlan4k_enabled;
@@ -74,61 +78,57 @@ struct realtek_smi {
        void                    *chip_data; /* Per-chip extra variant data */
 };
 
-/**
- * struct realtek_smi_ops - vtable for the per-SMI-chiptype operations
+/*
+ * struct realtek_ops - vtable for the per-SMI-chiptype operations
  * @detect: detects the chiptype
  */
-struct realtek_smi_ops {
-       int     (*detect)(struct realtek_smi *smi);
-       int     (*reset_chip)(struct realtek_smi *smi);
-       int     (*setup)(struct realtek_smi *smi);
-       void    (*cleanup)(struct realtek_smi *smi);
-       int     (*get_mib_counter)(struct realtek_smi *smi,
+struct realtek_ops {
+       int     (*detect)(struct realtek_priv *priv);
+       int     (*reset_chip)(struct realtek_priv *priv);
+       int     (*setup)(struct realtek_priv *priv);
+       void    (*cleanup)(struct realtek_priv *priv);
+       int     (*get_mib_counter)(struct realtek_priv *priv,
                                   int port,
                                   struct rtl8366_mib_counter *mib,
                                   u64 *mibvalue);
-       int     (*get_vlan_mc)(struct realtek_smi *smi, u32 index,
+       int     (*get_vlan_mc)(struct realtek_priv *priv, u32 index,
                               struct rtl8366_vlan_mc *vlanmc);
-       int     (*set_vlan_mc)(struct realtek_smi *smi, u32 index,
+       int     (*set_vlan_mc)(struct realtek_priv *priv, u32 index,
                               const struct rtl8366_vlan_mc *vlanmc);
-       int     (*get_vlan_4k)(struct realtek_smi *smi, u32 vid,
+       int     (*get_vlan_4k)(struct realtek_priv *priv, u32 vid,
                               struct rtl8366_vlan_4k *vlan4k);
-       int     (*set_vlan_4k)(struct realtek_smi *smi,
+       int     (*set_vlan_4k)(struct realtek_priv *priv,
                               const struct rtl8366_vlan_4k *vlan4k);
-       int     (*get_mc_index)(struct realtek_smi *smi, int port, int *val);
-       int     (*set_mc_index)(struct realtek_smi *smi, int port, int index);
-       bool    (*is_vlan_valid)(struct realtek_smi *smi, unsigned int vlan);
-       int     (*enable_vlan)(struct realtek_smi *smi, bool enable);
-       int     (*enable_vlan4k)(struct realtek_smi *smi, bool enable);
-       int     (*enable_port)(struct realtek_smi *smi, int port, bool enable);
-       int     (*phy_read)(struct realtek_smi *smi, int phy, int regnum);
-       int     (*phy_write)(struct realtek_smi *smi, int phy, int regnum,
+       int     (*get_mc_index)(struct realtek_priv *priv, int port, int *val);
+       int     (*set_mc_index)(struct realtek_priv *priv, int port, int index);
+       bool    (*is_vlan_valid)(struct realtek_priv *priv, unsigned int vlan);
+       int     (*enable_vlan)(struct realtek_priv *priv, bool enable);
+       int     (*enable_vlan4k)(struct realtek_priv *priv, bool enable);
+       int     (*enable_port)(struct realtek_priv *priv, int port, bool enable);
+       int     (*phy_read)(struct realtek_priv *priv, int phy, int regnum);
+       int     (*phy_write)(struct realtek_priv *priv, int phy, int regnum,
                             u16 val);
 };
 
-struct realtek_smi_variant {
-       const struct dsa_switch_ops *ds_ops;
-       const struct realtek_smi_ops *ops;
+struct realtek_variant {
+       const struct dsa_switch_ops *ds_ops_smi;
+       const struct dsa_switch_ops *ds_ops_mdio;
+       const struct realtek_ops *ops;
        unsigned int clk_delay;
        u8 cmd_read;
        u8 cmd_write;
        size_t chip_data_sz;
 };
 
-/* SMI core calls */
-int realtek_smi_write_reg_noack(struct realtek_smi *smi, u32 addr,
-                               u32 data);
-int realtek_smi_setup_mdio(struct realtek_smi *smi);
-
 /* RTL8366 library helpers */
-int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used);
-int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
+int rtl8366_mc_is_used(struct realtek_priv *priv, int mc_index, int *used);
+int rtl8366_set_vlan(struct realtek_priv *priv, int vid, u32 member,
                     u32 untag, u32 fid);
-int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port,
+int rtl8366_set_pvid(struct realtek_priv *priv, unsigned int port,
                     unsigned int vid);
-int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable);
-int rtl8366_enable_vlan(struct realtek_smi *smi, bool enable);
-int rtl8366_reset_vlan(struct realtek_smi *smi);
+int rtl8366_enable_vlan4k(struct realtek_priv *priv, bool enable);
+int rtl8366_enable_vlan(struct realtek_priv *priv, bool enable);
+int rtl8366_reset_vlan(struct realtek_priv *priv);
 int rtl8366_vlan_add(struct dsa_switch *ds, int port,
                     const struct switchdev_obj_port_vlan *vlan,
                     struct netlink_ext_ack *extack);
@@ -139,7 +139,7 @@ void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset,
 int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset);
 void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
 
-extern const struct realtek_smi_variant rtl8366rb_variant;
-extern const struct realtek_smi_variant rtl8365mb_variant;
+extern const struct realtek_variant rtl8366rb_variant;
+extern const struct realtek_variant rtl8365mb_variant;
 
 #endif /*  _REALTEK_SMI_H */
similarity index 75%
rename from drivers/net/dsa/rtl8365mb.c
rename to drivers/net/dsa/realtek/rtl8365mb.c
index 3b72954..e1c5a67 100644 (file)
 #include <linux/regmap.h>
 #include <linux/if_bridge.h>
 
-#include "realtek-smi-core.h"
+#include "realtek.h"
 
 /* Chip-specific data and limits */
-#define RTL8365MB_CHIP_ID_8365MB_VC            0x6367
-#define RTL8365MB_CPU_PORT_NUM_8365MB_VC       6
-#define RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC    2112
+#define RTL8365MB_CHIP_ID_8365MB_VC    0x6367
+#define RTL8365MB_CHIP_VER_8365MB_VC   0x0040
+
+#define RTL8365MB_CHIP_ID_8367S                0x6367
+#define RTL8365MB_CHIP_VER_8367S       0x00A0
+
+#define RTL8365MB_CHIP_ID_8367RB       0x6367
+#define RTL8365MB_CHIP_VER_8367RB      0x0020
 
 /* Family-specific data and limits */
-#define RTL8365MB_PHYADDRMAX   7
-#define RTL8365MB_NUM_PHYREGS  32
-#define RTL8365MB_PHYREGMAX    (RTL8365MB_NUM_PHYREGS - 1)
-#define RTL8365MB_MAX_NUM_PORTS        (RTL8365MB_CPU_PORT_NUM_8365MB_VC + 1)
+#define RTL8365MB_PHYADDRMAX           7
+#define RTL8365MB_NUM_PHYREGS          32
+#define RTL8365MB_PHYREGMAX            (RTL8365MB_NUM_PHYREGS - 1)
+/* RTL8370MB and RTL8310SR, possibly supportable by this driver, have 10 ports */
+#define RTL8365MB_MAX_NUM_PORTS                10
+#define RTL8365MB_LEARN_LIMIT_MAX      2112
+
+/* valid for all 6-port or less variants */
+static const int rtl8365mb_extint_port_map[]  = { -1, -1, -1, -1, -1, -1, 1, 2, -1, -1};
 
 /* Chip identification registers */
 #define RTL8365MB_CHIP_ID_REG          0x1300
 /* The PHY OCP addresses of PHY registers 0~31 start here */
 #define RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE             0xA400
 
-/* EXT port interface mode values - used in DIGITAL_INTERFACE_SELECT */
+/* EXT interface port mode values - used in DIGITAL_INTERFACE_SELECT */
 #define RTL8365MB_EXT_PORT_MODE_DISABLE                0
 #define RTL8365MB_EXT_PORT_MODE_RGMII          1
 #define RTL8365MB_EXT_PORT_MODE_MII_MAC                2
 #define RTL8365MB_EXT_PORT_MODE_1000X          12
 #define RTL8365MB_EXT_PORT_MODE_100FX          13
 
-/* EXT port interface mode configuration registers 0~1 */
-#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0                0x1305
-#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1                0x13C3
-#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extport)   \
-               (RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 + \
-                ((_extport) >> 1) * (0x13C3 - 0x1305))
-#define   RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(_extport) \
-               (0xF << (((_extport) % 2)))
-#define   RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extport) \
-               (((_extport) % 2) * 4)
-
-/* EXT port RGMII TX/RX delay configuration registers 1~2 */
-#define RTL8365MB_EXT_RGMXF_REG1               0x1307
-#define RTL8365MB_EXT_RGMXF_REG2               0x13C5
-#define RTL8365MB_EXT_RGMXF_REG(_extport)   \
-               (RTL8365MB_EXT_RGMXF_REG1 + \
-                (((_extport) >> 1) * (0x13C5 - 0x1307)))
+/* Realtek docs and drivers use logical numbers such as EXT_PORT0=16, EXT_PORT1=17,
+ * EXT_PORT2=18, to interact with switch ports. That logical number is internally
+ * converted to either a physical port number (0..9) or an external interface id (0..2),
+ * depending on which function was called. The external interface id is calculated as
+ * (ext_id=logic_port-15), while the logical-to-physical mapping depends on the chip id/version.
+ *
+ * EXT_PORT0, mentioned in datasheets and the rtl8367c driver, is used in this
+ * driver as ext_id==1. EXT_PORT2, mentioned in the Realtek rtl8367c driver for
+ * 10-port switches, would have an ext_id of 3 (out of range for most extint
+ * macros), and ext_id 0 does not seem to be used for this family either.
+ */
+
+/* EXT interface mode configuration registers 0~1 */
+#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0                0x1305 /* EXT1 */
+#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1                0x13C3 /* EXT2 */
+#define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extint) \
+               ((_extint) == 1 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 : \
+                (_extint) == 2 ? RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 : \
+                0x0)
+#define   RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(_extint) \
+               (0xF << (((_extint) % 2)))
+#define   RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extint) \
+               (((_extint) % 2) * 4)
+
+/* EXT interface RGMII TX/RX delay configuration registers 0~2 */
+#define RTL8365MB_EXT_RGMXF_REG0               0x1306 /* EXT0 */
+#define RTL8365MB_EXT_RGMXF_REG1               0x1307 /* EXT1 */
+#define RTL8365MB_EXT_RGMXF_REG2               0x13C5 /* EXT2 */
+#define RTL8365MB_EXT_RGMXF_REG(_extint) \
+               ((_extint) == 0 ? RTL8365MB_EXT_RGMXF_REG0 : \
+                (_extint) == 1 ? RTL8365MB_EXT_RGMXF_REG1 : \
+                (_extint) == 2 ? RTL8365MB_EXT_RGMXF_REG2 : \
+                0x0)
 #define   RTL8365MB_EXT_RGMXF_RXDELAY_MASK     0x0007
 #define   RTL8365MB_EXT_RGMXF_TXDELAY_MASK     0x0008
 
-/* External port speed values - used in DIGITAL_INTERFACE_FORCE */
+/* External interface port speed values - used in DIGITAL_INTERFACE_FORCE */
 #define RTL8365MB_PORT_SPEED_10M       0
 #define RTL8365MB_PORT_SPEED_100M      1
 #define RTL8365MB_PORT_SPEED_1000M     2
 
-/* EXT port force configuration registers 0~2 */
-#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0                 0x1310
-#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1                 0x1311
-#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2                 0x13C4
-#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(_extport)   \
-               (RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 + \
-                ((_extport) & 0x1) +                     \
-                ((((_extport) >> 1) & 0x1) * (0x13C4 - 0x1310)))
+/* EXT interface force configuration registers 0~2 */
+#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0         0x1310 /* EXT0 */
+#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1         0x1311 /* EXT1 */
+#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2         0x13C4 /* EXT2 */
+#define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(_extint) \
+               ((_extint) == 0 ? RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 : \
+                (_extint) == 1 ? RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 : \
+                (_extint) == 2 ? RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 : \
+                0x0)
 #define   RTL8365MB_DIGITAL_INTERFACE_FORCE_EN_MASK            0x1000
 #define   RTL8365MB_DIGITAL_INTERFACE_FORCE_NWAY_MASK          0x0080
 #define   RTL8365MB_DIGITAL_INTERFACE_FORCE_TXPAUSE_MASK       0x0040
@@ -516,7 +543,7 @@ struct rtl8365mb_cpu {
 
 /**
  * struct rtl8365mb_port - private per-port data
- * @smi: pointer to parent realtek_smi data
+ * @priv: pointer to parent realtek_priv data
  * @index: DSA port index, same as dsa_port::index
  * @stats: link statistics populated by rtl8365mb_stats_poll, ready for atomic
  *         access via rtl8365mb_get_stats64
@@ -524,7 +551,7 @@ struct rtl8365mb_cpu {
  * @mib_work: delayed work for polling MIB counters
  */
 struct rtl8365mb_port {
-       struct realtek_smi *smi;
+       struct realtek_priv *priv;
        unsigned int index;
        struct rtnl_link_stats64 stats;
        spinlock_t stats_lock;
@@ -533,13 +560,12 @@ struct rtl8365mb_port {
 
 /**
  * struct rtl8365mb - private chip-specific driver data
- * @smi: pointer to parent realtek_smi data
+ * @priv: pointer to parent realtek_priv data
  * @irq: registered IRQ or zero
  * @chip_id: chip identifier
  * @chip_ver: chip silicon revision
  * @port_mask: mask of all ports
  * @learn_limit_max: maximum number of L2 addresses the chip can learn
- * @cpu: CPU tagging and CPU port configuration for this chip
  * @mib_lock: prevent concurrent reads of MIB counters
  * @ports: per-port data
  * @jam_table: chip-specific initialization jam table
@@ -548,29 +574,28 @@ struct rtl8365mb_port {
  * Private data for this driver.
  */
 struct rtl8365mb {
-       struct realtek_smi *smi;
+       struct realtek_priv *priv;
        int irq;
        u32 chip_id;
        u32 chip_ver;
        u32 port_mask;
        u32 learn_limit_max;
-       struct rtl8365mb_cpu cpu;
        struct mutex mib_lock;
        struct rtl8365mb_port ports[RTL8365MB_MAX_NUM_PORTS];
        const struct rtl8365mb_jam_tbl_entry *jam_table;
        size_t jam_size;
 };
 
-static int rtl8365mb_phy_poll_busy(struct realtek_smi *smi)
+static int rtl8365mb_phy_poll_busy(struct realtek_priv *priv)
 {
        u32 val;
 
-       return regmap_read_poll_timeout(smi->map,
+       return regmap_read_poll_timeout(priv->map,
                                        RTL8365MB_INDIRECT_ACCESS_STATUS_REG,
                                        val, !val, 10, 100);
 }
 
-static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy,
+static int rtl8365mb_phy_ocp_prepare(struct realtek_priv *priv, int phy,
                                     u32 ocp_addr)
 {
        u32 val;
@@ -579,7 +604,7 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy,
        /* Set OCP prefix */
        val = FIELD_GET(RTL8365MB_PHY_OCP_ADDR_PREFIX_MASK, ocp_addr);
        ret = regmap_update_bits(
-               smi->map, RTL8365MB_GPHY_OCP_MSB_0_REG,
+               priv->map, RTL8365MB_GPHY_OCP_MSB_0_REG,
                RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK,
                FIELD_PREP(RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, val));
        if (ret)
@@ -592,7 +617,7 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy,
                          ocp_addr >> 1);
        val |= FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK,
                          ocp_addr >> 6);
-       ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG,
+       ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG,
                           val);
        if (ret)
                return ret;
@@ -600,17 +625,17 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy,
        return 0;
 }
 
-static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy,
+static int rtl8365mb_phy_ocp_read(struct realtek_priv *priv, int phy,
                                  u32 ocp_addr, u16 *data)
 {
        u32 val;
        int ret;
 
-       ret = rtl8365mb_phy_poll_busy(smi);
+       ret = rtl8365mb_phy_poll_busy(priv);
        if (ret)
                return ret;
 
-       ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr);
+       ret = rtl8365mb_phy_ocp_prepare(priv, phy, ocp_addr);
        if (ret)
                return ret;
 
@@ -619,16 +644,16 @@ static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy,
                         RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) |
              FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK,
                         RTL8365MB_INDIRECT_ACCESS_CTRL_RW_READ);
-       ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val);
+       ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val);
        if (ret)
                return ret;
 
-       ret = rtl8365mb_phy_poll_busy(smi);
+       ret = rtl8365mb_phy_poll_busy(priv);
        if (ret)
                return ret;
 
        /* Get PHY register data */
-       ret = regmap_read(smi->map, RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG,
+       ret = regmap_read(priv->map, RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG,
                          &val);
        if (ret)
                return ret;
@@ -638,22 +663,22 @@ static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy,
        return 0;
 }
 
-static int rtl8365mb_phy_ocp_write(struct realtek_smi *smi, int phy,
+static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy,
                                   u32 ocp_addr, u16 data)
 {
        u32 val;
        int ret;
 
-       ret = rtl8365mb_phy_poll_busy(smi);
+       ret = rtl8365mb_phy_poll_busy(priv);
        if (ret)
                return ret;
 
-       ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr);
+       ret = rtl8365mb_phy_ocp_prepare(priv, phy, ocp_addr);
        if (ret)
                return ret;
 
        /* Set PHY register data */
-       ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG,
+       ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG,
                           data);
        if (ret)
                return ret;
@@ -663,18 +688,18 @@ static int rtl8365mb_phy_ocp_write(struct realtek_smi *smi, int phy,
                         RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) |
              FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK,
                         RTL8365MB_INDIRECT_ACCESS_CTRL_RW_WRITE);
-       ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val);
+       ret = regmap_write(priv->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val);
        if (ret)
                return ret;
 
-       ret = rtl8365mb_phy_poll_busy(smi);
+       ret = rtl8365mb_phy_poll_busy(priv);
        if (ret)
                return ret;
 
        return 0;
 }
 
-static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum)
+static int rtl8365mb_phy_read(struct realtek_priv *priv, int phy, int regnum)
 {
        u32 ocp_addr;
        u16 val;
@@ -688,21 +713,21 @@ static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum)
 
        ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2;
 
-       ret = rtl8365mb_phy_ocp_read(smi, phy, ocp_addr, &val);
+       ret = rtl8365mb_phy_ocp_read(priv, phy, ocp_addr, &val);
        if (ret) {
-               dev_err(smi->dev,
+               dev_err(priv->dev,
                        "failed to read PHY%d reg %02x @ %04x, ret %d\n", phy,
                        regnum, ocp_addr, ret);
                return ret;
        }
 
-       dev_dbg(smi->dev, "read PHY%d register 0x%02x @ %04x, val <- %04x\n",
+       dev_dbg(priv->dev, "read PHY%d register 0x%02x @ %04x, val <- %04x\n",
                phy, regnum, ocp_addr, val);
 
        return val;
 }
 
-static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum,
+static int rtl8365mb_phy_write(struct realtek_priv *priv, int phy, int regnum,
                               u16 val)
 {
        u32 ocp_addr;
@@ -716,20 +741,31 @@ static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum,
 
        ocp_addr = RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE + regnum * 2;
 
-       ret = rtl8365mb_phy_ocp_write(smi, phy, ocp_addr, val);
+       ret = rtl8365mb_phy_ocp_write(priv, phy, ocp_addr, val);
        if (ret) {
-               dev_err(smi->dev,
+               dev_err(priv->dev,
                        "failed to write PHY%d reg %02x @ %04x, ret %d\n", phy,
                        regnum, ocp_addr, ret);
                return ret;
        }
 
-       dev_dbg(smi->dev, "write PHY%d register 0x%02x @ %04x, val -> %04x\n",
+       dev_dbg(priv->dev, "write PHY%d register 0x%02x @ %04x, val -> %04x\n",
                phy, regnum, ocp_addr, val);
 
        return 0;
 }
 
+static int rtl8365mb_dsa_phy_read(struct dsa_switch *ds, int phy, int regnum)
+{
+       return rtl8365mb_phy_read(ds->priv, phy, regnum);
+}
+
+static int rtl8365mb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum,
+                                  u16 val)
+{
+       return rtl8365mb_phy_write(ds->priv, phy, regnum, val);
+}
+
 static enum dsa_tag_protocol
 rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port,
                           enum dsa_tag_protocol mp)
@@ -737,25 +773,25 @@ rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port,
        return DSA_TAG_PROTO_RTL8_4;
 }
 
-static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port,
+static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port,
                                      phy_interface_t interface)
 {
        struct device_node *dn;
        struct dsa_port *dp;
        int tx_delay = 0;
        int rx_delay = 0;
-       int ext_port;
+       int ext_int;
        u32 val;
        int ret;
 
-       if (port == smi->cpu_port) {
-               ext_port = 1;
-       } else {
-               dev_err(smi->dev, "only one EXT port is currently supported\n");
+       ext_int = rtl8365mb_extint_port_map[port];
+
+       if (ext_int <= 0) {
+               dev_err(priv->dev, "Port %d is not an external interface port\n", port);
                return -EINVAL;
        }
 
-       dp = dsa_to_port(smi->ds, port);
+       dp = dsa_to_port(priv->ds, port);
        dn = dp->dn;
 
        /* Set the RGMII TX/RX delay
@@ -786,8 +822,8 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port,
                if (val == 0 || val == 2)
                        tx_delay = val / 2;
                else
-                       dev_warn(smi->dev,
-                                "EXT port TX delay must be 0 or 2 ns\n");
+                       dev_warn(priv->dev,
+                                "EXT interface TX delay must be 0 or 2 ns\n");
        }
 
        if (!of_property_read_u32(dn, "rx-internal-delay-ps", &val)) {
@@ -796,12 +832,12 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port,
                if (val <= 7)
                        rx_delay = val;
                else
-                       dev_warn(smi->dev,
-                                "EXT port RX delay must be 0 to 2.1 ns\n");
+                       dev_warn(priv->dev,
+                                "EXT interface RX delay must be 0 to 2.1 ns\n");
        }
 
        ret = regmap_update_bits(
-               smi->map, RTL8365MB_EXT_RGMXF_REG(ext_port),
+               priv->map, RTL8365MB_EXT_RGMXF_REG(ext_int),
                RTL8365MB_EXT_RGMXF_TXDELAY_MASK |
                        RTL8365MB_EXT_RGMXF_RXDELAY_MASK,
                FIELD_PREP(RTL8365MB_EXT_RGMXF_TXDELAY_MASK, tx_delay) |
@@ -810,18 +846,18 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_smi *smi, int port,
                return ret;
 
        ret = regmap_update_bits(
-               smi->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(ext_port),
-               RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(ext_port),
+               priv->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(ext_int),
+               RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(ext_int),
                RTL8365MB_EXT_PORT_MODE_RGMII
                        << RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(
-                                  ext_port));
+                                  ext_int));
        if (ret)
                return ret;
 
        return 0;
 }
 
-static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
+static int rtl8365mb_ext_config_forcemode(struct realtek_priv *priv, int port,
                                          bool link, int speed, int duplex,
                                          bool tx_pause, bool rx_pause)
 {
@@ -830,14 +866,14 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
        u32 r_duplex;
        u32 r_speed;
        u32 r_link;
-       int ext_port;
+       int ext_int;
        int val;
        int ret;
 
-       if (port == smi->cpu_port) {
-               ext_port = 1;
-       } else {
-               dev_err(smi->dev, "only one EXT port is currently supported\n");
+       ext_int = rtl8365mb_extint_port_map[port];
+
+       if (ext_int <= 0) {
+               dev_err(priv->dev, "Port %d is not an external interface port\n", port);
                return -EINVAL;
        }
 
@@ -854,7 +890,7 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
                } else if (speed == SPEED_10) {
                        r_speed = RTL8365MB_PORT_SPEED_10M;
                } else {
-                       dev_err(smi->dev, "unsupported port speed %s\n",
+                       dev_err(priv->dev, "unsupported port speed %s\n",
                                phy_speed_to_str(speed));
                        return -EINVAL;
                }
@@ -864,7 +900,7 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
                } else if (duplex == DUPLEX_HALF) {
                        r_duplex = 0;
                } else {
-                       dev_err(smi->dev, "unsupported duplex %s\n",
+                       dev_err(priv->dev, "unsupported duplex %s\n",
                                phy_duplex_to_str(duplex));
                        return -EINVAL;
                }
@@ -886,8 +922,8 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
              FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_DUPLEX_MASK,
                         r_duplex) |
              FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_SPEED_MASK, r_speed);
-       ret = regmap_write(smi->map,
-                          RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(ext_port),
+       ret = regmap_write(priv->map,
+                          RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(ext_int),
                           val);
        if (ret)
                return ret;
@@ -898,13 +934,17 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_smi *smi, int port,
 static bool rtl8365mb_phy_mode_supported(struct dsa_switch *ds, int port,
                                         phy_interface_t interface)
 {
-       if (dsa_is_user_port(ds, port) &&
+       int ext_int;
+
+       ext_int = rtl8365mb_extint_port_map[port];
+
+       if (ext_int < 0 &&
            (interface == PHY_INTERFACE_MODE_NA ||
             interface == PHY_INTERFACE_MODE_INTERNAL ||
             interface == PHY_INTERFACE_MODE_GMII))
                /* Internal PHY */
                return true;
-       else if (dsa_is_cpu_port(ds, port) &&
+       else if ((ext_int >= 1) &&
                 phy_interface_mode_is_rgmii(interface))
                /* Extension MAC */
                return true;
@@ -916,7 +956,7 @@ static void rtl8365mb_phylink_validate(struct dsa_switch *ds, int port,
                                       unsigned long *supported,
                                       struct phylink_link_state *state)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0 };
 
        /* include/linux/phylink.h says:
@@ -925,7 +965,7 @@ static void rtl8365mb_phylink_validate(struct dsa_switch *ds, int port,
         */
        if (state->interface != PHY_INTERFACE_MODE_NA &&
            !rtl8365mb_phy_mode_supported(ds, port, state->interface)) {
-               dev_err(smi->dev, "phy mode %s is unsupported on port %d\n",
+               dev_err(priv->dev, "phy mode %s is unsupported on port %d\n",
                        phy_modes(state->interface), port);
                linkmode_zero(supported);
                return;
@@ -951,26 +991,26 @@ static void rtl8365mb_phylink_mac_config(struct dsa_switch *ds, int port,
                                         unsigned int mode,
                                         const struct phylink_link_state *state)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        int ret;
 
        if (!rtl8365mb_phy_mode_supported(ds, port, state->interface)) {
-               dev_err(smi->dev, "phy mode %s is unsupported on port %d\n",
+               dev_err(priv->dev, "phy mode %s is unsupported on port %d\n",
                        phy_modes(state->interface), port);
                return;
        }
 
        if (mode != MLO_AN_PHY && mode != MLO_AN_FIXED) {
-               dev_err(smi->dev,
+               dev_err(priv->dev,
                        "port %d supports only conventional PHY or fixed-link\n",
                        port);
                return;
        }
 
        if (phy_interface_mode_is_rgmii(state->interface)) {
-               ret = rtl8365mb_ext_config_rgmii(smi, port, state->interface);
+               ret = rtl8365mb_ext_config_rgmii(priv, port, state->interface);
                if (ret)
-                       dev_err(smi->dev,
+                       dev_err(priv->dev,
                                "failed to configure RGMII mode on port %d: %d\n",
                                port, ret);
                return;
@@ -985,20 +1025,20 @@ static void rtl8365mb_phylink_mac_link_down(struct dsa_switch *ds, int port,
                                            unsigned int mode,
                                            phy_interface_t interface)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8365mb_port *p;
        struct rtl8365mb *mb;
        int ret;
 
-       mb = smi->chip_data;
+       mb = priv->chip_data;
        p = &mb->ports[port];
        cancel_delayed_work_sync(&p->mib_work);
 
        if (phy_interface_mode_is_rgmii(interface)) {
-               ret = rtl8365mb_ext_config_forcemode(smi, port, false, 0, 0,
+               ret = rtl8365mb_ext_config_forcemode(priv, port, false, 0, 0,
                                                     false, false);
                if (ret)
-                       dev_err(smi->dev,
+                       dev_err(priv->dev,
                                "failed to reset forced mode on port %d: %d\n",
                                port, ret);
 
@@ -1013,21 +1053,21 @@ static void rtl8365mb_phylink_mac_link_up(struct dsa_switch *ds, int port,
                                          int duplex, bool tx_pause,
                                          bool rx_pause)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8365mb_port *p;
        struct rtl8365mb *mb;
        int ret;
 
-       mb = smi->chip_data;
+       mb = priv->chip_data;
        p = &mb->ports[port];
        schedule_delayed_work(&p->mib_work, 0);
 
        if (phy_interface_mode_is_rgmii(interface)) {
-               ret = rtl8365mb_ext_config_forcemode(smi, port, true, speed,
+               ret = rtl8365mb_ext_config_forcemode(priv, port, true, speed,
                                                     duplex, tx_pause,
                                                     rx_pause);
                if (ret)
-                       dev_err(smi->dev,
+                       dev_err(priv->dev,
                                "failed to force mode on port %d: %d\n", port,
                                ret);
 
@@ -1038,7 +1078,7 @@ static void rtl8365mb_phylink_mac_link_up(struct dsa_switch *ds, int port,
 static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port,
                                         u8 state)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        enum rtl8365mb_stp_state val;
        int msti = 0;
 
@@ -1057,36 +1097,36 @@ static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port,
                val = RTL8365MB_STP_STATE_FORWARDING;
                break;
        default:
-               dev_err(smi->dev, "invalid STP state: %u\n", state);
+               dev_err(priv->dev, "invalid STP state: %u\n", state);
                return;
        }
 
-       regmap_update_bits(smi->map, RTL8365MB_MSTI_CTRL_REG(msti, port),
+       regmap_update_bits(priv->map, RTL8365MB_MSTI_CTRL_REG(msti, port),
                           RTL8365MB_MSTI_CTRL_PORT_STATE_MASK(port),
                           val << RTL8365MB_MSTI_CTRL_PORT_STATE_OFFSET(port));
 }
 
-static int rtl8365mb_port_set_learning(struct realtek_smi *smi, int port,
+static int rtl8365mb_port_set_learning(struct realtek_priv *priv, int port,
                                       bool enable)
 {
-       struct rtl8365mb *mb = smi->chip_data;
+       struct rtl8365mb *mb = priv->chip_data;
 
        /* Enable/disable learning by limiting the number of L2 addresses the
         * port can learn. Realtek documentation states that a limit of zero
         * disables learning. When enabling learning, set it to the chip's
         * maximum.
         */
-       return regmap_write(smi->map, RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(port),
+       return regmap_write(priv->map, RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(port),
                            enable ? mb->learn_limit_max : 0);
 }
 
-static int rtl8365mb_port_set_isolation(struct realtek_smi *smi, int port,
+static int rtl8365mb_port_set_isolation(struct realtek_priv *priv, int port,
                                        u32 mask)
 {
-       return regmap_write(smi->map, RTL8365MB_PORT_ISOLATION_REG(port), mask);
+       return regmap_write(priv->map, RTL8365MB_PORT_ISOLATION_REG(port), mask);
 }
 
-static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port,
+static int rtl8365mb_mib_counter_read(struct realtek_priv *priv, int port,
                                      u32 offset, u32 length, u64 *mibvalue)
 {
        u64 tmpvalue = 0;
@@ -1098,13 +1138,13 @@ static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port,
         * and then poll the control register before reading the value from some
         * counter registers.
         */
-       ret = regmap_write(smi->map, RTL8365MB_MIB_ADDRESS_REG,
+       ret = regmap_write(priv->map, RTL8365MB_MIB_ADDRESS_REG,
                           RTL8365MB_MIB_ADDRESS(port, offset));
        if (ret)
                return ret;
 
        /* Poll for completion */
-       ret = regmap_read_poll_timeout(smi->map, RTL8365MB_MIB_CTRL0_REG, val,
+       ret = regmap_read_poll_timeout(priv->map, RTL8365MB_MIB_CTRL0_REG, val,
                                       !(val & RTL8365MB_MIB_CTRL0_BUSY_MASK),
                                       10, 100);
        if (ret)
@@ -1126,7 +1166,7 @@ static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port,
 
        /* Read the MIB counter 16 bits at a time */
        for (i = 0; i < length; i++) {
-               ret = regmap_read(smi->map,
+               ret = regmap_read(priv->map,
                                  RTL8365MB_MIB_COUNTER_REG(offset - i), &val);
                if (ret)
                        return ret;
@@ -1142,21 +1182,21 @@ static int rtl8365mb_mib_counter_read(struct realtek_smi *smi, int port,
 
 static void rtl8365mb_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8365mb *mb;
        int ret;
        int i;
 
-       mb = smi->chip_data;
+       mb = priv->chip_data;
 
        mutex_lock(&mb->mib_lock);
        for (i = 0; i < RTL8365MB_MIB_END; i++) {
                struct rtl8365mb_mib_counter *mib = &rtl8365mb_mib_counters[i];
 
-               ret = rtl8365mb_mib_counter_read(smi, port, mib->offset,
+               ret = rtl8365mb_mib_counter_read(priv, port, mib->offset,
                                                 mib->length, &data[i]);
                if (ret) {
-                       dev_err(smi->dev,
+                       dev_err(priv->dev,
                                "failed to read port %d counters: %d\n", port,
                                ret);
                        break;
@@ -1190,15 +1230,15 @@ static int rtl8365mb_get_sset_count(struct dsa_switch *ds, int port, int sset)
 static void rtl8365mb_get_phy_stats(struct dsa_switch *ds, int port,
                                    struct ethtool_eth_phy_stats *phy_stats)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8365mb_mib_counter *mib;
        struct rtl8365mb *mb;
 
-       mb = smi->chip_data;
+       mb = priv->chip_data;
        mib = &rtl8365mb_mib_counters[RTL8365MB_MIB_dot3StatsSymbolErrors];
 
        mutex_lock(&mb->mib_lock);
-       rtl8365mb_mib_counter_read(smi, port, mib->offset, mib->length,
+       rtl8365mb_mib_counter_read(priv, port, mib->offset, mib->length,
                                   &phy_stats->SymbolErrorDuringCarrier);
        mutex_unlock(&mb->mib_lock);
 }
@@ -1226,12 +1266,12 @@ static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port,
                [RTL8365MB_MIB_dot3StatsExcessiveCollisions] = 1,
 
        };
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8365mb *mb;
        int ret;
        int i;
 
-       mb = smi->chip_data;
+       mb = priv->chip_data;
 
        mutex_lock(&mb->mib_lock);
        for (i = 0; i < RTL8365MB_MIB_END; i++) {
@@ -1241,7 +1281,7 @@ static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port,
                if (!cnt[i])
                        continue;
 
-               ret = rtl8365mb_mib_counter_read(smi, port, mib->offset,
+               ret = rtl8365mb_mib_counter_read(priv, port, mib->offset,
                                                 mib->length, &cnt[i]);
                if (ret)
                        break;
@@ -1291,20 +1331,20 @@ static void rtl8365mb_get_mac_stats(struct dsa_switch *ds, int port,
 static void rtl8365mb_get_ctrl_stats(struct dsa_switch *ds, int port,
                                     struct ethtool_eth_ctrl_stats *ctrl_stats)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8365mb_mib_counter *mib;
        struct rtl8365mb *mb;
 
-       mb = smi->chip_data;
+       mb = priv->chip_data;
        mib = &rtl8365mb_mib_counters[RTL8365MB_MIB_dot3ControlInUnknownOpcodes];
 
        mutex_lock(&mb->mib_lock);
-       rtl8365mb_mib_counter_read(smi, port, mib->offset, mib->length,
+       rtl8365mb_mib_counter_read(priv, port, mib->offset, mib->length,
                                   &ctrl_stats->UnsupportedOpcodesReceived);
        mutex_unlock(&mb->mib_lock);
 }
 
-static void rtl8365mb_stats_update(struct realtek_smi *smi, int port)
+static void rtl8365mb_stats_update(struct realtek_priv *priv, int port)
 {
        u64 cnt[RTL8365MB_MIB_END] = {
                [RTL8365MB_MIB_ifOutOctets] = 1,
@@ -1323,7 +1363,7 @@ static void rtl8365mb_stats_update(struct realtek_smi *smi, int port)
                [RTL8365MB_MIB_dot3StatsFCSErrors] = 1,
                [RTL8365MB_MIB_dot3StatsLateCollisions] = 1,
        };
-       struct rtl8365mb *mb = smi->chip_data;
+       struct rtl8365mb *mb = priv->chip_data;
        struct rtnl_link_stats64 *stats;
        int ret;
        int i;
@@ -1338,7 +1378,7 @@ static void rtl8365mb_stats_update(struct realtek_smi *smi, int port)
                if (!cnt[i])
                        continue;
 
-               ret = rtl8365mb_mib_counter_read(smi, port, c->offset,
+               ret = rtl8365mb_mib_counter_read(priv, port, c->offset,
                                                 c->length, &cnt[i]);
                if (ret)
                        break;
@@ -1388,9 +1428,9 @@ static void rtl8365mb_stats_poll(struct work_struct *work)
        struct rtl8365mb_port *p = container_of(to_delayed_work(work),
                                                struct rtl8365mb_port,
                                                mib_work);
-       struct realtek_smi *smi = p->smi;
+       struct realtek_priv *priv = p->priv;
 
-       rtl8365mb_stats_update(smi, p->index);
+       rtl8365mb_stats_update(priv, p->index);
 
        schedule_delayed_work(&p->mib_work, RTL8365MB_STATS_INTERVAL_JIFFIES);
 }
@@ -1398,11 +1438,11 @@ static void rtl8365mb_stats_poll(struct work_struct *work)
 static void rtl8365mb_get_stats64(struct dsa_switch *ds, int port,
                                  struct rtnl_link_stats64 *s)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8365mb_port *p;
        struct rtl8365mb *mb;
 
-       mb = smi->chip_data;
+       mb = priv->chip_data;
        p = &mb->ports[port];
 
        spin_lock(&p->stats_lock);
@@ -1410,9 +1450,9 @@ static void rtl8365mb_get_stats64(struct dsa_switch *ds, int port,
        spin_unlock(&p->stats_lock);
 }
 
-static void rtl8365mb_stats_setup(struct realtek_smi *smi)
+static void rtl8365mb_stats_setup(struct realtek_priv *priv)
 {
-       struct rtl8365mb *mb = smi->chip_data;
+       struct rtl8365mb *mb = priv->chip_data;
        int i;
 
        /* Per-chip global mutex to protect MIB counter access, since doing
@@ -1420,10 +1460,10 @@ static void rtl8365mb_stats_setup(struct realtek_smi *smi)
         */
        mutex_init(&mb->mib_lock);
 
-       for (i = 0; i < smi->num_ports; i++) {
+       for (i = 0; i < priv->num_ports; i++) {
                struct rtl8365mb_port *p = &mb->ports[i];
 
-               if (dsa_is_unused_port(smi->ds, i))
+               if (dsa_is_unused_port(priv->ds, i))
                        continue;
 
                /* Per-port spinlock to protect the stats64 data */
@@ -1436,45 +1476,45 @@ static void rtl8365mb_stats_setup(struct realtek_smi *smi)
        }
 }
 
-static void rtl8365mb_stats_teardown(struct realtek_smi *smi)
+static void rtl8365mb_stats_teardown(struct realtek_priv *priv)
 {
-       struct rtl8365mb *mb = smi->chip_data;
+       struct rtl8365mb *mb = priv->chip_data;
        int i;
 
-       for (i = 0; i < smi->num_ports; i++) {
+       for (i = 0; i < priv->num_ports; i++) {
                struct rtl8365mb_port *p = &mb->ports[i];
 
-               if (dsa_is_unused_port(smi->ds, i))
+               if (dsa_is_unused_port(priv->ds, i))
                        continue;
 
                cancel_delayed_work_sync(&p->mib_work);
        }
 }
 
-static int rtl8365mb_get_and_clear_status_reg(struct realtek_smi *smi, u32 reg,
+static int rtl8365mb_get_and_clear_status_reg(struct realtek_priv *priv, u32 reg,
                                              u32 *val)
 {
        int ret;
 
-       ret = regmap_read(smi->map, reg, val);
+       ret = regmap_read(priv->map, reg, val);
        if (ret)
                return ret;
 
-       return regmap_write(smi->map, reg, *val);
+       return regmap_write(priv->map, reg, *val);
 }
 
 static irqreturn_t rtl8365mb_irq(int irq, void *data)
 {
-       struct realtek_smi *smi = data;
+       struct realtek_priv *priv = data;
        unsigned long line_changes = 0;
        struct rtl8365mb *mb;
        u32 stat;
        int line;
        int ret;
 
-       mb = smi->chip_data;
+       mb = priv->chip_data;
 
-       ret = rtl8365mb_get_and_clear_status_reg(smi, RTL8365MB_INTR_STATUS_REG,
+       ret = rtl8365mb_get_and_clear_status_reg(priv, RTL8365MB_INTR_STATUS_REG,
                                                 &stat);
        if (ret)
                goto out_error;
@@ -1485,14 +1525,14 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data)
                u32 val;
 
                ret = rtl8365mb_get_and_clear_status_reg(
-                       smi, RTL8365MB_PORT_LINKUP_IND_REG, &val);
+                       priv, RTL8365MB_PORT_LINKUP_IND_REG, &val);
                if (ret)
                        goto out_error;
 
                linkup_ind = FIELD_GET(RTL8365MB_PORT_LINKUP_IND_MASK, val);
 
                ret = rtl8365mb_get_and_clear_status_reg(
-                       smi, RTL8365MB_PORT_LINKDOWN_IND_REG, &val);
+                       priv, RTL8365MB_PORT_LINKDOWN_IND_REG, &val);
                if (ret)
                        goto out_error;
 
@@ -1504,8 +1544,8 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data)
        if (!line_changes)
                goto out_none;
 
-       for_each_set_bit(line, &line_changes, smi->num_ports) {
-               int child_irq = irq_find_mapping(smi->irqdomain, line);
+       for_each_set_bit(line, &line_changes, priv->num_ports) {
+               int child_irq = irq_find_mapping(priv->irqdomain, line);
 
                handle_nested_irq(child_irq);
        }
@@ -1513,7 +1553,7 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data)
        return IRQ_HANDLED;
 
 out_error:
-       dev_err(smi->dev, "failed to read interrupt status: %d\n", ret);
+       dev_err(priv->dev, "failed to read interrupt status: %d\n", ret);
 
 out_none:
        return IRQ_NONE;
@@ -1548,27 +1588,27 @@ static const struct irq_domain_ops rtl8365mb_irqdomain_ops = {
        .xlate = irq_domain_xlate_onecell,
 };
 
-static int rtl8365mb_set_irq_enable(struct realtek_smi *smi, bool enable)
+static int rtl8365mb_set_irq_enable(struct realtek_priv *priv, bool enable)
 {
-       return regmap_update_bits(smi->map, RTL8365MB_INTR_CTRL_REG,
+       return regmap_update_bits(priv->map, RTL8365MB_INTR_CTRL_REG,
                                  RTL8365MB_INTR_LINK_CHANGE_MASK,
                                  FIELD_PREP(RTL8365MB_INTR_LINK_CHANGE_MASK,
                                             enable ? 1 : 0));
 }
 
-static int rtl8365mb_irq_enable(struct realtek_smi *smi)
+static int rtl8365mb_irq_enable(struct realtek_priv *priv)
 {
-       return rtl8365mb_set_irq_enable(smi, true);
+       return rtl8365mb_set_irq_enable(priv, true);
 }
 
-static int rtl8365mb_irq_disable(struct realtek_smi *smi)
+static int rtl8365mb_irq_disable(struct realtek_priv *priv)
 {
-       return rtl8365mb_set_irq_enable(smi, false);
+       return rtl8365mb_set_irq_enable(priv, false);
 }
 
-static int rtl8365mb_irq_setup(struct realtek_smi *smi)
+static int rtl8365mb_irq_setup(struct realtek_priv *priv)
 {
-       struct rtl8365mb *mb = smi->chip_data;
+       struct rtl8365mb *mb = priv->chip_data;
        struct device_node *intc;
        u32 irq_trig;
        int virq;
@@ -1577,9 +1617,9 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi)
        int ret;
        int i;
 
-       intc = of_get_child_by_name(smi->dev->of_node, "interrupt-controller");
+       intc = of_get_child_by_name(priv->dev->of_node, "interrupt-controller");
        if (!intc) {
-               dev_err(smi->dev, "missing child interrupt-controller node\n");
+               dev_err(priv->dev, "missing child interrupt-controller node\n");
                return -EINVAL;
        }
 
@@ -1587,24 +1627,24 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi)
        irq = of_irq_get(intc, 0);
        if (irq <= 0) {
                if (irq != -EPROBE_DEFER)
-                       dev_err(smi->dev, "failed to get parent irq: %d\n",
+                       dev_err(priv->dev, "failed to get parent irq: %d\n",
                                irq);
                ret = irq ? irq : -EINVAL;
                goto out_put_node;
        }
 
-       smi->irqdomain = irq_domain_add_linear(intc, smi->num_ports,
-                                              &rtl8365mb_irqdomain_ops, smi);
-       if (!smi->irqdomain) {
-               dev_err(smi->dev, "failed to add irq domain\n");
+       priv->irqdomain = irq_domain_add_linear(intc, priv->num_ports,
+                                               &rtl8365mb_irqdomain_ops, priv);
+       if (!priv->irqdomain) {
+               dev_err(priv->dev, "failed to add irq domain\n");
                ret = -ENOMEM;
                goto out_put_node;
        }
 
-       for (i = 0; i < smi->num_ports; i++) {
-               virq = irq_create_mapping(smi->irqdomain, i);
+       for (i = 0; i < priv->num_ports; i++) {
+               virq = irq_create_mapping(priv->irqdomain, i);
                if (!virq) {
-                       dev_err(smi->dev,
+                       dev_err(priv->dev,
                                "failed to create irq domain mapping\n");
                        ret = -EINVAL;
                        goto out_remove_irqdomain;
@@ -1625,40 +1665,40 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi)
                val = RTL8365MB_INTR_POLARITY_LOW;
                break;
        default:
-               dev_err(smi->dev, "unsupported irq trigger type %u\n",
+               dev_err(priv->dev, "unsupported irq trigger type %u\n",
                        irq_trig);
                ret = -EINVAL;
                goto out_remove_irqdomain;
        }
 
-       ret = regmap_update_bits(smi->map, RTL8365MB_INTR_POLARITY_REG,
+       ret = regmap_update_bits(priv->map, RTL8365MB_INTR_POLARITY_REG,
                                 RTL8365MB_INTR_POLARITY_MASK,
                                 FIELD_PREP(RTL8365MB_INTR_POLARITY_MASK, val));
        if (ret)
                goto out_remove_irqdomain;
 
        /* Disable the interrupt in case the chip has it enabled on reset */
-       ret = rtl8365mb_irq_disable(smi);
+       ret = rtl8365mb_irq_disable(priv);
        if (ret)
                goto out_remove_irqdomain;
 
        /* Clear the interrupt status register */
-       ret = regmap_write(smi->map, RTL8365MB_INTR_STATUS_REG,
+       ret = regmap_write(priv->map, RTL8365MB_INTR_STATUS_REG,
                           RTL8365MB_INTR_ALL_MASK);
        if (ret)
                goto out_remove_irqdomain;
 
        ret = request_threaded_irq(irq, NULL, rtl8365mb_irq, IRQF_ONESHOT,
-                                  "rtl8365mb", smi);
+                                  "rtl8365mb", priv);
        if (ret) {
-               dev_err(smi->dev, "failed to request irq: %d\n", ret);
+               dev_err(priv->dev, "failed to request irq: %d\n", ret);
                goto out_remove_irqdomain;
        }
 
        /* Store the irq so that we know to free it during teardown */
        mb->irq = irq;
 
-       ret = rtl8365mb_irq_enable(smi);
+       ret = rtl8365mb_irq_enable(priv);
        if (ret)
                goto out_free_irq;
 
@@ -1667,17 +1707,17 @@ static int rtl8365mb_irq_setup(struct realtek_smi *smi)
        return 0;
 
 out_free_irq:
-       free_irq(mb->irq, smi);
+       free_irq(mb->irq, priv);
        mb->irq = 0;
 
 out_remove_irqdomain:
-       for (i = 0; i < smi->num_ports; i++) {
-               virq = irq_find_mapping(smi->irqdomain, i);
+       for (i = 0; i < priv->num_ports; i++) {
+               virq = irq_find_mapping(priv->irqdomain, i);
                irq_dispose_mapping(virq);
        }
 
-       irq_domain_remove(smi->irqdomain);
-       smi->irqdomain = NULL;
+       irq_domain_remove(priv->irqdomain);
+       priv->irqdomain = NULL;
 
 out_put_node:
        of_node_put(intc);
@@ -1685,36 +1725,34 @@ out_put_node:
        return ret;
 }
 
-static void rtl8365mb_irq_teardown(struct realtek_smi *smi)
+static void rtl8365mb_irq_teardown(struct realtek_priv *priv)
 {
-       struct rtl8365mb *mb = smi->chip_data;
+       struct rtl8365mb *mb = priv->chip_data;
        int virq;
        int i;
 
        if (mb->irq) {
-               free_irq(mb->irq, smi);
+               free_irq(mb->irq, priv);
                mb->irq = 0;
        }
 
-       if (smi->irqdomain) {
-               for (i = 0; i < smi->num_ports; i++) {
-                       virq = irq_find_mapping(smi->irqdomain, i);
+       if (priv->irqdomain) {
+               for (i = 0; i < priv->num_ports; i++) {
+                       virq = irq_find_mapping(priv->irqdomain, i);
                        irq_dispose_mapping(virq);
                }
 
-               irq_domain_remove(smi->irqdomain);
-               smi->irqdomain = NULL;
+               irq_domain_remove(priv->irqdomain);
+               priv->irqdomain = NULL;
        }
 }
 
-static int rtl8365mb_cpu_config(struct realtek_smi *smi)
+static int rtl8365mb_cpu_config(struct realtek_priv *priv, const struct rtl8365mb_cpu *cpu)
 {
-       struct rtl8365mb *mb = smi->chip_data;
-       struct rtl8365mb_cpu *cpu = &mb->cpu;
        u32 val;
        int ret;
 
-       ret = regmap_update_bits(smi->map, RTL8365MB_CPU_PORT_MASK_REG,
+       ret = regmap_update_bits(priv->map, RTL8365MB_CPU_PORT_MASK_REG,
                                 RTL8365MB_CPU_PORT_MASK_MASK,
                                 FIELD_PREP(RTL8365MB_CPU_PORT_MASK_MASK,
                                            cpu->mask));
@@ -1726,26 +1764,26 @@ static int rtl8365mb_cpu_config(struct realtek_smi *smi)
              FIELD_PREP(RTL8365MB_CPU_CTRL_TAG_POSITION_MASK, cpu->position) |
              FIELD_PREP(RTL8365MB_CPU_CTRL_RXBYTECOUNT_MASK, cpu->rx_length) |
              FIELD_PREP(RTL8365MB_CPU_CTRL_TAG_FORMAT_MASK, cpu->format) |
-             FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_MASK, cpu->trap_port) |
+             FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_MASK, cpu->trap_port & 0x7) |
              FIELD_PREP(RTL8365MB_CPU_CTRL_TRAP_PORT_EXT_MASK,
-                        cpu->trap_port >> 3);
-       ret = regmap_write(smi->map, RTL8365MB_CPU_CTRL_REG, val);
+                        cpu->trap_port >> 3 & 0x1);
+       ret = regmap_write(priv->map, RTL8365MB_CPU_CTRL_REG, val);
        if (ret)
                return ret;
 
        return 0;
 }
 
-static int rtl8365mb_switch_init(struct realtek_smi *smi)
+static int rtl8365mb_switch_init(struct realtek_priv *priv)
 {
-       struct rtl8365mb *mb = smi->chip_data;
+       struct rtl8365mb *mb = priv->chip_data;
        int ret;
        int i;
 
        /* Do any chip-specific init jam before getting to the common stuff */
        if (mb->jam_table) {
                for (i = 0; i < mb->jam_size; i++) {
-                       ret = regmap_write(smi->map, mb->jam_table[i].reg,
+                       ret = regmap_write(priv->map, mb->jam_table[i].reg,
                                           mb->jam_table[i].val);
                        if (ret)
                                return ret;
@@ -1754,7 +1792,7 @@ static int rtl8365mb_switch_init(struct realtek_smi *smi)
 
        /* Common init jam */
        for (i = 0; i < ARRAY_SIZE(rtl8365mb_init_jam_common); i++) {
-               ret = regmap_write(smi->map, rtl8365mb_init_jam_common[i].reg,
+               ret = regmap_write(priv->map, rtl8365mb_init_jam_common[i].reg,
                                   rtl8365mb_init_jam_common[i].val);
                if (ret)
                        return ret;
@@ -1763,75 +1801,86 @@ static int rtl8365mb_switch_init(struct realtek_smi *smi)
        return 0;
 }
 
-static int rtl8365mb_reset_chip(struct realtek_smi *smi)
+static int rtl8365mb_reset_chip(struct realtek_priv *priv)
 {
        u32 val;
 
-       realtek_smi_write_reg_noack(smi, RTL8365MB_CHIP_RESET_REG,
-                                   FIELD_PREP(RTL8365MB_CHIP_RESET_HW_MASK,
-                                              1));
+       priv->write_reg_noack(priv, RTL8365MB_CHIP_RESET_REG,
+                             FIELD_PREP(RTL8365MB_CHIP_RESET_HW_MASK, 1));
 
        /* Realtek documentation says the chip needs 1 second to reset. Sleep
         * for 100 ms before accessing any registers to prevent ACK timeouts.
         */
        msleep(100);
-       return regmap_read_poll_timeout(smi->map, RTL8365MB_CHIP_RESET_REG, val,
+       return regmap_read_poll_timeout(priv->map, RTL8365MB_CHIP_RESET_REG, val,
                                        !(val & RTL8365MB_CHIP_RESET_HW_MASK),
                                        20000, 1e6);
 }
 
 static int rtl8365mb_setup(struct dsa_switch *ds)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
+       struct rtl8365mb_cpu cpu = {0};
+       struct dsa_port *cpu_dp;
        struct rtl8365mb *mb;
        int ret;
        int i;
 
-       mb = smi->chip_data;
+       mb = priv->chip_data;
 
-       ret = rtl8365mb_reset_chip(smi);
+       ret = rtl8365mb_reset_chip(priv);
        if (ret) {
-               dev_err(smi->dev, "failed to reset chip: %d\n", ret);
+               dev_err(priv->dev, "failed to reset chip: %d\n", ret);
                goto out_error;
        }
 
        /* Configure switch to vendor-defined initial state */
-       ret = rtl8365mb_switch_init(smi);
+       ret = rtl8365mb_switch_init(priv);
        if (ret) {
-               dev_err(smi->dev, "failed to initialize switch: %d\n", ret);
+               dev_err(priv->dev, "failed to initialize switch: %d\n", ret);
                goto out_error;
        }
 
        /* Set up cascading IRQs */
-       ret = rtl8365mb_irq_setup(smi);
+       ret = rtl8365mb_irq_setup(priv);
        if (ret == -EPROBE_DEFER)
                return ret;
        else if (ret)
-               dev_info(smi->dev, "no interrupt support\n");
+               dev_info(priv->dev, "no interrupt support\n");
 
        /* Configure CPU tagging */
-       ret = rtl8365mb_cpu_config(smi);
+       cpu.trap_port = RTL8365MB_MAX_NUM_PORTS;
+       dsa_switch_for_each_cpu_port(cpu_dp, priv->ds) {
+               cpu.mask |= BIT(cpu_dp->index);
+
+               if (cpu.trap_port == RTL8365MB_MAX_NUM_PORTS)
+                       cpu.trap_port = cpu_dp->index;
+       }
+
+       cpu.enable = cpu.mask > 0;
+       cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL;
+       cpu.position = RTL8365MB_CPU_POS_AFTER_SA;
+       cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES;
+       cpu.format = RTL8365MB_CPU_FORMAT_8BYTES;
+
+       ret = rtl8365mb_cpu_config(priv, &cpu);
        if (ret)
                goto out_teardown_irq;
 
        /* Configure ports */
-       for (i = 0; i < smi->num_ports; i++) {
+       for (i = 0; i < priv->num_ports; i++) {
                struct rtl8365mb_port *p = &mb->ports[i];
 
-               if (dsa_is_unused_port(smi->ds, i))
+               if (dsa_is_unused_port(priv->ds, i))
                        continue;
 
-               /* Set up per-port private data */
-               p->smi = smi;
-               p->index = i;
-
                /* Forward only to the CPU */
-               ret = rtl8365mb_port_set_isolation(smi, i, BIT(smi->cpu_port));
+               ret = rtl8365mb_port_set_isolation(priv, i, cpu.mask);
                if (ret)
                        goto out_teardown_irq;
 
                /* Disable learning */
-               ret = rtl8365mb_port_set_learning(smi, i, false);
+               ret = rtl8365mb_port_set_learning(priv, i, false);
                if (ret)
                        goto out_teardown_irq;
 
@@ -1839,29 +1888,35 @@ static int rtl8365mb_setup(struct dsa_switch *ds)
                 * ports will still forward frames to the CPU despite being
                 * administratively down by default.
                 */
-               rtl8365mb_port_stp_state_set(smi->ds, i, BR_STATE_DISABLED);
+               rtl8365mb_port_stp_state_set(priv->ds, i, BR_STATE_DISABLED);
+
+               /* Set up per-port private data */
+               p->priv = priv;
+               p->index = i;
        }
 
        /* Set maximum packet length to 1536 bytes */
-       ret = regmap_update_bits(smi->map, RTL8365MB_CFG0_MAX_LEN_REG,
+       ret = regmap_update_bits(priv->map, RTL8365MB_CFG0_MAX_LEN_REG,
                                 RTL8365MB_CFG0_MAX_LEN_MASK,
                                 FIELD_PREP(RTL8365MB_CFG0_MAX_LEN_MASK, 1536));
        if (ret)
                goto out_teardown_irq;
 
-       ret = realtek_smi_setup_mdio(smi);
-       if (ret) {
-               dev_err(smi->dev, "could not set up MDIO bus\n");
-               goto out_teardown_irq;
+       if (priv->setup_interface) {
+               ret = priv->setup_interface(ds);
+               if (ret) {
+                       dev_err(priv->dev, "could not set up MDIO bus\n");
+                       goto out_teardown_irq;
+               }
        }
 
        /* Start statistics counter polling */
-       rtl8365mb_stats_setup(smi);
+       rtl8365mb_stats_setup(priv);
 
        return 0;
 
 out_teardown_irq:
-       rtl8365mb_irq_teardown(smi);
+       rtl8365mb_irq_teardown(priv);
 
 out_error:
        return ret;
@@ -1869,10 +1924,10 @@ out_error:
 
 static void rtl8365mb_teardown(struct dsa_switch *ds)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
 
-       rtl8365mb_stats_teardown(smi);
-       rtl8365mb_irq_teardown(smi);
+       rtl8365mb_stats_teardown(priv);
+       rtl8365mb_irq_teardown(priv);
 }
 
 static int rtl8365mb_get_chip_id_and_ver(struct regmap *map, u32 *id, u32 *ver)
@@ -1902,48 +1957,57 @@ static int rtl8365mb_get_chip_id_and_ver(struct regmap *map, u32 *id, u32 *ver)
        return 0;
 }
 
-static int rtl8365mb_detect(struct realtek_smi *smi)
+static int rtl8365mb_detect(struct realtek_priv *priv)
 {
-       struct rtl8365mb *mb = smi->chip_data;
+       struct rtl8365mb *mb = priv->chip_data;
        u32 chip_id;
        u32 chip_ver;
        int ret;
 
-       ret = rtl8365mb_get_chip_id_and_ver(smi->map, &chip_id, &chip_ver);
+       ret = rtl8365mb_get_chip_id_and_ver(priv->map, &chip_id, &chip_ver);
        if (ret) {
-               dev_err(smi->dev, "failed to read chip id and version: %d\n",
+               dev_err(priv->dev, "failed to read chip id and version: %d\n",
                        ret);
                return ret;
        }
 
        switch (chip_id) {
        case RTL8365MB_CHIP_ID_8365MB_VC:
-               dev_info(smi->dev,
-                        "found an RTL8365MB-VC switch (ver=0x%04x)\n",
-                        chip_ver);
+               switch (chip_ver) {
+               case RTL8365MB_CHIP_VER_8365MB_VC:
+                       dev_info(priv->dev,
+                                "found an RTL8365MB-VC switch (ver=0x%04x)\n",
+                                chip_ver);
+                       break;
+               case RTL8365MB_CHIP_VER_8367RB:
+                       dev_info(priv->dev,
+                                "found an RTL8367RB-VB switch (ver=0x%04x)\n",
+                                chip_ver);
+                       break;
+               case RTL8365MB_CHIP_VER_8367S:
+                       dev_info(priv->dev,
+                                "found an RTL8367S switch (ver=0x%04x)\n",
+                                chip_ver);
+                       break;
+               default:
+                       dev_err(priv->dev, "unrecognized switch version (ver=0x%04x)",
+                               chip_ver);
+                       return -ENODEV;
+               }
 
-               smi->cpu_port = RTL8365MB_CPU_PORT_NUM_8365MB_VC;
-               smi->num_ports = smi->cpu_port + 1;
+               priv->num_ports = RTL8365MB_MAX_NUM_PORTS;
 
-               mb->smi = smi;
+               mb->priv = priv;
                mb->chip_id = chip_id;
                mb->chip_ver = chip_ver;
-               mb->port_mask = BIT(smi->num_ports) - 1;
-               mb->learn_limit_max = RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC;
+               mb->port_mask = GENMASK(priv->num_ports - 1, 0);
+               mb->learn_limit_max = RTL8365MB_LEARN_LIMIT_MAX;
                mb->jam_table = rtl8365mb_init_jam_8365mb_vc;
                mb->jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc);
 
-               mb->cpu.enable = 1;
-               mb->cpu.mask = BIT(smi->cpu_port);
-               mb->cpu.trap_port = smi->cpu_port;
-               mb->cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL;
-               mb->cpu.position = RTL8365MB_CPU_POS_AFTER_SA;
-               mb->cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES;
-               mb->cpu.format = RTL8365MB_CPU_FORMAT_8BYTES;
-
                break;
        default:
-               dev_err(smi->dev,
+               dev_err(priv->dev,
                        "found an unknown Realtek switch (id=0x%04x, ver=0x%04x)\n",
                        chip_id, chip_ver);
                return -ENODEV;
@@ -1952,7 +2016,25 @@ static int rtl8365mb_detect(struct realtek_smi *smi)
        return 0;
 }
 
-static const struct dsa_switch_ops rtl8365mb_switch_ops = {
+static const struct dsa_switch_ops rtl8365mb_switch_ops_smi = {
+       .get_tag_protocol = rtl8365mb_get_tag_protocol,
+       .setup = rtl8365mb_setup,
+       .teardown = rtl8365mb_teardown,
+       .phylink_validate = rtl8365mb_phylink_validate,
+       .phylink_mac_config = rtl8365mb_phylink_mac_config,
+       .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down,
+       .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up,
+       .port_stp_state_set = rtl8365mb_port_stp_state_set,
+       .get_strings = rtl8365mb_get_strings,
+       .get_ethtool_stats = rtl8365mb_get_ethtool_stats,
+       .get_sset_count = rtl8365mb_get_sset_count,
+       .get_eth_phy_stats = rtl8365mb_get_phy_stats,
+       .get_eth_mac_stats = rtl8365mb_get_mac_stats,
+       .get_eth_ctrl_stats = rtl8365mb_get_ctrl_stats,
+       .get_stats64 = rtl8365mb_get_stats64,
+};
+
+static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = {
        .get_tag_protocol = rtl8365mb_get_tag_protocol,
        .setup = rtl8365mb_setup,
        .teardown = rtl8365mb_teardown,
@@ -1960,6 +2042,8 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops = {
        .phylink_mac_config = rtl8365mb_phylink_mac_config,
        .phylink_mac_link_down = rtl8365mb_phylink_mac_link_down,
        .phylink_mac_link_up = rtl8365mb_phylink_mac_link_up,
+       .phy_read = rtl8365mb_dsa_phy_read,
+       .phy_write = rtl8365mb_dsa_phy_write,
        .port_stp_state_set = rtl8365mb_port_stp_state_set,
        .get_strings = rtl8365mb_get_strings,
        .get_ethtool_stats = rtl8365mb_get_ethtool_stats,
@@ -1970,18 +2054,23 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops = {
        .get_stats64 = rtl8365mb_get_stats64,
 };
 
-static const struct realtek_smi_ops rtl8365mb_smi_ops = {
+static const struct realtek_ops rtl8365mb_ops = {
        .detect = rtl8365mb_detect,
        .phy_read = rtl8365mb_phy_read,
        .phy_write = rtl8365mb_phy_write,
 };
 
-const struct realtek_smi_variant rtl8365mb_variant = {
-       .ds_ops = &rtl8365mb_switch_ops,
-       .ops = &rtl8365mb_smi_ops,
+const struct realtek_variant rtl8365mb_variant = {
+       .ds_ops_smi = &rtl8365mb_switch_ops_smi,
+       .ds_ops_mdio = &rtl8365mb_switch_ops_mdio,
+       .ops = &rtl8365mb_ops,
        .clk_delay = 10,
        .cmd_read = 0xb9,
        .cmd_write = 0xb8,
        .chip_data_sz = sizeof(struct rtl8365mb),
 };
 EXPORT_SYMBOL_GPL(rtl8365mb_variant);
+
+MODULE_AUTHOR("Alvin Å ipraga <alsi@bang-olufsen.dk>");
+MODULE_DESCRIPTION("Driver for RTL8365MB-VC ethernet switch");
+MODULE_LICENSE("GPL");
similarity index 61%
rename from drivers/net/dsa/rtl8366.c
rename to drivers/net/dsa/realtek/rtl8366-core.c
index bdb8d8d..dc5f75b 100644 (file)
 #include <linux/if_bridge.h>
 #include <net/dsa.h>
 
-#include "realtek-smi-core.h"
+#include "realtek.h"
 
-int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used)
+int rtl8366_mc_is_used(struct realtek_priv *priv, int mc_index, int *used)
 {
        int ret;
        int i;
 
        *used = 0;
-       for (i = 0; i < smi->num_ports; i++) {
+       for (i = 0; i < priv->num_ports; i++) {
                int index = 0;
 
-               ret = smi->ops->get_mc_index(smi, i, &index);
+               ret = priv->ops->get_mc_index(priv, i, &index);
                if (ret)
                        return ret;
 
@@ -38,13 +38,13 @@ EXPORT_SYMBOL_GPL(rtl8366_mc_is_used);
 
 /**
  * rtl8366_obtain_mc() - retrieve or allocate a VLAN member configuration
- * @smi: the Realtek SMI device instance
+ * @priv: the Realtek SMI device instance
  * @vid: the VLAN ID to look up or allocate
  * @vlanmc: the pointer will be assigned to a pointer to a valid member config
  * if successful
  * @return: index of a new member config or negative error number
  */
-static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
+static int rtl8366_obtain_mc(struct realtek_priv *priv, int vid,
                             struct rtl8366_vlan_mc *vlanmc)
 {
        struct rtl8366_vlan_4k vlan4k;
@@ -52,10 +52,10 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
        int i;
 
        /* Try to find an existing member config entry for this VID */
-       for (i = 0; i < smi->num_vlan_mc; i++) {
-               ret = smi->ops->get_vlan_mc(smi, i, vlanmc);
+       for (i = 0; i < priv->num_vlan_mc; i++) {
+               ret = priv->ops->get_vlan_mc(priv, i, vlanmc);
                if (ret) {
-                       dev_err(smi->dev, "error searching for VLAN MC %d for VID %d\n",
+                       dev_err(priv->dev, "error searching for VLAN MC %d for VID %d\n",
                                i, vid);
                        return ret;
                }
@@ -65,19 +65,19 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
        }
 
        /* We have no MC entry for this VID, try to find an empty one */
-       for (i = 0; i < smi->num_vlan_mc; i++) {
-               ret = smi->ops->get_vlan_mc(smi, i, vlanmc);
+       for (i = 0; i < priv->num_vlan_mc; i++) {
+               ret = priv->ops->get_vlan_mc(priv, i, vlanmc);
                if (ret) {
-                       dev_err(smi->dev, "error searching for VLAN MC %d for VID %d\n",
+                       dev_err(priv->dev, "error searching for VLAN MC %d for VID %d\n",
                                i, vid);
                        return ret;
                }
 
                if (vlanmc->vid == 0 && vlanmc->member == 0) {
                        /* Update the entry from the 4K table */
-                       ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+                       ret = priv->ops->get_vlan_4k(priv, vid, &vlan4k);
                        if (ret) {
-                               dev_err(smi->dev, "error looking for 4K VLAN MC %d for VID %d\n",
+                               dev_err(priv->dev, "error looking for 4K VLAN MC %d for VID %d\n",
                                        i, vid);
                                return ret;
                        }
@@ -86,30 +86,30 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
                        vlanmc->member = vlan4k.member;
                        vlanmc->untag = vlan4k.untag;
                        vlanmc->fid = vlan4k.fid;
-                       ret = smi->ops->set_vlan_mc(smi, i, vlanmc);
+                       ret = priv->ops->set_vlan_mc(priv, i, vlanmc);
                        if (ret) {
-                               dev_err(smi->dev, "unable to set/update VLAN MC %d for VID %d\n",
+                               dev_err(priv->dev, "unable to set/update VLAN MC %d for VID %d\n",
                                        i, vid);
                                return ret;
                        }
 
-                       dev_dbg(smi->dev, "created new MC at index %d for VID %d\n",
+                       dev_dbg(priv->dev, "created new MC at index %d for VID %d\n",
                                i, vid);
                        return i;
                }
        }
 
        /* MC table is full, try to find an unused entry and replace it */
-       for (i = 0; i < smi->num_vlan_mc; i++) {
+       for (i = 0; i < priv->num_vlan_mc; i++) {
                int used;
 
-               ret = rtl8366_mc_is_used(smi, i, &used);
+               ret = rtl8366_mc_is_used(priv, i, &used);
                if (ret)
                        return ret;
 
                if (!used) {
                        /* Update the entry from the 4K table */
-                       ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+                       ret = priv->ops->get_vlan_4k(priv, vid, &vlan4k);
                        if (ret)
                                return ret;
 
@@ -117,23 +117,23 @@ static int rtl8366_obtain_mc(struct realtek_smi *smi, int vid,
                        vlanmc->member = vlan4k.member;
                        vlanmc->untag = vlan4k.untag;
                        vlanmc->fid = vlan4k.fid;
-                       ret = smi->ops->set_vlan_mc(smi, i, vlanmc);
+                       ret = priv->ops->set_vlan_mc(priv, i, vlanmc);
                        if (ret) {
-                               dev_err(smi->dev, "unable to set/update VLAN MC %d for VID %d\n",
+                               dev_err(priv->dev, "unable to set/update VLAN MC %d for VID %d\n",
                                        i, vid);
                                return ret;
                        }
-                       dev_dbg(smi->dev, "recycled MC at index %i for VID %d\n",
+                       dev_dbg(priv->dev, "recycled MC at index %i for VID %d\n",
                                i, vid);
                        return i;
                }
        }
 
-       dev_err(smi->dev, "all VLAN member configurations are in use\n");
+       dev_err(priv->dev, "all VLAN member configurations are in use\n");
        return -ENOSPC;
 }
 
-int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
+int rtl8366_set_vlan(struct realtek_priv *priv, int vid, u32 member,
                     u32 untag, u32 fid)
 {
        struct rtl8366_vlan_mc vlanmc;
@@ -141,31 +141,31 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
        int mc;
        int ret;
 
-       if (!smi->ops->is_vlan_valid(smi, vid))
+       if (!priv->ops->is_vlan_valid(priv, vid))
                return -EINVAL;
 
-       dev_dbg(smi->dev,
+       dev_dbg(priv->dev,
                "setting VLAN%d 4k members: 0x%02x, untagged: 0x%02x\n",
                vid, member, untag);
 
        /* Update the 4K table */
-       ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+       ret = priv->ops->get_vlan_4k(priv, vid, &vlan4k);
        if (ret)
                return ret;
 
        vlan4k.member |= member;
        vlan4k.untag |= untag;
        vlan4k.fid = fid;
-       ret = smi->ops->set_vlan_4k(smi, &vlan4k);
+       ret = priv->ops->set_vlan_4k(priv, &vlan4k);
        if (ret)
                return ret;
 
-       dev_dbg(smi->dev,
+       dev_dbg(priv->dev,
                "resulting VLAN%d 4k members: 0x%02x, untagged: 0x%02x\n",
                vid, vlan4k.member, vlan4k.untag);
 
        /* Find or allocate a member config for this VID */
-       ret = rtl8366_obtain_mc(smi, vid, &vlanmc);
+       ret = rtl8366_obtain_mc(priv, vid, &vlanmc);
        if (ret < 0)
                return ret;
        mc = ret;
@@ -176,12 +176,12 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
        vlanmc.fid = fid;
 
        /* Commit updates to the MC entry */
-       ret = smi->ops->set_vlan_mc(smi, mc, &vlanmc);
+       ret = priv->ops->set_vlan_mc(priv, mc, &vlanmc);
        if (ret)
-               dev_err(smi->dev, "failed to commit changes to VLAN MC index %d for VID %d\n",
+               dev_err(priv->dev, "failed to commit changes to VLAN MC index %d for VID %d\n",
                        mc, vid);
        else
-               dev_dbg(smi->dev,
+               dev_dbg(priv->dev,
                        "resulting VLAN%d MC members: 0x%02x, untagged: 0x%02x\n",
                        vid, vlanmc.member, vlanmc.untag);
 
@@ -189,37 +189,37 @@ int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
 }
 EXPORT_SYMBOL_GPL(rtl8366_set_vlan);
 
-int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port,
+int rtl8366_set_pvid(struct realtek_priv *priv, unsigned int port,
                     unsigned int vid)
 {
        struct rtl8366_vlan_mc vlanmc;
        int mc;
        int ret;
 
-       if (!smi->ops->is_vlan_valid(smi, vid))
+       if (!priv->ops->is_vlan_valid(priv, vid))
                return -EINVAL;
 
        /* Find or allocate a member config for this VID */
-       ret = rtl8366_obtain_mc(smi, vid, &vlanmc);
+       ret = rtl8366_obtain_mc(priv, vid, &vlanmc);
        if (ret < 0)
                return ret;
        mc = ret;
 
-       ret = smi->ops->set_mc_index(smi, port, mc);
+       ret = priv->ops->set_mc_index(priv, port, mc);
        if (ret) {
-               dev_err(smi->dev, "set PVID: failed to set MC index %d for port %d\n",
+               dev_err(priv->dev, "set PVID: failed to set MC index %d for port %d\n",
                        mc, port);
                return ret;
        }
 
-       dev_dbg(smi->dev, "set PVID: the PVID for port %d set to %d using existing MC index %d\n",
+       dev_dbg(priv->dev, "set PVID: the PVID for port %d set to %d using existing MC index %d\n",
                port, vid, mc);
 
        return 0;
 }
 EXPORT_SYMBOL_GPL(rtl8366_set_pvid);
 
-int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable)
+int rtl8366_enable_vlan4k(struct realtek_priv *priv, bool enable)
 {
        int ret;
 
@@ -229,52 +229,52 @@ int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable)
         */
        if (enable) {
                /* Make sure VLAN is ON */
-               ret = smi->ops->enable_vlan(smi, true);
+               ret = priv->ops->enable_vlan(priv, true);
                if (ret)
                        return ret;
 
-               smi->vlan_enabled = true;
+               priv->vlan_enabled = true;
        }
 
-       ret = smi->ops->enable_vlan4k(smi, enable);
+       ret = priv->ops->enable_vlan4k(priv, enable);
        if (ret)
                return ret;
 
-       smi->vlan4k_enabled = enable;
+       priv->vlan4k_enabled = enable;
        return 0;
 }
 EXPORT_SYMBOL_GPL(rtl8366_enable_vlan4k);
 
-int rtl8366_enable_vlan(struct realtek_smi *smi, bool enable)
+int rtl8366_enable_vlan(struct realtek_priv *priv, bool enable)
 {
        int ret;
 
-       ret = smi->ops->enable_vlan(smi, enable);
+       ret = priv->ops->enable_vlan(priv, enable);
        if (ret)
                return ret;
 
-       smi->vlan_enabled = enable;
+       priv->vlan_enabled = enable;
 
        /* If we turn VLAN off, make sure that we turn off
         * 4k VLAN as well, if that happened to be on.
         */
        if (!enable) {
-               smi->vlan4k_enabled = false;
-               ret = smi->ops->enable_vlan4k(smi, false);
+               priv->vlan4k_enabled = false;
+               ret = priv->ops->enable_vlan4k(priv, false);
        }
 
        return ret;
 }
 EXPORT_SYMBOL_GPL(rtl8366_enable_vlan);
 
-int rtl8366_reset_vlan(struct realtek_smi *smi)
+int rtl8366_reset_vlan(struct realtek_priv *priv)
 {
        struct rtl8366_vlan_mc vlanmc;
        int ret;
        int i;
 
-       rtl8366_enable_vlan(smi, false);
-       rtl8366_enable_vlan4k(smi, false);
+       rtl8366_enable_vlan(priv, false);
+       rtl8366_enable_vlan4k(priv, false);
 
        /* Clear the 16 VLAN member configurations */
        vlanmc.vid = 0;
@@ -282,8 +282,8 @@ int rtl8366_reset_vlan(struct realtek_smi *smi)
        vlanmc.member = 0;
        vlanmc.untag = 0;
        vlanmc.fid = 0;
-       for (i = 0; i < smi->num_vlan_mc; i++) {
-               ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+       for (i = 0; i < priv->num_vlan_mc; i++) {
+               ret = priv->ops->set_vlan_mc(priv, i, &vlanmc);
                if (ret)
                        return ret;
        }
@@ -298,12 +298,12 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port,
 {
        bool untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED);
        bool pvid = !!(vlan->flags & BRIDGE_VLAN_INFO_PVID);
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        u32 member = 0;
        u32 untag = 0;
        int ret;
 
-       if (!smi->ops->is_vlan_valid(smi, vlan->vid)) {
+       if (!priv->ops->is_vlan_valid(priv, vlan->vid)) {
                NL_SET_ERR_MSG_MOD(extack, "VLAN ID not valid");
                return -EINVAL;
        }
@@ -312,13 +312,13 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port,
         * FIXME: what's with this 4k business?
         * Just rtl8366_enable_vlan() seems inconclusive.
         */
-       ret = rtl8366_enable_vlan4k(smi, true);
+       ret = rtl8366_enable_vlan4k(priv, true);
        if (ret) {
                NL_SET_ERR_MSG_MOD(extack, "Failed to enable VLAN 4K");
                return ret;
        }
 
-       dev_dbg(smi->dev, "add VLAN %d on port %d, %s, %s\n",
+       dev_dbg(priv->dev, "add VLAN %d on port %d, %s, %s\n",
                vlan->vid, port, untagged ? "untagged" : "tagged",
                pvid ? "PVID" : "no PVID");
 
@@ -327,18 +327,18 @@ int rtl8366_vlan_add(struct dsa_switch *ds, int port,
        if (untagged)
                untag |= BIT(port);
 
-       ret = rtl8366_set_vlan(smi, vlan->vid, member, untag, 0);
+       ret = rtl8366_set_vlan(priv, vlan->vid, member, untag, 0);
        if (ret) {
-               dev_err(smi->dev, "failed to set up VLAN %04x", vlan->vid);
+               dev_err(priv->dev, "failed to set up VLAN %04x", vlan->vid);
                return ret;
        }
 
        if (!pvid)
                return 0;
 
-       ret = rtl8366_set_pvid(smi, port, vlan->vid);
+       ret = rtl8366_set_pvid(priv, port, vlan->vid);
        if (ret) {
-               dev_err(smi->dev, "failed to set PVID on port %d to VLAN %04x",
+               dev_err(priv->dev, "failed to set PVID on port %d to VLAN %04x",
                        port, vlan->vid);
                return ret;
        }
@@ -350,15 +350,15 @@ EXPORT_SYMBOL_GPL(rtl8366_vlan_add);
 int rtl8366_vlan_del(struct dsa_switch *ds, int port,
                     const struct switchdev_obj_port_vlan *vlan)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        int ret, i;
 
-       dev_dbg(smi->dev, "del VLAN %d on port %d\n", vlan->vid, port);
+       dev_dbg(priv->dev, "del VLAN %d on port %d\n", vlan->vid, port);
 
-       for (i = 0; i < smi->num_vlan_mc; i++) {
+       for (i = 0; i < priv->num_vlan_mc; i++) {
                struct rtl8366_vlan_mc vlanmc;
 
-               ret = smi->ops->get_vlan_mc(smi, i, &vlanmc);
+               ret = priv->ops->get_vlan_mc(priv, i, &vlanmc);
                if (ret)
                        return ret;
 
@@ -376,9 +376,9 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port,
                                vlanmc.priority = 0;
                                vlanmc.fid = 0;
                        }
-                       ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+                       ret = priv->ops->set_vlan_mc(priv, i, &vlanmc);
                        if (ret) {
-                               dev_err(smi->dev,
+                               dev_err(priv->dev,
                                        "failed to remove VLAN %04x\n",
                                        vlan->vid);
                                return ret;
@@ -394,15 +394,15 @@ EXPORT_SYMBOL_GPL(rtl8366_vlan_del);
 void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset,
                         uint8_t *data)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8366_mib_counter *mib;
        int i;
 
-       if (port >= smi->num_ports)
+       if (port >= priv->num_ports)
                return;
 
-       for (i = 0; i < smi->num_mib_counters; i++) {
-               mib = &smi->mib_counters[i];
+       for (i = 0; i < priv->num_mib_counters; i++) {
+               mib = &priv->mib_counters[i];
                strncpy(data + i * ETH_GSTRING_LEN,
                        mib->name, ETH_GSTRING_LEN);
        }
@@ -411,35 +411,35 @@ EXPORT_SYMBOL_GPL(rtl8366_get_strings);
 
 int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
 
        /* We only support SS_STATS */
        if (sset != ETH_SS_STATS)
                return 0;
-       if (port >= smi->num_ports)
+       if (port >= priv->num_ports)
                return -EINVAL;
 
-       return smi->num_mib_counters;
+       return priv->num_mib_counters;
 }
 EXPORT_SYMBOL_GPL(rtl8366_get_sset_count);
 
 void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        int i;
        int ret;
 
-       if (port >= smi->num_ports)
+       if (port >= priv->num_ports)
                return;
 
-       for (i = 0; i < smi->num_mib_counters; i++) {
+       for (i = 0; i < priv->num_mib_counters; i++) {
                struct rtl8366_mib_counter *mib;
                u64 mibvalue = 0;
 
-               mib = &smi->mib_counters[i];
-               ret = smi->ops->get_mib_counter(smi, port, mib, &mibvalue);
+               mib = &priv->mib_counters[i];
+               ret = priv->ops->get_mib_counter(priv, port, mib, &mibvalue);
                if (ret) {
-                       dev_err(smi->dev, "error reading MIB counter %s\n",
+                       dev_err(priv->dev, "error reading MIB counter %s\n",
                                mib->name);
                }
                data[i] = mibvalue;
similarity index 78%
rename from drivers/net/dsa/rtl8366rb.c
rename to drivers/net/dsa/realtek/rtl8366rb.c
index ecc19bd..fb6565e 100644 (file)
@@ -21,7 +21,7 @@
 #include <linux/of_irq.h>
 #include <linux/regmap.h>
 
-#include "realtek-smi-core.h"
+#include "realtek.h"
 
 #define RTL8366RB_PORT_NUM_CPU         5
 #define RTL8366RB_NUM_PORTS            6
@@ -396,7 +396,7 @@ static struct rtl8366_mib_counter rtl8366rb_mib_counters[] = {
        { 0, 70, 2, "IfOutBroadcastPkts"                        },
 };
 
-static int rtl8366rb_get_mib_counter(struct realtek_smi *smi,
+static int rtl8366rb_get_mib_counter(struct realtek_priv *priv,
                                     int port,
                                     struct rtl8366_mib_counter *mib,
                                     u64 *mibvalue)
@@ -412,12 +412,12 @@ static int rtl8366rb_get_mib_counter(struct realtek_smi *smi,
        /* Writing access counter address first
         * then ASIC will prepare 64bits counter wait for being retrived
         */
-       ret = regmap_write(smi->map, addr, 0); /* Write whatever */
+       ret = regmap_write(priv->map, addr, 0); /* Write whatever */
        if (ret)
                return ret;
 
        /* Read MIB control register */
-       ret = regmap_read(smi->map, RTL8366RB_MIB_CTRL_REG, &val);
+       ret = regmap_read(priv->map, RTL8366RB_MIB_CTRL_REG, &val);
        if (ret)
                return -EIO;
 
@@ -430,7 +430,7 @@ static int rtl8366rb_get_mib_counter(struct realtek_smi *smi,
        /* Read each individual MIB 16 bits at the time */
        *mibvalue = 0;
        for (i = mib->length; i > 0; i--) {
-               ret = regmap_read(smi->map, addr + (i - 1), &val);
+               ret = regmap_read(priv->map, addr + (i - 1), &val);
                if (ret)
                        return ret;
                *mibvalue = (*mibvalue << 16) | (val & 0xFFFF);
@@ -455,38 +455,38 @@ static u32 rtl8366rb_get_irqmask(struct irq_data *d)
 
 static void rtl8366rb_mask_irq(struct irq_data *d)
 {
-       struct realtek_smi *smi = irq_data_get_irq_chip_data(d);
+       struct realtek_priv *priv = irq_data_get_irq_chip_data(d);
        int ret;
 
-       ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_MASK_REG,
+       ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_MASK_REG,
                                 rtl8366rb_get_irqmask(d), 0);
        if (ret)
-               dev_err(smi->dev, "could not mask IRQ\n");
+               dev_err(priv->dev, "could not mask IRQ\n");
 }
 
 static void rtl8366rb_unmask_irq(struct irq_data *d)
 {
-       struct realtek_smi *smi = irq_data_get_irq_chip_data(d);
+       struct realtek_priv *priv = irq_data_get_irq_chip_data(d);
        int ret;
 
-       ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_MASK_REG,
+       ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_MASK_REG,
                                 rtl8366rb_get_irqmask(d),
                                 rtl8366rb_get_irqmask(d));
        if (ret)
-               dev_err(smi->dev, "could not unmask IRQ\n");
+               dev_err(priv->dev, "could not unmask IRQ\n");
 }
 
 static irqreturn_t rtl8366rb_irq(int irq, void *data)
 {
-       struct realtek_smi *smi = data;
+       struct realtek_priv *priv = data;
        u32 stat;
        int ret;
 
        /* This clears the IRQ status register */
-       ret = regmap_read(smi->map, RTL8366RB_INTERRUPT_STATUS_REG,
+       ret = regmap_read(priv->map, RTL8366RB_INTERRUPT_STATUS_REG,
                          &stat);
        if (ret) {
-               dev_err(smi->dev, "can't read interrupt status\n");
+               dev_err(priv->dev, "can't read interrupt status\n");
                return IRQ_NONE;
        }
        stat &= RTL8366RB_INTERRUPT_VALID;
@@ -502,7 +502,7 @@ static irqreturn_t rtl8366rb_irq(int irq, void *data)
                 */
                if (line < 12 && line > 5)
                        line -= 5;
-               child_irq = irq_find_mapping(smi->irqdomain, line);
+               child_irq = irq_find_mapping(priv->irqdomain, line);
                handle_nested_irq(child_irq);
        }
        return IRQ_HANDLED;
@@ -538,7 +538,7 @@ static const struct irq_domain_ops rtl8366rb_irqdomain_ops = {
        .xlate  = irq_domain_xlate_onecell,
 };
 
-static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
+static int rtl8366rb_setup_cascaded_irq(struct realtek_priv *priv)
 {
        struct device_node *intc;
        unsigned long irq_trig;
@@ -547,24 +547,24 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
        u32 val;
        int i;
 
-       intc = of_get_child_by_name(smi->dev->of_node, "interrupt-controller");
+       intc = of_get_child_by_name(priv->dev->of_node, "interrupt-controller");
        if (!intc) {
-               dev_err(smi->dev, "missing child interrupt-controller node\n");
+               dev_err(priv->dev, "missing child interrupt-controller node\n");
                return -EINVAL;
        }
        /* RB8366RB IRQs cascade off this one */
        irq = of_irq_get(intc, 0);
        if (irq <= 0) {
-               dev_err(smi->dev, "failed to get parent IRQ\n");
+               dev_err(priv->dev, "failed to get parent IRQ\n");
                ret = irq ? irq : -EINVAL;
                goto out_put_node;
        }
 
        /* This clears the IRQ status register */
-       ret = regmap_read(smi->map, RTL8366RB_INTERRUPT_STATUS_REG,
+       ret = regmap_read(priv->map, RTL8366RB_INTERRUPT_STATUS_REG,
                          &val);
        if (ret) {
-               dev_err(smi->dev, "can't read interrupt status\n");
+               dev_err(priv->dev, "can't read interrupt status\n");
                goto out_put_node;
        }
 
@@ -573,48 +573,48 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
        switch (irq_trig) {
        case IRQF_TRIGGER_RISING:
        case IRQF_TRIGGER_HIGH:
-               dev_info(smi->dev, "active high/rising IRQ\n");
+               dev_info(priv->dev, "active high/rising IRQ\n");
                val = 0;
                break;
        case IRQF_TRIGGER_FALLING:
        case IRQF_TRIGGER_LOW:
-               dev_info(smi->dev, "active low/falling IRQ\n");
+               dev_info(priv->dev, "active low/falling IRQ\n");
                val = RTL8366RB_INTERRUPT_POLARITY;
                break;
        }
-       ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_CONTROL_REG,
+       ret = regmap_update_bits(priv->map, RTL8366RB_INTERRUPT_CONTROL_REG,
                                 RTL8366RB_INTERRUPT_POLARITY,
                                 val);
        if (ret) {
-               dev_err(smi->dev, "could not configure IRQ polarity\n");
+               dev_err(priv->dev, "could not configure IRQ polarity\n");
                goto out_put_node;
        }
 
-       ret = devm_request_threaded_irq(smi->dev, irq, NULL,
+       ret = devm_request_threaded_irq(priv->dev, irq, NULL,
                                        rtl8366rb_irq, IRQF_ONESHOT,
-                                       "RTL8366RB", smi);
+                                       "RTL8366RB", priv);
        if (ret) {
-               dev_err(smi->dev, "unable to request irq: %d\n", ret);
+               dev_err(priv->dev, "unable to request irq: %d\n", ret);
                goto out_put_node;
        }
-       smi->irqdomain = irq_domain_add_linear(intc,
-                                              RTL8366RB_NUM_INTERRUPT,
-                                              &rtl8366rb_irqdomain_ops,
-                                              smi);
-       if (!smi->irqdomain) {
-               dev_err(smi->dev, "failed to create IRQ domain\n");
+       priv->irqdomain = irq_domain_add_linear(intc,
+                                               RTL8366RB_NUM_INTERRUPT,
+                                               &rtl8366rb_irqdomain_ops,
+                                               priv);
+       if (!priv->irqdomain) {
+               dev_err(priv->dev, "failed to create IRQ domain\n");
                ret = -EINVAL;
                goto out_put_node;
        }
-       for (i = 0; i < smi->num_ports; i++)
-               irq_set_parent(irq_create_mapping(smi->irqdomain, i), irq);
+       for (i = 0; i < priv->num_ports; i++)
+               irq_set_parent(irq_create_mapping(priv->irqdomain, i), irq);
 
 out_put_node:
        of_node_put(intc);
        return ret;
 }
 
-static int rtl8366rb_set_addr(struct realtek_smi *smi)
+static int rtl8366rb_set_addr(struct realtek_priv *priv)
 {
        u8 addr[ETH_ALEN];
        u16 val;
@@ -622,18 +622,18 @@ static int rtl8366rb_set_addr(struct realtek_smi *smi)
 
        eth_random_addr(addr);
 
-       dev_info(smi->dev, "set MAC: %02X:%02X:%02X:%02X:%02X:%02X\n",
+       dev_info(priv->dev, "set MAC: %02X:%02X:%02X:%02X:%02X:%02X\n",
                 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
        val = addr[0] << 8 | addr[1];
-       ret = regmap_write(smi->map, RTL8366RB_SMAR0, val);
+       ret = regmap_write(priv->map, RTL8366RB_SMAR0, val);
        if (ret)
                return ret;
        val = addr[2] << 8 | addr[3];
-       ret = regmap_write(smi->map, RTL8366RB_SMAR1, val);
+       ret = regmap_write(priv->map, RTL8366RB_SMAR1, val);
        if (ret)
                return ret;
        val = addr[4] << 8 | addr[5];
-       ret = regmap_write(smi->map, RTL8366RB_SMAR2, val);
+       ret = regmap_write(priv->map, RTL8366RB_SMAR2, val);
        if (ret)
                return ret;
 
@@ -765,7 +765,7 @@ static const struct rtl8366rb_jam_tbl_entry rtl8366rb_green_jam[] = {
 
 /* Function that jams the tables in the proper registers */
 static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table,
-                              int jam_size, struct realtek_smi *smi,
+                              int jam_size, struct realtek_priv *priv,
                               bool write_dbg)
 {
        u32 val;
@@ -774,24 +774,24 @@ static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table,
 
        for (i = 0; i < jam_size; i++) {
                if ((jam_table[i].reg & 0xBE00) == 0xBE00) {
-                       ret = regmap_read(smi->map,
+                       ret = regmap_read(priv->map,
                                          RTL8366RB_PHY_ACCESS_BUSY_REG,
                                          &val);
                        if (ret)
                                return ret;
                        if (!(val & RTL8366RB_PHY_INT_BUSY)) {
-                               ret = regmap_write(smi->map,
-                                               RTL8366RB_PHY_ACCESS_CTRL_REG,
-                                               RTL8366RB_PHY_CTRL_WRITE);
+                               ret = regmap_write(priv->map,
+                                                  RTL8366RB_PHY_ACCESS_CTRL_REG,
+                                                  RTL8366RB_PHY_CTRL_WRITE);
                                if (ret)
                                        return ret;
                        }
                }
                if (write_dbg)
-                       dev_dbg(smi->dev, "jam %04x into register %04x\n",
+                       dev_dbg(priv->dev, "jam %04x into register %04x\n",
                                jam_table[i].val,
                                jam_table[i].reg);
-               ret = regmap_write(smi->map,
+               ret = regmap_write(priv->map,
                                   jam_table[i].reg,
                                   jam_table[i].val);
                if (ret)
@@ -802,7 +802,7 @@ static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table,
 
 static int rtl8366rb_setup(struct dsa_switch *ds)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        const struct rtl8366rb_jam_tbl_entry *jam_table;
        struct rtl8366rb *rb;
        u32 chip_ver = 0;
@@ -812,11 +812,11 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
        int ret;
        int i;
 
-       rb = smi->chip_data;
+       rb = priv->chip_data;
 
-       ret = regmap_read(smi->map, RTL8366RB_CHIP_ID_REG, &chip_id);
+       ret = regmap_read(priv->map, RTL8366RB_CHIP_ID_REG, &chip_id);
        if (ret) {
-               dev_err(smi->dev, "unable to read chip id\n");
+               dev_err(priv->dev, "unable to read chip id\n");
                return ret;
        }
 
@@ -824,18 +824,18 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
        case RTL8366RB_CHIP_ID_8366:
                break;
        default:
-               dev_err(smi->dev, "unknown chip id (%04x)\n", chip_id);
+               dev_err(priv->dev, "unknown chip id (%04x)\n", chip_id);
                return -ENODEV;
        }
 
-       ret = regmap_read(smi->map, RTL8366RB_CHIP_VERSION_CTRL_REG,
+       ret = regmap_read(priv->map, RTL8366RB_CHIP_VERSION_CTRL_REG,
                          &chip_ver);
        if (ret) {
-               dev_err(smi->dev, "unable to read chip version\n");
+               dev_err(priv->dev, "unable to read chip version\n");
                return ret;
        }
 
-       dev_info(smi->dev, "RTL%04x ver %u chip found\n",
+       dev_info(priv->dev, "RTL%04x ver %u chip found\n",
                 chip_id, chip_ver & RTL8366RB_CHIP_VERSION_MASK);
 
        /* Do the init dance using the right jam table */
@@ -872,20 +872,20 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
                jam_size = ARRAY_SIZE(rtl8366rb_init_jam_dgn3500);
        }
 
-       ret = rtl8366rb_jam_table(jam_table, jam_size, smi, true);
+       ret = rtl8366rb_jam_table(jam_table, jam_size, priv, true);
        if (ret)
                return ret;
 
        /* Isolate all user ports so they can only send packets to itself and the CPU port */
        for (i = 0; i < RTL8366RB_PORT_NUM_CPU; i++) {
-               ret = regmap_write(smi->map, RTL8366RB_PORT_ISO(i),
+               ret = regmap_write(priv->map, RTL8366RB_PORT_ISO(i),
                                   RTL8366RB_PORT_ISO_PORTS(BIT(RTL8366RB_PORT_NUM_CPU)) |
                                   RTL8366RB_PORT_ISO_EN);
                if (ret)
                        return ret;
        }
        /* CPU port can send packets to all ports */
-       ret = regmap_write(smi->map, RTL8366RB_PORT_ISO(RTL8366RB_PORT_NUM_CPU),
+       ret = regmap_write(priv->map, RTL8366RB_PORT_ISO(RTL8366RB_PORT_NUM_CPU),
                           RTL8366RB_PORT_ISO_PORTS(dsa_user_ports(ds)) |
                           RTL8366RB_PORT_ISO_EN);
        if (ret)
@@ -893,26 +893,26 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
 
        /* Set up the "green ethernet" feature */
        ret = rtl8366rb_jam_table(rtl8366rb_green_jam,
-                                 ARRAY_SIZE(rtl8366rb_green_jam), smi, false);
+                                 ARRAY_SIZE(rtl8366rb_green_jam), priv, false);
        if (ret)
                return ret;
 
-       ret = regmap_write(smi->map,
+       ret = regmap_write(priv->map,
                           RTL8366RB_GREEN_FEATURE_REG,
                           (chip_ver == 1) ? 0x0007 : 0x0003);
        if (ret)
                return ret;
 
        /* Vendor driver sets 0x240 in registers 0xc and 0xd (undocumented) */
-       ret = regmap_write(smi->map, 0x0c, 0x240);
+       ret = regmap_write(priv->map, 0x0c, 0x240);
        if (ret)
                return ret;
-       ret = regmap_write(smi->map, 0x0d, 0x240);
+       ret = regmap_write(priv->map, 0x0d, 0x240);
        if (ret)
                return ret;
 
        /* Set some random MAC address */
-       ret = rtl8366rb_set_addr(smi);
+       ret = rtl8366rb_set_addr(priv);
        if (ret)
                return ret;
 
@@ -921,21 +921,21 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
         * If you set RTL8368RB_CPU_NO_TAG (bit 15) in this registers
         * the custom tag is turned off.
         */
-       ret = regmap_update_bits(smi->map, RTL8368RB_CPU_CTRL_REG,
+       ret = regmap_update_bits(priv->map, RTL8368RB_CPU_CTRL_REG,
                                 0xFFFF,
-                                BIT(smi->cpu_port));
+                                BIT(priv->cpu_port));
        if (ret)
                return ret;
 
        /* Make sure we default-enable the fixed CPU port */
-       ret = regmap_update_bits(smi->map, RTL8366RB_PECR,
-                                BIT(smi->cpu_port),
+       ret = regmap_update_bits(priv->map, RTL8366RB_PECR,
+                                BIT(priv->cpu_port),
                                 0);
        if (ret)
                return ret;
 
        /* Set maximum packet length to 1536 bytes */
-       ret = regmap_update_bits(smi->map, RTL8366RB_SGCR,
+       ret = regmap_update_bits(priv->map, RTL8366RB_SGCR,
                                 RTL8366RB_SGCR_MAX_LENGTH_MASK,
                                 RTL8366RB_SGCR_MAX_LENGTH_1536);
        if (ret)
@@ -945,13 +945,13 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
                rb->max_mtu[i] = 1532;
 
        /* Disable learning for all ports */
-       ret = regmap_write(smi->map, RTL8366RB_PORT_LEARNDIS_CTRL,
+       ret = regmap_write(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
                           RTL8366RB_PORT_ALL);
        if (ret)
                return ret;
 
        /* Enable auto ageing for all ports */
-       ret = regmap_write(smi->map, RTL8366RB_SECURITY_CTRL, 0);
+       ret = regmap_write(priv->map, RTL8366RB_SECURITY_CTRL, 0);
        if (ret)
                return ret;
 
@@ -962,30 +962,30 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
         * connected to something exotic such as fiber, then this might
         * be worth experimenting with.
         */
-       ret = regmap_update_bits(smi->map, RTL8366RB_PMC0,
+       ret = regmap_update_bits(priv->map, RTL8366RB_PMC0,
                                 RTL8366RB_PMC0_P4_IOMODE_MASK,
                                 0 << RTL8366RB_PMC0_P4_IOMODE_SHIFT);
        if (ret)
                return ret;
 
        /* Accept all packets by default, we enable filtering on-demand */
-       ret = regmap_write(smi->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG,
+       ret = regmap_write(priv->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG,
                           0);
        if (ret)
                return ret;
-       ret = regmap_write(smi->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG,
+       ret = regmap_write(priv->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG,
                           0);
        if (ret)
                return ret;
 
        /* Don't drop packets whose DA has not been learned */
-       ret = regmap_update_bits(smi->map, RTL8366RB_SSCR2,
+       ret = regmap_update_bits(priv->map, RTL8366RB_SSCR2,
                                 RTL8366RB_SSCR2_DROP_UNKNOWN_DA, 0);
        if (ret)
                return ret;
 
        /* Set blinking, TODO: make this configurable */
-       ret = regmap_update_bits(smi->map, RTL8366RB_LED_BLINKRATE_REG,
+       ret = regmap_update_bits(priv->map, RTL8366RB_LED_BLINKRATE_REG,
                                 RTL8366RB_LED_BLINKRATE_MASK,
                                 RTL8366RB_LED_BLINKRATE_56MS);
        if (ret)
@@ -996,15 +996,15 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
         * behaviour (no individual config) but we can set up each
         * LED separately.
         */
-       if (smi->leds_disabled) {
+       if (priv->leds_disabled) {
                /* Turn everything off */
-               regmap_update_bits(smi->map,
+               regmap_update_bits(priv->map,
                                   RTL8366RB_LED_0_1_CTRL_REG,
                                   0x0FFF, 0);
-               regmap_update_bits(smi->map,
+               regmap_update_bits(priv->map,
                                   RTL8366RB_LED_2_3_CTRL_REG,
                                   0x0FFF, 0);
-               regmap_update_bits(smi->map,
+               regmap_update_bits(priv->map,
                                   RTL8366RB_INTERRUPT_CONTROL_REG,
                                   RTL8366RB_P4_RGMII_LED,
                                   0);
@@ -1014,7 +1014,7 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
                val = RTL8366RB_LED_FORCE;
        }
        for (i = 0; i < 4; i++) {
-               ret = regmap_update_bits(smi->map,
+               ret = regmap_update_bits(priv->map,
                                         RTL8366RB_LED_CTRL_REG,
                                         0xf << (i * 4),
                                         val << (i * 4));
@@ -1022,18 +1022,20 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
                        return ret;
        }
 
-       ret = rtl8366_reset_vlan(smi);
+       ret = rtl8366_reset_vlan(priv);
        if (ret)
                return ret;
 
-       ret = rtl8366rb_setup_cascaded_irq(smi);
+       ret = rtl8366rb_setup_cascaded_irq(priv);
        if (ret)
-               dev_info(smi->dev, "no interrupt support\n");
+               dev_info(priv->dev, "no interrupt support\n");
 
-       ret = realtek_smi_setup_mdio(smi);
-       if (ret) {
-               dev_info(smi->dev, "could not set up MDIO bus\n");
-               return -ENODEV;
+       if (priv->setup_interface) {
+               ret = priv->setup_interface(ds);
+               if (ret) {
+                       dev_err(priv->dev, "could not set up MDIO bus\n");
+                       return -ENODEV;
+               }
        }
 
        return 0;
@@ -1052,35 +1054,35 @@ rtl8366rb_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode,
                      phy_interface_t interface, struct phy_device *phydev,
                      int speed, int duplex, bool tx_pause, bool rx_pause)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        int ret;
 
-       if (port != smi->cpu_port)
+       if (port != priv->cpu_port)
                return;
 
-       dev_dbg(smi->dev, "MAC link up on CPU port (%d)\n", port);
+       dev_dbg(priv->dev, "MAC link up on CPU port (%d)\n", port);
 
        /* Force the fixed CPU port into 1Gbit mode, no autonegotiation */
-       ret = regmap_update_bits(smi->map, RTL8366RB_MAC_FORCE_CTRL_REG,
+       ret = regmap_update_bits(priv->map, RTL8366RB_MAC_FORCE_CTRL_REG,
                                 BIT(port), BIT(port));
        if (ret) {
-               dev_err(smi->dev, "failed to force 1Gbit on CPU port\n");
+               dev_err(priv->dev, "failed to force 1Gbit on CPU port\n");
                return;
        }
 
-       ret = regmap_update_bits(smi->map, RTL8366RB_PAACR2,
+       ret = regmap_update_bits(priv->map, RTL8366RB_PAACR2,
                                 0xFF00U,
                                 RTL8366RB_PAACR_CPU_PORT << 8);
        if (ret) {
-               dev_err(smi->dev, "failed to set PAACR on CPU port\n");
+               dev_err(priv->dev, "failed to set PAACR on CPU port\n");
                return;
        }
 
        /* Enable the CPU port */
-       ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+       ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
                                 0);
        if (ret) {
-               dev_err(smi->dev, "failed to enable the CPU port\n");
+               dev_err(priv->dev, "failed to enable the CPU port\n");
                return;
        }
 }
@@ -1089,99 +1091,99 @@ static void
 rtl8366rb_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
                        phy_interface_t interface)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        int ret;
 
-       if (port != smi->cpu_port)
+       if (port != priv->cpu_port)
                return;
 
-       dev_dbg(smi->dev, "MAC link down on CPU port (%d)\n", port);
+       dev_dbg(priv->dev, "MAC link down on CPU port (%d)\n", port);
 
        /* Disable the CPU port */
-       ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+       ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
                                 BIT(port));
        if (ret) {
-               dev_err(smi->dev, "failed to disable the CPU port\n");
+               dev_err(priv->dev, "failed to disable the CPU port\n");
                return;
        }
 }
 
-static void rb8366rb_set_port_led(struct realtek_smi *smi,
+static void rb8366rb_set_port_led(struct realtek_priv *priv,
                                  int port, bool enable)
 {
        u16 val = enable ? 0x3f : 0;
        int ret;
 
-       if (smi->leds_disabled)
+       if (priv->leds_disabled)
                return;
 
        switch (port) {
        case 0:
-               ret = regmap_update_bits(smi->map,
+               ret = regmap_update_bits(priv->map,
                                         RTL8366RB_LED_0_1_CTRL_REG,
                                         0x3F, val);
                break;
        case 1:
-               ret = regmap_update_bits(smi->map,
+               ret = regmap_update_bits(priv->map,
                                         RTL8366RB_LED_0_1_CTRL_REG,
                                         0x3F << RTL8366RB_LED_1_OFFSET,
                                         val << RTL8366RB_LED_1_OFFSET);
                break;
        case 2:
-               ret = regmap_update_bits(smi->map,
+               ret = regmap_update_bits(priv->map,
                                         RTL8366RB_LED_2_3_CTRL_REG,
                                         0x3F, val);
                break;
        case 3:
-               ret = regmap_update_bits(smi->map,
+               ret = regmap_update_bits(priv->map,
                                         RTL8366RB_LED_2_3_CTRL_REG,
                                         0x3F << RTL8366RB_LED_3_OFFSET,
                                         val << RTL8366RB_LED_3_OFFSET);
                break;
        case 4:
-               ret = regmap_update_bits(smi->map,
+               ret = regmap_update_bits(priv->map,
                                         RTL8366RB_INTERRUPT_CONTROL_REG,
                                         RTL8366RB_P4_RGMII_LED,
                                         enable ? RTL8366RB_P4_RGMII_LED : 0);
                break;
        default:
-               dev_err(smi->dev, "no LED for port %d\n", port);
+               dev_err(priv->dev, "no LED for port %d\n", port);
                return;
        }
        if (ret)
-               dev_err(smi->dev, "error updating LED on port %d\n", port);
+               dev_err(priv->dev, "error updating LED on port %d\n", port);
 }
 
 static int
 rtl8366rb_port_enable(struct dsa_switch *ds, int port,
                      struct phy_device *phy)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        int ret;
 
-       dev_dbg(smi->dev, "enable port %d\n", port);
-       ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+       dev_dbg(priv->dev, "enable port %d\n", port);
+       ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
                                 0);
        if (ret)
                return ret;
 
-       rb8366rb_set_port_led(smi, port, true);
+       rb8366rb_set_port_led(priv, port, true);
        return 0;
 }
 
 static void
 rtl8366rb_port_disable(struct dsa_switch *ds, int port)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        int ret;
 
-       dev_dbg(smi->dev, "disable port %d\n", port);
-       ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+       dev_dbg(priv->dev, "disable port %d\n", port);
+       ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
                                 BIT(port));
        if (ret)
                return;
 
-       rb8366rb_set_port_led(smi, port, false);
+       rb8366rb_set_port_led(priv, port, false);
 }
 
 static int
@@ -1189,7 +1191,7 @@ rtl8366rb_port_bridge_join(struct dsa_switch *ds, int port,
                           struct dsa_bridge bridge,
                           bool *tx_fwd_offload)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        unsigned int port_bitmap = 0;
        int ret, i;
 
@@ -1202,17 +1204,17 @@ rtl8366rb_port_bridge_join(struct dsa_switch *ds, int port,
                if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge))
                        continue;
                /* Join this port to each other port on the bridge */
-               ret = regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(i),
+               ret = regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(i),
                                         RTL8366RB_PORT_ISO_PORTS(BIT(port)),
                                         RTL8366RB_PORT_ISO_PORTS(BIT(port)));
                if (ret)
-                       dev_err(smi->dev, "failed to join port %d\n", port);
+                       dev_err(priv->dev, "failed to join port %d\n", port);
 
                port_bitmap |= BIT(i);
        }
 
        /* Set the bits for the ports we can access */
-       return regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(port),
+       return regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port),
                                  RTL8366RB_PORT_ISO_PORTS(port_bitmap),
                                  RTL8366RB_PORT_ISO_PORTS(port_bitmap));
 }
@@ -1221,7 +1223,7 @@ static void
 rtl8366rb_port_bridge_leave(struct dsa_switch *ds, int port,
                            struct dsa_bridge bridge)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        unsigned int port_bitmap = 0;
        int ret, i;
 
@@ -1234,28 +1236,30 @@ rtl8366rb_port_bridge_leave(struct dsa_switch *ds, int port,
                if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge))
                        continue;
                /* Remove this port from any other port on the bridge */
-               ret = regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(i),
+               ret = regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(i),
                                         RTL8366RB_PORT_ISO_PORTS(BIT(port)), 0);
                if (ret)
-                       dev_err(smi->dev, "failed to leave port %d\n", port);
+                       dev_err(priv->dev, "failed to leave port %d\n", port);
 
                port_bitmap |= BIT(i);
        }
 
        /* Clear the bits for the ports we can not access, leave ourselves */
-       regmap_update_bits(smi->map, RTL8366RB_PORT_ISO(port),
+       regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port),
                           RTL8366RB_PORT_ISO_PORTS(port_bitmap), 0);
 }
 
 /**
  * rtl8366rb_drop_untagged() - make the switch drop untagged and C-tagged frames
- * @smi: SMI state container
+ * @priv: SMI state container
  * @port: the port to drop untagged and C-tagged frames on
  * @drop: whether to drop or pass untagged and C-tagged frames
+ *
+ * Return: zero for success, a negative number on error.
  */
-static int rtl8366rb_drop_untagged(struct realtek_smi *smi, int port, bool drop)
+static int rtl8366rb_drop_untagged(struct realtek_priv *priv, int port, bool drop)
 {
-       return regmap_update_bits(smi->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG,
+       return regmap_update_bits(priv->map, RTL8366RB_VLAN_INGRESS_CTRL1_REG,
                                  RTL8366RB_VLAN_INGRESS_CTRL1_DROP(port),
                                  drop ? RTL8366RB_VLAN_INGRESS_CTRL1_DROP(port) : 0);
 }
@@ -1264,17 +1268,17 @@ static int rtl8366rb_vlan_filtering(struct dsa_switch *ds, int port,
                                    bool vlan_filtering,
                                    struct netlink_ext_ack *extack)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8366rb *rb;
        int ret;
 
-       rb = smi->chip_data;
+       rb = priv->chip_data;
 
-       dev_dbg(smi->dev, "port %d: %s VLAN filtering\n", port,
+       dev_dbg(priv->dev, "port %d: %s VLAN filtering\n", port,
                vlan_filtering ? "enable" : "disable");
 
        /* If the port is not in the member set, the frame will be dropped */
-       ret = regmap_update_bits(smi->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG,
+       ret = regmap_update_bits(priv->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG,
                                 BIT(port), vlan_filtering ? BIT(port) : 0);
        if (ret)
                return ret;
@@ -1284,9 +1288,9 @@ static int rtl8366rb_vlan_filtering(struct dsa_switch *ds, int port,
         * filtering on a port, we need to accept any frames.
         */
        if (vlan_filtering)
-               ret = rtl8366rb_drop_untagged(smi, port, !rb->pvid_enabled[port]);
+               ret = rtl8366rb_drop_untagged(priv, port, !rb->pvid_enabled[port]);
        else
-               ret = rtl8366rb_drop_untagged(smi, port, false);
+               ret = rtl8366rb_drop_untagged(priv, port, false);
 
        return ret;
 }
@@ -1308,11 +1312,11 @@ rtl8366rb_port_bridge_flags(struct dsa_switch *ds, int port,
                            struct switchdev_brport_flags flags,
                            struct netlink_ext_ack *extack)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        int ret;
 
        if (flags.mask & BR_LEARNING) {
-               ret = regmap_update_bits(smi->map, RTL8366RB_PORT_LEARNDIS_CTRL,
+               ret = regmap_update_bits(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
                                         BIT(port),
                                         (flags.val & BR_LEARNING) ? 0 : BIT(port));
                if (ret)
@@ -1325,7 +1329,7 @@ rtl8366rb_port_bridge_flags(struct dsa_switch *ds, int port,
 static void
 rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        u32 val;
        int i;
 
@@ -1344,13 +1348,13 @@ rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
                val = RTL8366RB_STP_STATE_FORWARDING;
                break;
        default:
-               dev_err(smi->dev, "unknown bridge state requested\n");
+               dev_err(priv->dev, "unknown bridge state requested\n");
                return;
        }
 
        /* Set the same status for the port on all the FIDs */
        for (i = 0; i < RTL8366RB_NUM_FIDS; i++) {
-               regmap_update_bits(smi->map, RTL8366RB_STP_STATE_BASE + i,
+               regmap_update_bits(priv->map, RTL8366RB_STP_STATE_BASE + i,
                                   RTL8366RB_STP_STATE_MASK(port),
                                   RTL8366RB_STP_STATE(port, val));
        }
@@ -1359,26 +1363,26 @@ rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 static void
 rtl8366rb_port_fast_age(struct dsa_switch *ds, int port)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
 
        /* This will age out any learned L2 entries */
-       regmap_update_bits(smi->map, RTL8366RB_SECURITY_CTRL,
+       regmap_update_bits(priv->map, RTL8366RB_SECURITY_CTRL,
                           BIT(port), BIT(port));
        /* Restore the normal state of things */
-       regmap_update_bits(smi->map, RTL8366RB_SECURITY_CTRL,
+       regmap_update_bits(priv->map, RTL8366RB_SECURITY_CTRL,
                           BIT(port), 0);
 }
 
 static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 {
-       struct realtek_smi *smi = ds->priv;
+       struct realtek_priv *priv = ds->priv;
        struct rtl8366rb *rb;
        unsigned int max_mtu;
        u32 len;
        int i;
 
        /* Cache the per-port MTU setting */
-       rb = smi->chip_data;
+       rb = priv->chip_data;
        rb->max_mtu[port] = new_mtu;
 
        /* Roof out the MTU for the entire switch to the greatest
@@ -1406,7 +1410,7 @@ static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
        else
                len = RTL8366RB_SGCR_MAX_LENGTH_16000;
 
-       return regmap_update_bits(smi->map, RTL8366RB_SGCR,
+       return regmap_update_bits(priv->map, RTL8366RB_SGCR,
                                  RTL8366RB_SGCR_MAX_LENGTH_MASK,
                                  len);
 }
@@ -1419,7 +1423,7 @@ static int rtl8366rb_max_mtu(struct dsa_switch *ds, int port)
        return 15996;
 }
 
-static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid,
+static int rtl8366rb_get_vlan_4k(struct realtek_priv *priv, u32 vid,
                                 struct rtl8366_vlan_4k *vlan4k)
 {
        u32 data[3];
@@ -1432,19 +1436,19 @@ static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid,
                return -EINVAL;
 
        /* write VID */
-       ret = regmap_write(smi->map, RTL8366RB_VLAN_TABLE_WRITE_BASE,
+       ret = regmap_write(priv->map, RTL8366RB_VLAN_TABLE_WRITE_BASE,
                           vid & RTL8366RB_VLAN_VID_MASK);
        if (ret)
                return ret;
 
        /* write table access control word */
-       ret = regmap_write(smi->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
+       ret = regmap_write(priv->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
                           RTL8366RB_TABLE_VLAN_READ_CTRL);
        if (ret)
                return ret;
 
        for (i = 0; i < 3; i++) {
-               ret = regmap_read(smi->map,
+               ret = regmap_read(priv->map,
                                  RTL8366RB_VLAN_TABLE_READ_BASE + i,
                                  &data[i]);
                if (ret)
@@ -1460,7 +1464,7 @@ static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid,
        return 0;
 }
 
-static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi,
+static int rtl8366rb_set_vlan_4k(struct realtek_priv *priv,
                                 const struct rtl8366_vlan_4k *vlan4k)
 {
        u32 data[3];
@@ -1480,7 +1484,7 @@ static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi,
        data[2] = vlan4k->fid & RTL8366RB_VLAN_FID_MASK;
 
        for (i = 0; i < 3; i++) {
-               ret = regmap_write(smi->map,
+               ret = regmap_write(priv->map,
                                   RTL8366RB_VLAN_TABLE_WRITE_BASE + i,
                                   data[i]);
                if (ret)
@@ -1488,13 +1492,13 @@ static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi,
        }
 
        /* write table access control word */
-       ret = regmap_write(smi->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
+       ret = regmap_write(priv->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
                           RTL8366RB_TABLE_VLAN_WRITE_CTRL);
 
        return ret;
 }
 
-static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index,
+static int rtl8366rb_get_vlan_mc(struct realtek_priv *priv, u32 index,
                                 struct rtl8366_vlan_mc *vlanmc)
 {
        u32 data[3];
@@ -1507,7 +1511,7 @@ static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index,
                return -EINVAL;
 
        for (i = 0; i < 3; i++) {
-               ret = regmap_read(smi->map,
+               ret = regmap_read(priv->map,
                                  RTL8366RB_VLAN_MC_BASE(index) + i,
                                  &data[i]);
                if (ret)
@@ -1525,7 +1529,7 @@ static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index,
        return 0;
 }
 
-static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index,
+static int rtl8366rb_set_vlan_mc(struct realtek_priv *priv, u32 index,
                                 const struct rtl8366_vlan_mc *vlanmc)
 {
        u32 data[3];
@@ -1549,7 +1553,7 @@ static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index,
        data[2] = vlanmc->fid & RTL8366RB_VLAN_FID_MASK;
 
        for (i = 0; i < 3; i++) {
-               ret = regmap_write(smi->map,
+               ret = regmap_write(priv->map,
                                   RTL8366RB_VLAN_MC_BASE(index) + i,
                                   data[i]);
                if (ret)
@@ -1559,15 +1563,15 @@ static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index,
        return 0;
 }
 
-static int rtl8366rb_get_mc_index(struct realtek_smi *smi, int port, int *val)
+static int rtl8366rb_get_mc_index(struct realtek_priv *priv, int port, int *val)
 {
        u32 data;
        int ret;
 
-       if (port >= smi->num_ports)
+       if (port >= priv->num_ports)
                return -EINVAL;
 
-       ret = regmap_read(smi->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
+       ret = regmap_read(priv->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
                          &data);
        if (ret)
                return ret;
@@ -1578,22 +1582,22 @@ static int rtl8366rb_get_mc_index(struct realtek_smi *smi, int port, int *val)
        return 0;
 }
 
-static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index)
+static int rtl8366rb_set_mc_index(struct realtek_priv *priv, int port, int index)
 {
        struct rtl8366rb *rb;
        bool pvid_enabled;
        int ret;
 
-       rb = smi->chip_data;
+       rb = priv->chip_data;
        pvid_enabled = !!index;
 
-       if (port >= smi->num_ports || index >= RTL8366RB_NUM_VLANS)
+       if (port >= priv->num_ports || index >= RTL8366RB_NUM_VLANS)
                return -EINVAL;
 
-       ret = regmap_update_bits(smi->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
-                               RTL8366RB_PORT_VLAN_CTRL_MASK <<
+       ret = regmap_update_bits(priv->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
+                                RTL8366RB_PORT_VLAN_CTRL_MASK <<
                                        RTL8366RB_PORT_VLAN_CTRL_SHIFT(port),
-                               (index & RTL8366RB_PORT_VLAN_CTRL_MASK) <<
+                                (index & RTL8366RB_PORT_VLAN_CTRL_MASK) <<
                                        RTL8366RB_PORT_VLAN_CTRL_SHIFT(port));
        if (ret)
                return ret;
@@ -1604,17 +1608,17 @@ static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index)
         * not drop any untagged or C-tagged frames. Make sure to update the
         * filtering setting.
         */
-       if (dsa_port_is_vlan_filtering(dsa_to_port(smi->ds, port)))
-               ret = rtl8366rb_drop_untagged(smi, port, !pvid_enabled);
+       if (dsa_port_is_vlan_filtering(dsa_to_port(priv->ds, port)))
+               ret = rtl8366rb_drop_untagged(priv, port, !pvid_enabled);
 
        return ret;
 }
 
-static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan)
+static bool rtl8366rb_is_vlan_valid(struct realtek_priv *priv, unsigned int vlan)
 {
        unsigned int max = RTL8366RB_NUM_VLANS - 1;
 
-       if (smi->vlan4k_enabled)
+       if (priv->vlan4k_enabled)
                max = RTL8366RB_NUM_VIDS - 1;
 
        if (vlan > max)
@@ -1623,23 +1627,23 @@ static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan)
        return true;
 }
 
-static int rtl8366rb_enable_vlan(struct realtek_smi *smi, bool enable)
+static int rtl8366rb_enable_vlan(struct realtek_priv *priv, bool enable)
 {
-       dev_dbg(smi->dev, "%s VLAN\n", enable ? "enable" : "disable");
-       return regmap_update_bits(smi->map,
+       dev_dbg(priv->dev, "%s VLAN\n", enable ? "enable" : "disable");
+       return regmap_update_bits(priv->map,
                                  RTL8366RB_SGCR, RTL8366RB_SGCR_EN_VLAN,
                                  enable ? RTL8366RB_SGCR_EN_VLAN : 0);
 }
 
-static int rtl8366rb_enable_vlan4k(struct realtek_smi *smi, bool enable)
+static int rtl8366rb_enable_vlan4k(struct realtek_priv *priv, bool enable)
 {
-       dev_dbg(smi->dev, "%s VLAN 4k\n", enable ? "enable" : "disable");
-       return regmap_update_bits(smi->map, RTL8366RB_SGCR,
+       dev_dbg(priv->dev, "%s VLAN 4k\n", enable ? "enable" : "disable");
+       return regmap_update_bits(priv->map, RTL8366RB_SGCR,
                                  RTL8366RB_SGCR_EN_VLAN_4KTB,
                                  enable ? RTL8366RB_SGCR_EN_VLAN_4KTB : 0);
 }
 
-static int rtl8366rb_phy_read(struct realtek_smi *smi, int phy, int regnum)
+static int rtl8366rb_phy_read(struct realtek_priv *priv, int phy, int regnum)
 {
        u32 val;
        u32 reg;
@@ -1648,32 +1652,32 @@ static int rtl8366rb_phy_read(struct realtek_smi *smi, int phy, int regnum)
        if (phy > RTL8366RB_PHY_NO_MAX)
                return -EINVAL;
 
-       ret = regmap_write(smi->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
+       ret = regmap_write(priv->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
                           RTL8366RB_PHY_CTRL_READ);
        if (ret)
                return ret;
 
        reg = 0x8000 | (1 << (phy + RTL8366RB_PHY_NO_OFFSET)) | regnum;
 
-       ret = regmap_write(smi->map, reg, 0);
+       ret = regmap_write(priv->map, reg, 0);
        if (ret) {
-               dev_err(smi->dev,
+               dev_err(priv->dev,
                        "failed to write PHY%d reg %04x @ %04x, ret %d\n",
                        phy, regnum, reg, ret);
                return ret;
        }
 
-       ret = regmap_read(smi->map, RTL8366RB_PHY_ACCESS_DATA_REG, &val);
+       ret = regmap_read(priv->map, RTL8366RB_PHY_ACCESS_DATA_REG, &val);
        if (ret)
                return ret;
 
-       dev_dbg(smi->dev, "read PHY%d register 0x%04x @ %08x, val <- %04x\n",
+       dev_dbg(priv->dev, "read PHY%d register 0x%04x @ %08x, val <- %04x\n",
                phy, regnum, reg, val);
 
        return val;
 }
 
-static int rtl8366rb_phy_write(struct realtek_smi *smi, int phy, int regnum,
+static int rtl8366rb_phy_write(struct realtek_priv *priv, int phy, int regnum,
                               u16 val)
 {
        u32 reg;
@@ -1682,34 +1686,45 @@ static int rtl8366rb_phy_write(struct realtek_smi *smi, int phy, int regnum,
        if (phy > RTL8366RB_PHY_NO_MAX)
                return -EINVAL;
 
-       ret = regmap_write(smi->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
+       ret = regmap_write(priv->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
                           RTL8366RB_PHY_CTRL_WRITE);
        if (ret)
                return ret;
 
        reg = 0x8000 | (1 << (phy + RTL8366RB_PHY_NO_OFFSET)) | regnum;
 
-       dev_dbg(smi->dev, "write PHY%d register 0x%04x @ %04x, val -> %04x\n",
+       dev_dbg(priv->dev, "write PHY%d register 0x%04x @ %04x, val -> %04x\n",
                phy, regnum, reg, val);
 
-       ret = regmap_write(smi->map, reg, val);
+       ret = regmap_write(priv->map, reg, val);
        if (ret)
                return ret;
 
        return 0;
 }
 
-static int rtl8366rb_reset_chip(struct realtek_smi *smi)
+static int rtl8366rb_dsa_phy_read(struct dsa_switch *ds, int phy, int regnum)
+{
+       return rtl8366rb_phy_read(ds->priv, phy, regnum);
+}
+
+static int rtl8366rb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum,
+                                  u16 val)
+{
+       return rtl8366rb_phy_write(ds->priv, phy, regnum, val);
+}
+
+static int rtl8366rb_reset_chip(struct realtek_priv *priv)
 {
        int timeout = 10;
        u32 val;
        int ret;
 
-       realtek_smi_write_reg_noack(smi, RTL8366RB_RESET_CTRL_REG,
-                                   RTL8366RB_CHIP_CTRL_RESET_HW);
+       priv->write_reg_noack(priv, RTL8366RB_RESET_CTRL_REG,
+                             RTL8366RB_CHIP_CTRL_RESET_HW);
        do {
                usleep_range(20000, 25000);
-               ret = regmap_read(smi->map, RTL8366RB_RESET_CTRL_REG, &val);
+               ret = regmap_read(priv->map, RTL8366RB_RESET_CTRL_REG, &val);
                if (ret)
                        return ret;
 
@@ -1718,21 +1733,21 @@ static int rtl8366rb_reset_chip(struct realtek_smi *smi)
        } while (--timeout);
 
        if (!timeout) {
-               dev_err(smi->dev, "timeout waiting for the switch to reset\n");
+               dev_err(priv->dev, "timeout waiting for the switch to reset\n");
                return -EIO;
        }
 
        return 0;
 }
 
-static int rtl8366rb_detect(struct realtek_smi *smi)
+static int rtl8366rb_detect(struct realtek_priv *priv)
 {
-       struct device *dev = smi->dev;
+       struct device *dev = priv->dev;
        int ret;
        u32 val;
 
        /* Detect device */
-       ret = regmap_read(smi->map, 0x5c, &val);
+       ret = regmap_read(priv->map, 0x5c, &val);
        if (ret) {
                dev_err(dev, "can't get chip ID (%d)\n", ret);
                return ret;
@@ -1745,11 +1760,11 @@ static int rtl8366rb_detect(struct realtek_smi *smi)
                return -ENODEV;
        case 0x5937:
                dev_info(dev, "found an RTL8366RB switch\n");
-               smi->cpu_port = RTL8366RB_PORT_NUM_CPU;
-               smi->num_ports = RTL8366RB_NUM_PORTS;
-               smi->num_vlan_mc = RTL8366RB_NUM_VLANS;
-               smi->mib_counters = rtl8366rb_mib_counters;
-               smi->num_mib_counters = ARRAY_SIZE(rtl8366rb_mib_counters);
+               priv->cpu_port = RTL8366RB_PORT_NUM_CPU;
+               priv->num_ports = RTL8366RB_NUM_PORTS;
+               priv->num_vlan_mc = RTL8366RB_NUM_VLANS;
+               priv->mib_counters = rtl8366rb_mib_counters;
+               priv->num_mib_counters = ARRAY_SIZE(rtl8366rb_mib_counters);
                break;
        default:
                dev_info(dev, "found an Unknown Realtek switch (id=0x%04x)\n",
@@ -1757,14 +1772,14 @@ static int rtl8366rb_detect(struct realtek_smi *smi)
                break;
        }
 
-       ret = rtl8366rb_reset_chip(smi);
+       ret = rtl8366rb_reset_chip(priv);
        if (ret)
                return ret;
 
        return 0;
 }
 
-static const struct dsa_switch_ops rtl8366rb_switch_ops = {
+static const struct dsa_switch_ops rtl8366rb_switch_ops_smi = {
        .get_tag_protocol = rtl8366_get_tag_protocol,
        .setup = rtl8366rb_setup,
        .phylink_mac_link_up = rtl8366rb_mac_link_up,
@@ -1787,7 +1802,32 @@ static const struct dsa_switch_ops rtl8366rb_switch_ops = {
        .port_max_mtu = rtl8366rb_max_mtu,
 };
 
-static const struct realtek_smi_ops rtl8366rb_smi_ops = {
+static const struct dsa_switch_ops rtl8366rb_switch_ops_mdio = {
+       .get_tag_protocol = rtl8366_get_tag_protocol,
+       .setup = rtl8366rb_setup,
+       .phy_read = rtl8366rb_dsa_phy_read,
+       .phy_write = rtl8366rb_dsa_phy_write,
+       .phylink_mac_link_up = rtl8366rb_mac_link_up,
+       .phylink_mac_link_down = rtl8366rb_mac_link_down,
+       .get_strings = rtl8366_get_strings,
+       .get_ethtool_stats = rtl8366_get_ethtool_stats,
+       .get_sset_count = rtl8366_get_sset_count,
+       .port_bridge_join = rtl8366rb_port_bridge_join,
+       .port_bridge_leave = rtl8366rb_port_bridge_leave,
+       .port_vlan_filtering = rtl8366rb_vlan_filtering,
+       .port_vlan_add = rtl8366_vlan_add,
+       .port_vlan_del = rtl8366_vlan_del,
+       .port_enable = rtl8366rb_port_enable,
+       .port_disable = rtl8366rb_port_disable,
+       .port_pre_bridge_flags = rtl8366rb_port_pre_bridge_flags,
+       .port_bridge_flags = rtl8366rb_port_bridge_flags,
+       .port_stp_state_set = rtl8366rb_port_stp_state_set,
+       .port_fast_age = rtl8366rb_port_fast_age,
+       .port_change_mtu = rtl8366rb_change_mtu,
+       .port_max_mtu = rtl8366rb_max_mtu,
+};
+
+static const struct realtek_ops rtl8366rb_ops = {
        .detect         = rtl8366rb_detect,
        .get_vlan_mc    = rtl8366rb_get_vlan_mc,
        .set_vlan_mc    = rtl8366rb_set_vlan_mc,
@@ -1803,12 +1843,17 @@ static const struct realtek_smi_ops rtl8366rb_smi_ops = {
        .phy_write      = rtl8366rb_phy_write,
 };
 
-const struct realtek_smi_variant rtl8366rb_variant = {
-       .ds_ops = &rtl8366rb_switch_ops,
-       .ops = &rtl8366rb_smi_ops,
+const struct realtek_variant rtl8366rb_variant = {
+       .ds_ops_smi = &rtl8366rb_switch_ops_smi,
+       .ds_ops_mdio = &rtl8366rb_switch_ops_mdio,
+       .ops = &rtl8366rb_ops,
        .clk_delay = 10,
        .cmd_read = 0xa9,
        .cmd_write = 0xa8,
        .chip_data_sz = sizeof(struct rtl8366rb),
 };
 EXPORT_SYMBOL_GPL(rtl8366rb_variant);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+MODULE_DESCRIPTION("Driver for RTL8366RB ethernet switch");
+MODULE_LICENSE("GPL");
index 0730352..bc06fe6 100644 (file)
@@ -442,34 +442,27 @@ static void xrs700x_teardown(struct dsa_switch *ds)
        cancel_delayed_work_sync(&priv->mib_work);
 }
 
-static void xrs700x_phylink_validate(struct dsa_switch *ds, int port,
-                                    unsigned long *supported,
-                                    struct phylink_link_state *state)
+static void xrs700x_phylink_get_caps(struct dsa_switch *ds, int port,
+                                    struct phylink_config *config)
 {
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
-
        switch (port) {
        case 0:
+               __set_bit(PHY_INTERFACE_MODE_RMII,
+                         config->supported_interfaces);
+               config->mac_capabilities = MAC_10FD | MAC_100FD;
                break;
+
        case 1:
        case 2:
        case 3:
-               phylink_set(mask, 1000baseT_Full);
+               phy_interface_set_rgmii(config->supported_interfaces);
+               config->mac_capabilities = MAC_10FD | MAC_100FD | MAC_1000FD;
                break;
+
        default:
-               linkmode_zero(supported);
                dev_err(ds->dev, "Unsupported port: %i\n", port);
-               return;
+               break;
        }
-
-       phylink_set_port_modes(mask);
-
-       /* The switch only supports full duplex. */
-       phylink_set(mask, 10baseT_Full);
-       phylink_set(mask, 100baseT_Full);
-
-       linkmode_and(supported, supported, mask);
-       linkmode_and(state->advertising, state->advertising, mask);
 }
 
 static void xrs700x_mac_link_up(struct dsa_switch *ds, int port,
@@ -703,7 +696,7 @@ static const struct dsa_switch_ops xrs700x_ops = {
        .setup                  = xrs700x_setup,
        .teardown               = xrs700x_teardown,
        .port_stp_state_set     = xrs700x_port_stp_state_set,
-       .phylink_validate       = xrs700x_phylink_validate,
+       .phylink_get_caps       = xrs700x_phylink_get_caps,
        .phylink_mac_link_up    = xrs700x_mac_link_up,
        .get_strings            = xrs700x_get_strings,
        .get_sset_count         = xrs700x_get_sset_count,
index 53080fd..07444ae 100644 (file)
@@ -1400,10 +1400,9 @@ static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag)
        struct sk_buff *skb;
 
        if (!first_frag)
-               skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-                                               rx_ring->rx_copybreak);
+               skb = napi_alloc_skb(rx_ring->napi, rx_ring->rx_copybreak);
        else
-               skb = build_skb(first_frag, ENA_PAGE_SIZE);
+               skb = napi_build_skb(first_frag, ENA_PAGE_SIZE);
 
        if (unlikely(!skb)) {
                ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1,
index 4f94136..c313221 100644 (file)
@@ -233,6 +233,7 @@ static const u16 bnxt_async_events_arr[] = {
        ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST,
        ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP,
        ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT,
+       ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE,
 };
 
 static struct workqueue_struct *bnxt_pf_wq;
@@ -2079,6 +2080,16 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
        (BNXT_EVENT_RING_TYPE(data2) == \
         ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX)
 
+#define BNXT_EVENT_PHC_EVENT_TYPE(data1)       \
+       (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK) >>\
+        ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT)
+
+#define BNXT_EVENT_PHC_RTC_UPDATE(data1)       \
+       (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK) >>\
+        ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT)
+
+#define BNXT_PHC_BITS  48
+
 static int bnxt_async_event_process(struct bnxt *bp,
                                    struct hwrm_async_event_cmpl *cmpl)
 {
@@ -2258,6 +2269,24 @@ static int bnxt_async_event_process(struct bnxt *bp,
                bnxt_event_error_report(bp, data1, data2);
                goto async_event_process_exit;
        }
+       case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: {
+               switch (BNXT_EVENT_PHC_EVENT_TYPE(data1)) {
+               case ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE:
+                       if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+                               struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+                               u64 ns;
+
+                               spin_lock_bh(&ptp->ptp_lock);
+                               bnxt_ptp_update_current_time(bp);
+                               ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) <<
+                                      BNXT_PHC_BITS) | ptp->current_time);
+                               bnxt_ptp_rtc_timecounter_init(ptp, ns);
+                               spin_unlock_bh(&ptp->ptp_lock);
+                       }
+                       break;
+               }
+               goto async_event_process_exit;
+       }
        case ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE: {
                u16 seq_id = le32_to_cpu(cmpl->event_data2) & 0xffff;
 
@@ -7414,6 +7443,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
        struct hwrm_port_mac_ptp_qcfg_output *resp;
        struct hwrm_port_mac_ptp_qcfg_input *req;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       bool phc_cfg;
        u8 flags;
        int rc;
 
@@ -7456,7 +7486,8 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
                rc = -ENODEV;
                goto exit;
        }
-       rc = bnxt_ptp_init(bp);
+       phc_cfg = (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0;
+       rc = bnxt_ptp_init(bp, phc_cfg);
        if (rc)
                netdev_warn(bp->dev, "PTP initialization failed.\n");
 exit:
@@ -7514,6 +7545,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
                bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED))
                bp->fw_cap |= BNXT_FW_CAP_PTP_PPS;
+       if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED)
+               bp->fw_cap |= BNXT_FW_CAP_PTP_RTC;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT))
                bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF;
        if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED))
@@ -10288,6 +10321,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        /* VF-reps may need to be re-opened after the PF is re-opened */
        if (BNXT_PF(bp))
                bnxt_vf_reps_open(bp);
+       bnxt_ptp_init_rtc(bp, true);
        return 0;
 
 open_err_irq:
index 440dfeb..4b023e3 100644 (file)
@@ -1957,6 +1957,7 @@ struct bnxt {
        #define BNXT_FW_CAP_EXT_STATS_SUPPORTED         0x00040000
        #define BNXT_FW_CAP_ERR_RECOVER_RELOAD          0x00100000
        #define BNXT_FW_CAP_HOT_RESET                   0x00200000
+       #define BNXT_FW_CAP_PTP_RTC                     0x00400000
        #define BNXT_FW_CAP_VLAN_RX_STRIP               0x01000000
        #define BNXT_FW_CAP_VLAN_TX_INSERT              0x02000000
        #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED      0x04000000
index 003330e..5edbee9 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/ctype.h>
 #include <linux/stringify.h>
 #include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
 #include <linux/linkmode.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
@@ -802,9 +803,11 @@ static void bnxt_get_ringparam(struct net_device *dev,
        if (bp->flags & BNXT_FLAG_AGG_RINGS) {
                ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
                ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+               kernel_ering->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED;
        } else {
                ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
                ering->rx_jumbo_max_pending = 0;
+               kernel_ering->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED;
        }
        ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT;
 
index ea86c54..b7100ed 100644 (file)
@@ -369,6 +369,12 @@ struct cmd_nums {
        #define HWRM_FUNC_PTP_EXT_CFG                     0x1a0UL
        #define HWRM_FUNC_PTP_EXT_QCFG                    0x1a1UL
        #define HWRM_FUNC_KEY_CTX_ALLOC                   0x1a2UL
+       #define HWRM_FUNC_BACKING_STORE_CFG_V2            0x1a3UL
+       #define HWRM_FUNC_BACKING_STORE_QCFG_V2           0x1a4UL
+       #define HWRM_FUNC_DBR_PACING_CFG                  0x1a5UL
+       #define HWRM_FUNC_DBR_PACING_QCFG                 0x1a6UL
+       #define HWRM_FUNC_DBR_PACING_BROADCAST_EVENT      0x1a7UL
+       #define HWRM_FUNC_BACKING_STORE_QCAPS_V2          0x1a8UL
        #define HWRM_SELFTEST_QLIST                       0x200UL
        #define HWRM_SELFTEST_EXEC                        0x201UL
        #define HWRM_SELFTEST_IRQ                         0x202UL
@@ -390,6 +396,9 @@ struct cmd_nums {
        #define HWRM_MFG_PRVSN_IMPORT_CERT                0x212UL
        #define HWRM_MFG_PRVSN_GET_STATE                  0x213UL
        #define HWRM_MFG_GET_NVM_MEASUREMENT              0x214UL
+       #define HWRM_MFG_PSOC_QSTATUS                     0x215UL
+       #define HWRM_MFG_SELFTEST_QLIST                   0x216UL
+       #define HWRM_MFG_SELFTEST_EXEC                    0x217UL
        #define HWRM_TF                                   0x2bcUL
        #define HWRM_TF_VERSION_GET                       0x2bdUL
        #define HWRM_TF_SESSION_OPEN                      0x2c6UL
@@ -532,8 +541,8 @@ struct hwrm_err_output {
 #define HWRM_VERSION_MAJOR 1
 #define HWRM_VERSION_MINOR 10
 #define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_RSVD 63
-#define HWRM_VERSION_STR "1.10.2.63"
+#define HWRM_VERSION_RSVD 73
+#define HWRM_VERSION_STR "1.10.2.73"
 
 /* hwrm_ver_get_input (size:192b/24B) */
 struct hwrm_ver_get_input {
@@ -757,10 +766,11 @@ struct hwrm_async_event_cmpl {
        #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE          0x40UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE    0x41UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST               0x42UL
-       #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_MASTER                 0x43UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE                 0x43UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP              0x44UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT               0x45UL
-       #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID          0x46UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD  0x46UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID          0x47UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG               0xfeUL
        #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR                 0xffUL
        #define ASYNC_EVENT_CMPL_EVENT_ID_LAST                      ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
@@ -1112,34 +1122,37 @@ struct hwrm_async_event_cmpl_echo_request {
        __le32  event_data1;
 };
 
-/* hwrm_async_event_cmpl_phc_master (size:128b/16B) */
-struct hwrm_async_event_cmpl_phc_master {
+/* hwrm_async_event_cmpl_phc_update (size:128b/16B) */
+struct hwrm_async_event_cmpl_phc_update {
        __le16  type;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_MASK            0x3fUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_SFT             0
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT  0x2eUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_LAST             ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_MASK            0x3fUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_SFT             0
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_LAST             ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT
        __le16  event_id;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER 0x43UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_LAST      ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE 0x43UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_LAST      ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE
        __le32  event_data2;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_SFT 0
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_MASK   0xffff0000UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_SFT    16
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_SFT 0
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_MASK   0xffff0000UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_SFT    16
        u8      opaque_v;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_V          0x1UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_MASK 0xfeUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_SFT 1
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_V          0x1UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_MASK 0xfeUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_SFT 1
        u8      timestamp_lo;
        __le16  timestamp_hi;
        __le32  event_data1;
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_MASK         0xfUL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_SFT          0
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_MASTER     0x1UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_SECONDARY  0x2UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER   0x3UL
-       #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_LAST          ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK          0xfUL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT           0
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_MASTER      0x1UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_SECONDARY   0x2UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_FAILOVER    0x3UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE  0x4UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_LAST           ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK   0xffff0UL
+       #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT    4
 };
 
 /* hwrm_async_event_cmpl_pps_timestamp (size:128b/16B) */
@@ -1330,6 +1343,30 @@ struct hwrm_async_event_cmpl_error_report_nvm {
        #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_LAST    ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE
 };
 
+/* hwrm_async_event_cmpl_error_report_doorbell_drop_threshold (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report_doorbell_drop_threshold {
+       __le16  type;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_MASK            0x3fUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_SFT             0
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_LAST             ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT
+       __le16  event_id;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT 0x45UL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_LAST        ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT
+       __le32  event_data2;
+       u8      opaque_v;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_V          0x1UL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_MASK 0xfeUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_SFT 1
+       u8      timestamp_lo;
+       __le16  timestamp_hi;
+       __le32  event_data1;
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_MASK                   0xffUL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_SFT                    0
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD  0x4UL
+       #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_LAST                    ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD
+};
+
 /* hwrm_func_reset_input (size:192b/24B) */
 struct hwrm_func_reset_input {
        __le16  req_type;
@@ -1589,6 +1626,10 @@ struct hwrm_func_qcaps_output {
        #define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL                        0x800000UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED                       0x1000000UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP                       0x2000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED                        0x4000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_REQUIRED                         0x8000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED                0x10000000UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_DBR_PACING_SUPPORTED                   0x20000000UL
        u8      max_schqs;
        u8      mpc_chnls_cap;
        #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE         0x1UL
@@ -2455,7 +2496,7 @@ struct hwrm_func_backing_store_qcaps_output {
        __le16  rkc_entry_size;
        __le32  tkc_max_entries;
        __le32  rkc_max_entries;
-       u8      rsvd[7];
+       u8      rsvd1[7];
        u8      valid;
 };
 
@@ -3164,7 +3205,7 @@ struct hwrm_func_ptp_pin_cfg_output {
        u8      valid;
 };
 
-/* hwrm_func_ptp_cfg_input (size:320b/40B) */
+/* hwrm_func_ptp_cfg_input (size:384b/48B) */
 struct hwrm_func_ptp_cfg_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -3178,6 +3219,7 @@ struct hwrm_func_ptp_cfg_input {
        #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD     0x8UL
        #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP         0x10UL
        #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE      0x20UL
+       #define FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME                0x40UL
        u8      ptp_pps_event;
        #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_INTERNAL     0x1UL
        #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_EXTERNAL     0x2UL
@@ -3204,6 +3246,7 @@ struct hwrm_func_ptp_cfg_input {
        __le32  ptp_freq_adj_ext_up;
        __le32  ptp_freq_adj_ext_phase_lower;
        __le32  ptp_freq_adj_ext_phase_upper;
+       __le64  ptp_set_time;
 };
 
 /* hwrm_func_ptp_cfg_output (size:128b/16B) */
@@ -3243,6 +3286,308 @@ struct hwrm_func_ptp_ts_query_output {
        u8      valid;
 };
 
+/* hwrm_func_ptp_ext_cfg_input (size:256b/32B) */
+struct hwrm_func_ptp_ext_cfg_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  enables;
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_MASTER_FID     0x1UL
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_FID        0x2UL
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_MODE       0x4UL
+       #define FUNC_PTP_EXT_CFG_REQ_ENABLES_FAILOVER_TIMER     0x8UL
+       __le16  phc_master_fid;
+       __le16  phc_sec_fid;
+       u8      phc_sec_mode;
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_SWITCH  0x0UL
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_ALL     0x1UL
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY 0x2UL
+       #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_LAST   FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY
+       u8      unused_0;
+       __le32  failover_timer;
+       u8      unused_1[4];
+};
+
+/* hwrm_func_ptp_ext_cfg_output (size:128b/16B) */
+struct hwrm_func_ptp_ext_cfg_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      unused_0[7];
+       u8      valid;
+};
+
+/* hwrm_func_ptp_ext_qcfg_input (size:192b/24B) */
+struct hwrm_func_ptp_ext_qcfg_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       u8      unused_0[8];
+};
+
+/* hwrm_func_ptp_ext_qcfg_output (size:256b/32B) */
+struct hwrm_func_ptp_ext_qcfg_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       __le16  phc_master_fid;
+       __le16  phc_sec_fid;
+       __le16  phc_active_fid0;
+       __le16  phc_active_fid1;
+       __le32  last_failover_event;
+       __le16  from_fid;
+       __le16  to_fid;
+       u8      unused_0[7];
+       u8      valid;
+};
+
+/* hwrm_func_backing_store_cfg_v2_input (size:448b/56B) */
+struct hwrm_func_backing_store_cfg_v2_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  type;
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST       FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID
+       __le16  instance;
+       __le32  flags;
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE     0x1UL
+       __le64  page_dir;
+       __le32  num_entries;
+       __le16  entry_size;
+       u8      page_size_pbl_level;
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_MASK  0xfUL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_SFT   0
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_0   0x0UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_1   0x1UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2   0x2UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LAST   FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_MASK  0xf0UL
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_SFT   4
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_4K   (0x0UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8K   (0x1UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_64K  (0x2UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_2M   (0x3UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8M   (0x4UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G   (0x5UL << 4)
+       #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_LAST   FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G
+       u8      subtype_valid_cnt;
+       __le32  split_entry_0;
+       __le32  split_entry_1;
+       __le32  split_entry_2;
+       __le32  split_entry_3;
+};
+
+/* hwrm_func_backing_store_cfg_v2_output (size:128b/16B) */
+struct hwrm_func_backing_store_cfg_v2_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      rsvd0[7];
+       u8      valid;
+};
+
+/* hwrm_func_backing_store_qcfg_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcfg_v2_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST       FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID
+       __le16  instance;
+       u8      rsvd[4];
+};
+
+/* hwrm_func_backing_store_qcfg_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcfg_v2_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST       FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID
+       __le16  instance;
+       __le32  flags;
+       __le64  page_dir;
+       __le32  num_entries;
+       u8      page_size_pbl_level;
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_MASK  0xfUL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_SFT   0
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_0   0x0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_1   0x1UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2   0x2UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LAST   FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_MASK  0xf0UL
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_SFT   4
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_4K   (0x0UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8K   (0x1UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_64K  (0x2UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_2M   (0x3UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8M   (0x4UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G   (0x5UL << 4)
+       #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_LAST   FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G
+       u8      subtype_valid_cnt;
+       u8      rsvd[2];
+       __le32  split_entry_0;
+       __le32  split_entry_1;
+       __le32  split_entry_2;
+       __le32  split_entry_3;
+       u8      rsvd2[7];
+       u8      valid;
+};
+
+/* qpc_split_entries (size:128b/16B) */
+struct qpc_split_entries {
+       __le32  qp_num_l2_entries;
+       __le32  qp_num_qp1_entries;
+       __le32  rsvd[2];
+};
+
+/* srq_split_entries (size:128b/16B) */
+struct srq_split_entries {
+       __le32  srq_num_l2_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* cq_split_entries (size:128b/16B) */
+struct cq_split_entries {
+       __le32  cq_num_l2_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* vnic_split_entries (size:128b/16B) */
+struct vnic_split_entries {
+       __le32  vnic_num_vnic_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* mrav_split_entries (size:128b/16B) */
+struct mrav_split_entries {
+       __le32  mrav_num_av_entries;
+       __le32  rsvd;
+       __le32  rsvd2[2];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_input (size:192b/24B) */
+struct hwrm_func_backing_store_qcaps_v2_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST       FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID
+       u8      rsvd[6];
+};
+
+/* hwrm_func_backing_store_qcaps_v2_output (size:448b/56B) */
+struct hwrm_func_backing_store_qcaps_v2_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       __le16  type;
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP          0x0UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ         0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ          0x2UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC        0x3UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT        0x4UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV        0xeUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM         0xfUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TKC         0x13UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RKC         0x14UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID     0xffffUL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST       FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID
+       __le16  entry_size;
+       __le32  flags;
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT     0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID               0x2UL
+       __le32  instance_bit_map;
+       u8      ctx_init_value;
+       u8      ctx_init_offset;
+       u8      entry_multiple;
+       u8      rsvd;
+       __le32  max_num_entries;
+       __le32  min_num_entries;
+       __le16  next_valid_type;
+       u8      subtype_valid_cnt;
+       u8      rsvd2;
+       __le32  split_entry_0;
+       __le32  split_entry_1;
+       __le32  split_entry_2;
+       __le32  split_entry_3;
+       u8      rsvd3[3];
+       u8      valid;
+};
+
 /* hwrm_func_drv_if_change_input (size:192b/24B) */
 struct hwrm_func_drv_if_change_input {
        __le16  req_type;
@@ -3741,7 +4086,7 @@ struct hwrm_port_phy_qcfg_output {
        u8      valid;
 };
 
-/* hwrm_port_mac_cfg_input (size:384b/48B) */
+/* hwrm_port_mac_cfg_input (size:448b/56B) */
 struct hwrm_port_mac_cfg_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -3807,7 +4152,8 @@ struct hwrm_port_mac_cfg_input {
        #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT           5
        u8      unused_0[3];
        __le32  ptp_freq_adj_ppb;
-       __le32  ptp_adj_phase;
+       u8      unused_1[4];
+       __le64  ptp_adj_phase;
 };
 
 /* hwrm_port_mac_cfg_output (size:128b/16B) */
@@ -3850,6 +4196,7 @@ struct hwrm_port_mac_ptp_qcfg_output {
        #define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS                      0x4UL
        #define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS                         0x8UL
        #define PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK     0x10UL
+       #define PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED                      0x20UL
        u8      unused_0[3];
        __le32  rx_ts_reg_off_lower;
        __le32  rx_ts_reg_off_upper;
@@ -4339,7 +4686,8 @@ struct hwrm_port_phy_qcaps_output {
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_2       0x2UL
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_3       0x3UL
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_4       0x4UL
-       #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST   PORT_PHY_QCAPS_RESP_PORT_CNT_4
+       #define PORT_PHY_QCAPS_RESP_PORT_CNT_12      0xcUL
+       #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST   PORT_PHY_QCAPS_RESP_PORT_CNT_12
        __le16  supported_speeds_force_mode;
        #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD     0x1UL
        #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MB       0x2UL
@@ -4399,7 +4747,7 @@ struct hwrm_port_phy_qcaps_output {
        __le16  flags2;
        #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED     0x1UL
        #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED       0x2UL
-       u8      unused_0[1];
+       u8      internal_port_cnt;
        u8      valid;
 };
 
@@ -6221,12 +6569,13 @@ struct hwrm_vnic_rss_cfg_input {
        __le16  target_id;
        __le64  resp_addr;
        __le32  hash_type;
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4         0x1UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4     0x2UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4     0x4UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6         0x8UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6     0x10UL
-       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6     0x20UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4                0x1UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4            0x2UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4            0x4UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6                0x8UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6            0x10UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6            0x20UL
+       #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL     0x40UL
        __le16  vnic_id;
        u8      ring_table_pair_index;
        u8      hash_mode_flags;
@@ -7898,6 +8247,7 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output {
        u8      valid;
 };
 
+/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */
 struct hwrm_tunnel_dst_port_query_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -8909,6 +9259,50 @@ struct hwrm_dbg_qcfg_output {
        u8      valid;
 };
 
+/* hwrm_dbg_crashdump_medium_cfg_input (size:320b/40B) */
+struct hwrm_dbg_crashdump_medium_cfg_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le16  output_dest_flags;
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR     0x1UL
+       __le16  pg_size_lvl;
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_MASK      0x3UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_SFT       0
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_0       0x0UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_1       0x1UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2       0x2UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LAST       DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_MASK  0x1cUL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_SFT   2
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K   (0x0UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K   (0x1UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K  (0x2UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_2M   (0x3UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8M   (0x4UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G   (0x5UL << 2)
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_LAST   DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_MASK 0xffe0UL
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_SFT  5
+       __le32  size;
+       __le32  coredump_component_disable_flags;
+       #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_NVRAM     0x1UL
+       __le32  unused_0;
+       __le64  pbl;
+};
+
+/* hwrm_dbg_crashdump_medium_cfg_output (size:128b/16B) */
+struct hwrm_dbg_crashdump_medium_cfg_output {
+       __le16  error_code;
+       __le16  req_type;
+       __le16  seq_id;
+       __le16  resp_len;
+       u8      unused_1[7];
+       u8      valid;
+};
+
 /* coredump_segment_record (size:128b/16B) */
 struct coredump_segment_record {
        __le16  component_id;
@@ -9372,8 +9766,35 @@ struct hwrm_nvm_install_update_output {
        __le16  resp_len;
        __le64  installed_items;
        u8      result;
-       #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL
-       #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST   NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS                      0x0UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_FAILURE                      0xffUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_MALLOC_FAILURE               0xfdUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_INDEX_PARAMETER      0xfbUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TYPE_PARAMETER       0xf3UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PREREQUISITE         0xf2UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_FILE_HEADER          0xecUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_SIGNATURE            0xebUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_STREAM          0xeaUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_LENGTH          0xe9UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_MANIFEST             0xe8UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TRAILER              0xe7UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_CHECKSUM             0xe6UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_ITEM_CHECKSUM        0xe5UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DATA_LENGTH          0xe4UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DIRECTIVE            0xe1UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_CHIP_REV         0xceUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_DEVICE_ID        0xcdUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_VENDOR    0xccUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_ID        0xcbUL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_PLATFORM         0xc5UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_DUPLICATE_ITEM               0xc4UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_ZERO_LENGTH_ITEM             0xc3UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_CHECKSUM_ERROR       0xb9UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_DATA_ERROR           0xb8UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_AUTHENTICATION_ERROR 0xb7UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_NOT_FOUND               0xb0UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED                  0xa7UL
+       #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST                        NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED
        u8      problem_item;
        #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE    0x0UL
        #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL
index 4852096..a0b321a 100644 (file)
 #include "bnxt_hwrm.h"
 #include "bnxt_ptp.h"
 
+static int bnxt_ptp_cfg_settime(struct bnxt *bp, u64 time)
+{
+       struct hwrm_func_ptp_cfg_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
+       if (rc)
+               return rc;
+
+       req->enables = cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME);
+       req->ptp_set_time = cpu_to_le64(time);
+       return hwrm_req_send(bp, req);
+}
+
 int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off)
 {
        unsigned int ptp_class;
@@ -48,6 +62,9 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
                                                ptp_info);
        u64 ns = timespec64_to_ns(ts);
 
+       if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+               return bnxt_ptp_cfg_settime(ptp->bp, ns);
+
        spin_lock_bh(&ptp->ptp_lock);
        timecounter_init(&ptp->tc, &ptp->cc, ns);
        spin_unlock_bh(&ptp->ptp_lock);
@@ -131,11 +148,47 @@ static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
        return 0;
 }
 
+/* Caller holds ptp_lock */
+void bnxt_ptp_update_current_time(struct bnxt *bp)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time);
+       WRITE_ONCE(ptp->old_time, ptp->current_time);
+}
+
+static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta)
+{
+       struct hwrm_port_mac_cfg_input *req;
+       int rc;
+
+       rc = hwrm_req_init(ptp->bp, req, HWRM_PORT_MAC_CFG);
+       if (rc)
+               return rc;
+
+       req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_ADJ_PHASE);
+       req->ptp_adj_phase = cpu_to_le64(delta);
+
+       rc = hwrm_req_send(ptp->bp, req);
+       if (rc) {
+               netdev_err(ptp->bp->dev, "ptp adjphc failed. rc = %x\n", rc);
+       } else {
+               spin_lock_bh(&ptp->ptp_lock);
+               bnxt_ptp_update_current_time(ptp->bp);
+               spin_unlock_bh(&ptp->ptp_lock);
+       }
+
+       return rc;
+}
+
 static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta)
 {
        struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
                                                ptp_info);
 
+       if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+               return bnxt_ptp_adjphc(ptp, delta);
+
        spin_lock_bh(&ptp->ptp_lock);
        timecounter_adjtime(&ptp->tc, delta);
        spin_unlock_bh(&ptp->ptp_lock);
@@ -714,7 +767,70 @@ static bool bnxt_pps_config_ok(struct bnxt *bp)
        return !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) == !ptp->ptp_info.pin_config;
 }
 
-int bnxt_ptp_init(struct bnxt *bp)
+static void bnxt_ptp_timecounter_init(struct bnxt *bp, bool init_tc)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       if (!ptp->ptp_clock) {
+               memset(&ptp->cc, 0, sizeof(ptp->cc));
+               ptp->cc.read = bnxt_cc_read;
+               ptp->cc.mask = CYCLECOUNTER_MASK(48);
+               ptp->cc.shift = 0;
+               ptp->cc.mult = 1;
+               ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
+       }
+       if (init_tc)
+               timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+}
+
+/* Caller holds ptp_lock */
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns)
+{
+       timecounter_init(&ptp->tc, &ptp->cc, ns);
+       /* For RTC, cycle_last must be in sync with the timecounter value. */
+       ptp->tc.cycle_last = ns & ptp->cc.mask;
+}
+
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg)
+{
+       struct timespec64 tsp;
+       u64 ns;
+       int rc;
+
+       if (!bp->ptp_cfg || !(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+               return -ENODEV;
+
+       if (!phc_cfg) {
+               ktime_get_real_ts64(&tsp);
+               ns = timespec64_to_ns(&tsp);
+               rc = bnxt_ptp_cfg_settime(bp, ns);
+               if (rc)
+                       return rc;
+       } else {
+               rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, &ns);
+               if (rc)
+                       return rc;
+       }
+       spin_lock_bh(&bp->ptp_cfg->ptp_lock);
+       bnxt_ptp_rtc_timecounter_init(bp->ptp_cfg, ns);
+       spin_unlock_bh(&bp->ptp_cfg->ptp_lock);
+
+       return 0;
+}
+
+static void bnxt_ptp_free(struct bnxt *bp)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       if (ptp->ptp_clock) {
+               ptp_clock_unregister(ptp->ptp_clock);
+               ptp->ptp_clock = NULL;
+               kfree(ptp->ptp_info.pin_config);
+               ptp->ptp_info.pin_config = NULL;
+       }
+}
+
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
 {
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        int rc;
@@ -726,26 +842,23 @@ int bnxt_ptp_init(struct bnxt *bp)
        if (rc)
                return rc;
 
+       if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+               bnxt_ptp_timecounter_init(bp, false);
+               rc = bnxt_ptp_init_rtc(bp, phc_cfg);
+               if (rc)
+                       goto out;
+       }
+
        if (ptp->ptp_clock && bnxt_pps_config_ok(bp))
                return 0;
 
-       if (ptp->ptp_clock) {
-               ptp_clock_unregister(ptp->ptp_clock);
-               ptp->ptp_clock = NULL;
-               kfree(ptp->ptp_info.pin_config);
-               ptp->ptp_info.pin_config = NULL;
-       }
+       bnxt_ptp_free(bp);
+
        atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
        spin_lock_init(&ptp->ptp_lock);
 
-       memset(&ptp->cc, 0, sizeof(ptp->cc));
-       ptp->cc.read = bnxt_cc_read;
-       ptp->cc.mask = CYCLECOUNTER_MASK(48);
-       ptp->cc.shift = 0;
-       ptp->cc.mult = 1;
-
-       ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
-       timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+       if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+               bnxt_ptp_timecounter_init(bp, true);
 
        ptp->ptp_info = bnxt_ptp_caps;
        if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
@@ -757,8 +870,8 @@ int bnxt_ptp_init(struct bnxt *bp)
                int err = PTR_ERR(ptp->ptp_clock);
 
                ptp->ptp_clock = NULL;
-               bnxt_unmap_ptp_regs(bp);
-               return err;
+               rc = err;
+               goto out;
        }
        if (bp->flags & BNXT_FLAG_CHIP_P5) {
                spin_lock_bh(&ptp->ptp_lock);
@@ -768,6 +881,11 @@ int bnxt_ptp_init(struct bnxt *bp)
                ptp_schedule_worker(ptp->ptp_clock, 0);
        }
        return 0;
+
+out:
+       bnxt_ptp_free(bp);
+       bnxt_unmap_ptp_regs(bp);
+       return rc;
 }
 
 void bnxt_ptp_clear(struct bnxt *bp)
index 7c528e1..373baf4 100644 (file)
@@ -131,12 +131,15 @@ do {                                              \
 #endif
 
 int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off);
+void bnxt_ptp_update_current_time(struct bnxt *bp);
 void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2);
 void bnxt_ptp_reapply_pps(struct bnxt *bp);
 int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
 int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
 int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
 int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts);
-int bnxt_ptp_init(struct bnxt *bp);
+void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns);
+int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg);
+int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg);
 void bnxt_ptp_clear(struct bnxt *bp);
 #endif
index 87f1056..cfe0911 100644 (file)
@@ -1368,7 +1368,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
        if (!p->eee_enabled) {
                bcmgenet_eee_enable_set(dev, false);
        } else {
-               ret = phy_init_eee(dev->phydev, 0);
+               ret = phy_init_eee(dev->phydev, false);
                if (ret) {
                        netif_err(priv, hw, dev, "EEE initialization failed\n");
                        return ret;
index 9ddbee7..f0a7d83 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/net_tstamp.h>
 #include <linux/interrupt.h>
+#include <linux/phy/phy.h>
 
 #if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP)
 #define MACB_EXT_DESC
@@ -1291,6 +1292,9 @@ struct macb {
        u32                     wol;
 
        struct macb_ptp_info    *ptp_info;      /* macb-ptp interface */
+
+       struct phy              *sgmii_phy;     /* for ZynqMP SGMII mode */
+
 #ifdef MACB_EXT_DESC
        uint8_t hw_dma_cap;
 #endif
index a363da9..1ce20bf 100644 (file)
@@ -34,7 +34,9 @@
 #include <linux/udp.h>
 #include <linux/tcp.h>
 #include <linux/iopoll.h>
+#include <linux/phy/phy.h>
 #include <linux/pm_runtime.h>
+#include <linux/reset.h>
 #include "macb.h"
 
 /* This structure is only used for MACB on SiFive FU540 devices */
@@ -2739,10 +2741,14 @@ static int macb_open(struct net_device *dev)
 
        macb_init_hw(bp);
 
-       err = macb_phylink_connect(bp);
+       err = phy_power_on(bp->sgmii_phy);
        if (err)
                goto reset_hw;
 
+       err = macb_phylink_connect(bp);
+       if (err)
+               goto phy_off;
+
        netif_tx_start_all_queues(dev);
 
        if (bp->ptp_info)
@@ -2750,6 +2756,9 @@ static int macb_open(struct net_device *dev)
 
        return 0;
 
+phy_off:
+       phy_power_off(bp->sgmii_phy);
+
 reset_hw:
        macb_reset_hw(bp);
        for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
@@ -2775,6 +2784,8 @@ static int macb_close(struct net_device *dev)
        phylink_stop(bp->phylink);
        phylink_disconnect_phy(bp->phylink);
 
+       phy_power_off(bp->sgmii_phy);
+
        spin_lock_irqsave(&bp->lock, flags);
        macb_reset_hw(bp);
        netif_carrier_off(dev);
@@ -4544,13 +4555,55 @@ static const struct macb_config np4_config = {
        .usrio = &macb_default_usrio,
 };
 
+static int zynqmp_init(struct platform_device *pdev)
+{
+       struct net_device *dev = platform_get_drvdata(pdev);
+       struct macb *bp = netdev_priv(dev);
+       int ret;
+
+       if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+               /* Ensure PS-GTR PHY device used in SGMII mode is ready */
+               bp->sgmii_phy = devm_phy_get(&pdev->dev, "sgmii-phy");
+
+               if (IS_ERR(bp->sgmii_phy)) {
+                       ret = PTR_ERR(bp->sgmii_phy);
+                       dev_err_probe(&pdev->dev, ret,
+                                     "failed to get PS-GTR PHY\n");
+                       return ret;
+               }
+
+               ret = phy_init(bp->sgmii_phy);
+               if (ret) {
+                       dev_err(&pdev->dev, "failed to init PS-GTR PHY: %d\n",
+                               ret);
+                       return ret;
+               }
+       }
+
+       /* Fully reset GEM controller at hardware level using zynqmp-reset driver,
+        * if mapped in device tree.
+        */
+       ret = device_reset_optional(&pdev->dev);
+       if (ret) {
+               dev_err_probe(&pdev->dev, ret, "failed to reset controller");
+               phy_exit(bp->sgmii_phy);
+               return ret;
+       }
+
+       ret = macb_init(pdev);
+       if (ret)
+               phy_exit(bp->sgmii_phy);
+
+       return ret;
+}
+
 static const struct macb_config zynqmp_config = {
        .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
                        MACB_CAPS_JUMBO |
                        MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH,
        .dma_burst_length = 16,
        .clk_init = macb_clk_init,
-       .init = macb_init,
+       .init = zynqmp_init,
        .jumbo_max_len = 10240,
        .usrio = &macb_default_usrio,
 };
@@ -4767,7 +4820,7 @@ static int macb_probe(struct platform_device *pdev)
 
        err = macb_mii_init(bp);
        if (err)
-               goto err_out_free_netdev;
+               goto err_out_phy_exit;
 
        netif_carrier_off(dev);
 
@@ -4792,6 +4845,9 @@ err_out_unregister_mdio:
        mdiobus_unregister(bp->mii_bus);
        mdiobus_free(bp->mii_bus);
 
+err_out_phy_exit:
+       phy_exit(bp->sgmii_phy);
+
 err_out_free_netdev:
        free_netdev(dev);
 
@@ -4813,6 +4869,7 @@ static int macb_remove(struct platform_device *pdev)
 
        if (dev) {
                bp = netdev_priv(dev);
+               phy_exit(bp->sgmii_phy);
                mdiobus_unregister(bp->mii_bus);
                mdiobus_free(bp->mii_bus);
 
index 574a32f..2f6484d 100644 (file)
@@ -1409,7 +1409,8 @@ static acpi_status bgx_acpi_register_phy(acpi_handle handle,
        struct device *dev = &bgx->pdev->dev;
        struct acpi_device *adev;
 
-       if (acpi_bus_get_device(handle, &adev))
+       adev = acpi_fetch_acpi_dev(handle);
+       if (!adev)
                goto out;
 
        acpi_get_mac_address(dev, adev, bgx->lmac[bgx->acpi_lmac_idx].mac);
index c78b99a..8014eb3 100644 (file)
@@ -2363,11 +2363,13 @@ static void gemini_port_save_mac_addr(struct gemini_ethernet_port *port)
 static int gemini_ethernet_port_probe(struct platform_device *pdev)
 {
        char *port_names[2] = { "ethernet0", "ethernet1" };
+       struct device_node *np = pdev->dev.of_node;
        struct gemini_ethernet_port *port;
        struct device *dev = &pdev->dev;
        struct gemini_ethernet *geth;
        struct net_device *netdev;
        struct device *parent;
+       u8 mac[ETH_ALEN];
        unsigned int id;
        int irq;
        int ret;
@@ -2473,6 +2475,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
        netif_napi_add(netdev, &port->napi, gmac_napi_poll,
                       DEFAULT_NAPI_WEIGHT);
 
+       ret = of_get_mac_address(np, mac);
+       if (!ret) {
+               dev_info(dev, "Setting macaddr from DT %pM\n", mac);
+               memcpy(port->mac_addr, mac, ETH_ALEN);
+       }
+
        if (is_valid_ether_addr((void *)port->mac_addr)) {
                eth_hw_addr_set(netdev, (u8 *)port->mac_addr);
        } else {
index 3fb39e3..653bde4 100644 (file)
@@ -21,7 +21,7 @@ void pnic_do_nway(struct net_device *dev)
        struct tulip_private *tp = netdev_priv(dev);
        void __iomem *ioaddr = tp->base_addr;
        u32 phy_reg = ioread32(ioaddr + 0xB8);
-       u32 new_csr6 = tp->csr6 & ~0x40C40200;
+       u32 new_csr6;
 
        if (phy_reg & 0x78000000) { /* Ignore baseT4 */
                if (phy_reg & 0x20000000)               dev->if_port = 5;
index 623d113..521f036 100644 (file)
@@ -100,6 +100,14 @@ static int dpaa2_mac_get_if_mode(struct fwnode_handle *dpmac_node,
        return err;
 }
 
+static struct phylink_pcs *dpaa2_mac_select_pcs(struct phylink_config *config,
+                                               phy_interface_t interface)
+{
+       struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+
+       return mac->pcs;
+}
+
 static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode,
                             const struct phylink_link_state *state)
 {
@@ -172,6 +180,7 @@ static void dpaa2_mac_link_down(struct phylink_config *config,
 
 static const struct phylink_mac_ops dpaa2_mac_phylink_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = dpaa2_mac_select_pcs,
        .mac_config = dpaa2_mac_config,
        .mac_link_up = dpaa2_mac_link_up,
        .mac_link_down = dpaa2_mac_link_down,
@@ -303,9 +312,6 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
        }
        mac->phylink = phylink;
 
-       if (mac->pcs)
-               phylink_set_pcs(mac->phylink, mac->pcs);
-
        err = phylink_fwnode_phy_connect(mac->phylink, dpmac_node, 0);
        if (err) {
                netdev_err(net_dev, "phylink_fwnode_phy_connect() = %d\n", err);
index ed16a5a..a0c75c7 100644 (file)
@@ -934,18 +934,21 @@ static void enetc_mdiobus_destroy(struct enetc_pf *pf)
        enetc_imdio_remove(pf);
 }
 
+static struct phylink_pcs *
+enetc_pl_mac_select_pcs(struct phylink_config *config, phy_interface_t iface)
+{
+       struct enetc_pf *pf = phylink_to_enetc_pf(config);
+
+       return pf->pcs;
+}
+
 static void enetc_pl_mac_config(struct phylink_config *config,
                                unsigned int mode,
                                const struct phylink_link_state *state)
 {
        struct enetc_pf *pf = phylink_to_enetc_pf(config);
-       struct enetc_ndev_priv *priv;
 
        enetc_mac_config(&pf->si->hw, state->interface);
-
-       priv = netdev_priv(pf->si->ndev);
-       if (pf->pcs)
-               phylink_set_pcs(priv->phylink, pf->pcs);
 }
 
 static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex)
@@ -1062,6 +1065,7 @@ static void enetc_pl_mac_link_down(struct phylink_config *config,
 
 static const struct phylink_mac_ops enetc_mac_phylink_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = enetc_pl_mac_select_pcs,
        .mac_config = enetc_pl_mac_config,
        .mac_link_up = enetc_pl_mac_link_up,
        .mac_link_down = enetc_pl_mac_link_down,
index 796133d..11227f5 100644 (file)
@@ -2797,7 +2797,7 @@ static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable)
        int ret = 0;
 
        if (enable) {
-               ret = phy_init_eee(ndev->phydev, 0);
+               ret = phy_init_eee(ndev->phydev, false);
                if (ret)
                        return ret;
 
index af99017..7d49c28 100644 (file)
@@ -101,7 +101,6 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
        u32 val, tempval;
        struct timespec64 ts;
        u64 ns;
-       val = 0;
 
        if (fep->pps_enable == enable)
                return 0;
index 266e562..ef8058a 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/acpi.h>
 #include <linux/acpi_mdio.h>
+#include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/mdio.h>
@@ -36,9 +37,10 @@ struct tgec_mdio_controller {
 } __packed;
 
 #define MDIO_STAT_ENC          BIT(6)
-#define MDIO_STAT_CLKDIV(x)    (((x>>1) & 0xff) << 8)
+#define MDIO_STAT_CLKDIV(x)    (((x) & 0x1ff) << 7)
 #define MDIO_STAT_BSY          BIT(0)
 #define MDIO_STAT_RD_ER                BIT(1)
+#define MDIO_STAT_PRE_DIS      BIT(5)
 #define MDIO_CTL_DEV_ADDR(x)   (x & 0x1f)
 #define MDIO_CTL_PORT_ADDR(x)  ((x & 0x1f) << 5)
 #define MDIO_CTL_PRE_DIS       BIT(10)
@@ -50,6 +52,8 @@ struct tgec_mdio_controller {
 
 struct mdio_fsl_priv {
        struct  tgec_mdio_controller __iomem *mdio_base;
+       struct  clk *enet_clk;
+       u32     mdc_freq;
        bool    is_little_endian;
        bool    has_a009885;
        bool    has_a011043;
@@ -254,6 +258,50 @@ irq_restore:
        return ret;
 }
 
+static int xgmac_mdio_set_mdc_freq(struct mii_bus *bus)
+{
+       struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+       struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+       struct device *dev = bus->parent;
+       u32 mdio_stat, div;
+
+       if (device_property_read_u32(dev, "clock-frequency", &priv->mdc_freq))
+               return 0;
+
+       priv->enet_clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(priv->enet_clk)) {
+               dev_err(dev, "Input clock unknown, not changing MDC frequency");
+               return PTR_ERR(priv->enet_clk);
+       }
+
+       div = ((clk_get_rate(priv->enet_clk) / priv->mdc_freq) - 1) / 2;
+       if (div < 5 || div > 0x1ff) {
+               dev_err(dev, "Requested MDC frequency is out of range, ignoring");
+               return -EINVAL;
+       }
+
+       mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+       mdio_stat &= ~MDIO_STAT_CLKDIV(0x1ff);
+       mdio_stat |= MDIO_STAT_CLKDIV(div);
+       xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+       return 0;
+}
+
+static void xgmac_mdio_set_suppress_preamble(struct mii_bus *bus)
+{
+       struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+       struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+       struct device *dev = bus->parent;
+       u32 mdio_stat;
+
+       if (!device_property_read_bool(dev, "suppress-preamble"))
+               return;
+
+       mdio_stat = xgmac_read32(&regs->mdio_stat, priv->is_little_endian);
+       mdio_stat |= MDIO_STAT_PRE_DIS;
+       xgmac_write32(mdio_stat, &regs->mdio_stat, priv->is_little_endian);
+}
+
 static int xgmac_mdio_probe(struct platform_device *pdev)
 {
        struct fwnode_handle *fwnode;
@@ -273,7 +321,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       bus = mdiobus_alloc_size(sizeof(struct mdio_fsl_priv));
+       bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(struct mdio_fsl_priv));
        if (!bus)
                return -ENOMEM;
 
@@ -284,13 +332,11 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
        bus->probe_capabilities = MDIOBUS_C22_C45;
        snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res->start);
 
-       /* Set the PHY base address */
        priv = bus->priv;
-       priv->mdio_base = ioremap(res->start, resource_size(res));
-       if (!priv->mdio_base) {
-               ret = -ENOMEM;
-               goto err_ioremap;
-       }
+       priv->mdio_base = devm_ioremap(&pdev->dev, res->start,
+                                      resource_size(res));
+       if (!priv->mdio_base)
+               return -ENOMEM;
 
        /* For both ACPI and DT cases, endianness of MDIO controller
         * needs to be specified using "little-endian" property.
@@ -303,6 +349,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
        priv->has_a011043 = device_property_read_bool(&pdev->dev,
                                                      "fsl,erratum-a011043");
 
+       xgmac_mdio_set_suppress_preamble(bus);
+
+       ret = xgmac_mdio_set_mdc_freq(bus);
+       if (ret)
+               return ret;
+
        fwnode = pdev->dev.fwnode;
        if (is_of_node(fwnode))
                ret = of_mdiobus_register(bus, to_of_node(fwnode));
@@ -312,32 +364,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
                ret = -EINVAL;
        if (ret) {
                dev_err(&pdev->dev, "cannot register MDIO bus\n");
-               goto err_registration;
+               return ret;
        }
 
        platform_set_drvdata(pdev, bus);
 
        return 0;
-
-err_registration:
-       iounmap(priv->mdio_base);
-
-err_ioremap:
-       mdiobus_free(bus);
-
-       return ret;
-}
-
-static int xgmac_mdio_remove(struct platform_device *pdev)
-{
-       struct mii_bus *bus = platform_get_drvdata(pdev);
-       struct mdio_fsl_priv *priv = bus->priv;
-
-       mdiobus_unregister(bus);
-       iounmap(priv->mdio_base);
-       mdiobus_free(bus);
-
-       return 0;
 }
 
 static const struct of_device_id xgmac_mdio_match[] = {
@@ -364,7 +396,6 @@ static struct platform_driver xgmac_mdio_driver = {
                .acpi_match_table = xgmac_acpi_match,
        },
        .probe = xgmac_mdio_probe,
-       .remove = xgmac_mdio_remove,
 };
 
 module_platform_driver(xgmac_mdio_driver);
index a42aeb5..6fb3437 100644 (file)
@@ -7388,9 +7388,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        resource_size_t flash_start, flash_len;
        static int cards_found;
        u16 aspm_disable_flag = 0;
-       int bars, i, err, pci_using_dac;
        u16 eeprom_data = 0;
        u16 eeprom_apme_mask = E1000_EEPROM_APME;
+       int bars, i, err;
        s32 ret_val = 0;
 
        if (ei->flags2 & FLAG2_DISABLE_ASPM_L0S)
@@ -7404,17 +7404,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (err)
                return err;
 
-       pci_using_dac = 0;
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
-       if (!err) {
-               pci_using_dac = 1;
-       } else {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "No usable DMA configuration, aborting\n");
-                       goto err_dma;
-               }
+       if (err) {
+               dev_err(&pdev->dev,
+                       "No usable DMA configuration, aborting\n");
+               goto err_dma;
        }
 
        bars = pci_select_bars(pdev, IORESOURCE_MEM);
@@ -7550,10 +7544,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        netdev->priv_flags |= IFF_UNICAST_FLT;
 
-       if (pci_using_dac) {
-               netdev->features |= NETIF_F_HIGHDMA;
-               netdev->vlan_features |= NETIF_F_HIGHDMA;
-       }
+       netdev->features |= NETIF_F_HIGHDMA;
+       netdev->vlan_features |= NETIF_F_HIGHDMA;
 
        /* MTU range: 68 - max_hw_frame_size */
        netdev->min_mtu = ETH_MIN_MTU;
index 0c4b7df..f531bc1 100644 (file)
@@ -15341,12 +15341,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* set up for high or low dma */
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (err) {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "DMA configuration failed: 0x%x\n", err);
-                       goto err_dma;
-               }
+               dev_err(&pdev->dev,
+                       "DMA configuration failed: 0x%x\n", err);
+               goto err_dma;
        }
 
        /* set up pci connections */
index 945b1bb..67e9844 100644 (file)
@@ -241,21 +241,25 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
 static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
                                             struct xdp_buff *xdp)
 {
+       unsigned int totalsize = xdp->data_end - xdp->data_meta;
        unsigned int metasize = xdp->data - xdp->data_meta;
-       unsigned int datasize = xdp->data_end - xdp->data;
        struct sk_buff *skb;
 
+       net_prefetch(xdp->data_meta);
+
        /* allocate a skb to store the frags */
-       skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-                              xdp->data_end - xdp->data_hard_start,
+       skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
                               GFP_ATOMIC | __GFP_NOWARN);
        if (unlikely(!skb))
                goto out;
 
-       skb_reserve(skb, xdp->data - xdp->data_hard_start);
-       memcpy(__skb_put(skb, datasize), xdp->data, datasize);
-       if (metasize)
+       memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+              ALIGN(totalsize, sizeof(long)));
+
+       if (metasize) {
                skb_metadata_set(skb, metasize);
+               __skb_pull(skb, metasize);
+       }
 
 out:
        xsk_buff_free(xdp);
index 8125b91..b0bd95c 100644 (file)
@@ -4368,12 +4368,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (err) {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "DMA configuration failed: 0x%x\n", err);
-                       goto err_dma;
-               }
+               dev_err(&pdev->dev,
+                       "DMA configuration failed: 0x%x\n", err);
+               goto err_dma;
        }
 
        err = pci_request_regions(pdev, iavf_driver_name);
index 3081443..f46af3b 100644 (file)
@@ -4459,8 +4459,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
        /* set up for high or low DMA */
        err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
-       if (err)
-               err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
        if (err) {
                dev_err(dev, "DMA configuration failed: 0x%x\n", err);
                return err;
index 3e38695..c2258be 100644 (file)
@@ -983,15 +983,17 @@ static struct sk_buff *
 ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
                  struct xdp_buff *xdp)
 {
+       unsigned int metasize = xdp->data - xdp->data_meta;
        unsigned int size = xdp->data_end - xdp->data;
        unsigned int headlen;
        struct sk_buff *skb;
 
        /* prefetch first cache line of first page */
-       net_prefetch(xdp->data);
+       net_prefetch(xdp->data_meta);
 
        /* allocate a skb to store the frags */
-       skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
+       skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+                              ICE_RX_HDR_SIZE + metasize,
                               GFP_ATOMIC | __GFP_NOWARN);
        if (unlikely(!skb))
                return NULL;
@@ -1003,8 +1005,13 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
                headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
 
        /* align pull length to size of long to optimize memcpy performance */
-       memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
-                                                        sizeof(long)));
+       memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
+              ALIGN(headlen + metasize, sizeof(long)));
+
+       if (metasize) {
+               skb_metadata_set(skb, metasize);
+               __skb_pull(skb, metasize);
+       }
 
        /* if we exhaust the linear part then add what is left as a frag */
        size -= headlen;
index 2388837..feb874b 100644 (file)
@@ -428,20 +428,24 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
 static struct sk_buff *
 ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
 {
-       unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start;
+       unsigned int totalsize = xdp->data_end - xdp->data_meta;
        unsigned int metasize = xdp->data - xdp->data_meta;
-       unsigned int datasize = xdp->data_end - xdp->data;
        struct sk_buff *skb;
 
-       skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+       net_prefetch(xdp->data_meta);
+
+       skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
                               GFP_ATOMIC | __GFP_NOWARN);
        if (unlikely(!skb))
                return NULL;
 
-       skb_reserve(skb, xdp->data - xdp->data_hard_start);
-       memcpy(__skb_put(skb, datasize), xdp->data, datasize);
-       if (metasize)
+       memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+              ALIGN(totalsize, sizeof(long)));
+
+       if (metasize) {
                skb_metadata_set(skb, metasize);
+               __skb_pull(skb, metasize);
+       }
 
        xsk_buff_free(xdp);
        return skb;
index 38ba920..bfa321e 100644 (file)
@@ -3164,8 +3164,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        s32 ret_val;
        static int global_quad_port_a; /* global quad port a indication */
        const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
-       int err, pci_using_dac;
        u8 part_str[E1000_PBANUM_LENGTH];
+       int err;
 
        /* Catch broken hardware that put the wrong VF device ID in
         * the PCIe SR-IOV capability.
@@ -3180,17 +3180,11 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (err)
                return err;
 
-       pci_using_dac = 0;
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
-       if (!err) {
-               pci_using_dac = 1;
-       } else {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "No usable DMA configuration, aborting\n");
-                       goto err_dma;
-               }
+       if (err) {
+               dev_err(&pdev->dev,
+                       "No usable DMA configuration, aborting\n");
+               goto err_dma;
        }
 
        err = pci_request_mem_regions(pdev, igb_driver_name);
@@ -3306,8 +3300,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (hw->mac.type >= e1000_i350)
                netdev->hw_features |= NETIF_F_NTUPLE;
 
-       if (pci_using_dac)
-               netdev->features |= NETIF_F_HIGHDMA;
+       netdev->features |= NETIF_F_HIGHDMA;
 
        netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
        netdev->mpls_features |= NETIF_F_HW_CSUM;
index b784072..43ced78 100644 (file)
@@ -2684,25 +2684,18 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct igbvf_adapter *adapter;
        struct e1000_hw *hw;
        const struct igbvf_info *ei = igbvf_info_tbl[ent->driver_data];
-
        static int cards_found;
-       int err, pci_using_dac;
+       int err;
 
        err = pci_enable_device_mem(pdev);
        if (err)
                return err;
 
-       pci_using_dac = 0;
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
-       if (!err) {
-               pci_using_dac = 1;
-       } else {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "No usable DMA configuration, aborting\n");
-                       goto err_dma;
-               }
+       if (err) {
+               dev_err(&pdev->dev,
+                       "No usable DMA configuration, aborting\n");
+               goto err_dma;
        }
 
        err = pci_request_regions(pdev, igbvf_driver_name);
@@ -2783,10 +2776,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        netdev->hw_features |= NETIF_F_GSO_PARTIAL |
                               IGBVF_GSO_PARTIAL_FEATURES;
 
-       netdev->features = netdev->hw_features;
-
-       if (pci_using_dac)
-               netdev->features |= NETIF_F_HIGHDMA;
+       netdev->features = netdev->hw_features | NETIF_F_HIGHDMA;
 
        netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
        netdev->mpls_features |= NETIF_F_HW_CSUM;
index 2f17f36..b965fb8 100644 (file)
@@ -2446,19 +2446,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
                                            struct xdp_buff *xdp)
 {
+       unsigned int totalsize = xdp->data_end - xdp->data_meta;
        unsigned int metasize = xdp->data - xdp->data_meta;
-       unsigned int datasize = xdp->data_end - xdp->data;
-       unsigned int totalsize = metasize + datasize;
        struct sk_buff *skb;
 
-       skb = __napi_alloc_skb(&ring->q_vector->napi,
-                              xdp->data_end - xdp->data_hard_start,
+       net_prefetch(xdp->data_meta);
+
+       skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
                               GFP_ATOMIC | __GFP_NOWARN);
        if (unlikely(!skb))
                return NULL;
 
-       skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
-       memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize);
+       memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+              ALIGN(totalsize, sizeof(long)));
+
        if (metasize) {
                skb_metadata_set(skb, metasize);
                __skb_pull(skb, metasize);
@@ -6251,23 +6252,17 @@ static int igc_probe(struct pci_dev *pdev,
        struct net_device *netdev;
        struct igc_hw *hw;
        const struct igc_info *ei = igc_info_tbl[ent->driver_data];
-       int err, pci_using_dac;
+       int err;
 
        err = pci_enable_device_mem(pdev);
        if (err)
                return err;
 
-       pci_using_dac = 0;
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
-       if (!err) {
-               pci_using_dac = 1;
-       } else {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "No usable DMA configuration, aborting\n");
-                       goto err_dma;
-               }
+       if (err) {
+               dev_err(&pdev->dev,
+                       "No usable DMA configuration, aborting\n");
+               goto err_dma;
        }
 
        err = pci_request_mem_regions(pdev, igc_driver_name);
@@ -6367,8 +6362,7 @@ static int igc_probe(struct pci_dev *pdev,
        netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
        netdev->hw_features |= netdev->features;
 
-       if (pci_using_dac)
-               netdev->features |= NETIF_F_HIGHDMA;
+       netdev->features |= NETIF_F_HIGHDMA;
 
        netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
        netdev->mpls_features |= NETIF_F_HW_CSUM;
index 99d4819..affdefc 100644 (file)
@@ -361,7 +361,6 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct net_device *netdev = NULL;
        struct ixgb_adapter *adapter;
        static int cards_found = 0;
-       int pci_using_dac;
        u8 addr[ETH_ALEN];
        int i;
        int err;
@@ -370,16 +369,10 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (err)
                return err;
 
-       pci_using_dac = 0;
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
-       if (!err) {
-               pci_using_dac = 1;
-       } else {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-               if (err) {
-                       pr_err("No usable DMA configuration, aborting\n");
-                       goto err_dma_mask;
-               }
+       if (err) {
+               pr_err("No usable DMA configuration, aborting\n");
+               goto err_dma_mask;
        }
 
        err = pci_request_regions(pdev, ixgb_driver_name);
@@ -444,10 +437,8 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                           NETIF_F_HW_VLAN_CTAG_FILTER;
        netdev->hw_features |= NETIF_F_RXCSUM;
 
-       if (pci_using_dac) {
-               netdev->features |= NETIF_F_HIGHDMA;
-               netdev->vlan_features |= NETIF_F_HIGHDMA;
-       }
+       netdev->features |= NETIF_F_HIGHDMA;
+       netdev->vlan_features |= NETIF_F_HIGHDMA;
 
        /* MTU range: 68 - 16114 */
        netdev->min_mtu = ETH_MIN_MTU;
index 89b4670..2c8a4a0 100644 (file)
@@ -10632,9 +10632,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct ixgbe_adapter *adapter = NULL;
        struct ixgbe_hw *hw;
        const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];
-       int i, err, pci_using_dac, expected_gts;
        unsigned int indices = MAX_TX_QUEUES;
        u8 part_str[IXGBE_PBANUM_LENGTH];
+       int i, err, expected_gts;
        bool disable_dev = false;
 #ifdef IXGBE_FCOE
        u16 device_caps;
@@ -10654,16 +10654,11 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (err)
                return err;
 
-       if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
-               pci_using_dac = 1;
-       } else {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "No usable DMA configuration, aborting\n");
-                       goto err_dma;
-               }
-               pci_using_dac = 0;
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+       if (err) {
+               dev_err(&pdev->dev,
+                       "No usable DMA configuration, aborting\n");
+               goto err_dma;
        }
 
        err = pci_request_mem_regions(pdev, ixgbe_driver_name);
@@ -10861,8 +10856,7 @@ skip_sriov:
                netdev->hw_features |= NETIF_F_NTUPLE |
                                       NETIF_F_HW_TC;
 
-       if (pci_using_dac)
-               netdev->features |= NETIF_F_HIGHDMA;
+       netdev->features |= NETIF_F_HIGHDMA;
 
        netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
        netdev->hw_enc_features |= netdev->vlan_features;
index b3fd8e5..ee28929 100644 (file)
@@ -207,26 +207,28 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
 }
 
 static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
-                                             struct ixgbe_rx_buffer *bi)
+                                             const struct xdp_buff *xdp)
 {
-       unsigned int metasize = bi->xdp->data - bi->xdp->data_meta;
-       unsigned int datasize = bi->xdp->data_end - bi->xdp->data;
+       unsigned int totalsize = xdp->data_end - xdp->data_meta;
+       unsigned int metasize = xdp->data - xdp->data_meta;
        struct sk_buff *skb;
 
+       net_prefetch(xdp->data_meta);
+
        /* allocate a skb to store the frags */
-       skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-                              bi->xdp->data_end - bi->xdp->data_hard_start,
+       skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
                               GFP_ATOMIC | __GFP_NOWARN);
        if (unlikely(!skb))
                return NULL;
 
-       skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start);
-       memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize);
-       if (metasize)
+       memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+              ALIGN(totalsize, sizeof(long)));
+
+       if (metasize) {
                skb_metadata_set(skb, metasize);
+               __skb_pull(skb, metasize);
+       }
 
-       xsk_buff_free(bi->xdp);
-       bi->xdp = NULL;
        return skb;
 }
 
@@ -317,12 +319,15 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
                }
 
                /* XDP_PASS path */
-               skb = ixgbe_construct_skb_zc(rx_ring, bi);
+               skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp);
                if (!skb) {
                        rx_ring->rx_stats.alloc_rx_buff_failed++;
                        break;
                }
 
+               xsk_buff_free(bi->xdp);
+               bi->xdp = NULL;
+
                cleaned_count++;
                ixgbe_inc_ntc(rx_ring);
 
index 0015fcf..7c33be9 100644 (file)
@@ -4511,22 +4511,17 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct ixgbevf_adapter *adapter = NULL;
        struct ixgbe_hw *hw = NULL;
        const struct ixgbevf_info *ii = ixgbevf_info_tbl[ent->driver_data];
-       int err, pci_using_dac;
        bool disable_dev = false;
+       int err;
 
        err = pci_enable_device(pdev);
        if (err)
                return err;
 
-       if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
-               pci_using_dac = 1;
-       } else {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
-                       goto err_dma;
-               }
-               pci_using_dac = 0;
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+       if (err) {
+               dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
+               goto err_dma;
        }
 
        err = pci_request_regions(pdev, ixgbevf_driver_name);
@@ -4606,10 +4601,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        netdev->hw_features |= NETIF_F_GSO_PARTIAL |
                               IXGBEVF_GSO_PARTIAL_FEATURES;
 
-       netdev->features = netdev->hw_features;
-
-       if (pci_using_dac)
-               netdev->features |= NETIF_F_HIGHDMA;
+       netdev->features = netdev->hw_features | NETIF_F_HIGHDMA;
 
        netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
        netdev->mpls_features |= NETIF_F_SG |
index 83c8908..f1335a1 100644 (file)
@@ -1884,8 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
                        bytes_compl += buf->skb->len;
                        pkts_compl++;
                        dev_kfree_skb_any(buf->skb);
-               } else if (buf->type == MVNETA_TYPE_XDP_TX ||
-                          buf->type == MVNETA_TYPE_XDP_NDO) {
+               } else if ((buf->type == MVNETA_TYPE_XDP_TX ||
+                           buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) {
                        if (napi && buf->type == MVNETA_TYPE_XDP_TX)
                                xdp_return_frame_rx_napi(buf->xdpf);
                        else
@@ -2060,61 +2060,104 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
 
 static void
 mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
-                   struct xdp_buff *xdp, struct skb_shared_info *sinfo,
-                   int sync_len)
+                   struct xdp_buff *xdp, int sync_len)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        int i;
 
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
        for (i = 0; i < sinfo->nr_frags; i++)
                page_pool_put_full_page(rxq->page_pool,
                                        skb_frag_page(&sinfo->frags[i]), true);
+
+out:
        page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
                           sync_len, true);
 }
 
 static int
 mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
-                       struct xdp_frame *xdpf, bool dma_map)
+                       struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+       struct device *dev = pp->dev->dev.parent;
        struct mvneta_tx_desc *tx_desc;
-       struct mvneta_tx_buf *buf;
-       dma_addr_t dma_addr;
+       int i, num_frames = 1;
+       struct page *page;
 
-       if (txq->count >= txq->tx_stop_threshold)
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               num_frames += sinfo->nr_frags;
+
+       if (txq->count + num_frames >= txq->size)
                return MVNETA_XDP_DROPPED;
 
-       tx_desc = mvneta_txq_next_desc_get(txq);
+       for (i = 0; i < num_frames; i++) {
+               struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+               skb_frag_t *frag = NULL;
+               int len = xdpf->len;
+               dma_addr_t dma_addr;
 
-       buf = &txq->buf[txq->txq_put_index];
-       if (dma_map) {
-               /* ndo_xdp_xmit */
-               dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
-                                         xdpf->len, DMA_TO_DEVICE);
-               if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
-                       mvneta_txq_desc_put(txq);
-                       return MVNETA_XDP_DROPPED;
+               if (unlikely(i)) { /* paged area */
+                       frag = &sinfo->frags[i - 1];
+                       len = skb_frag_size(frag);
                }
-               buf->type = MVNETA_TYPE_XDP_NDO;
-       } else {
-               struct page *page = virt_to_page(xdpf->data);
 
-               dma_addr = page_pool_get_dma_addr(page) +
-                          sizeof(*xdpf) + xdpf->headroom;
-               dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
-                                          xdpf->len, DMA_BIDIRECTIONAL);
-               buf->type = MVNETA_TYPE_XDP_TX;
-       }
-       buf->xdpf = xdpf;
+               tx_desc = mvneta_txq_next_desc_get(txq);
+               if (dma_map) {
+                       /* ndo_xdp_xmit */
+                       void *data;
+
+                       data = unlikely(frag) ? skb_frag_address(frag)
+                                             : xdpf->data;
+                       dma_addr = dma_map_single(dev, data, len,
+                                                 DMA_TO_DEVICE);
+                       if (dma_mapping_error(dev, dma_addr)) {
+                               mvneta_txq_desc_put(txq);
+                               goto unmap;
+                       }
+
+                       buf->type = MVNETA_TYPE_XDP_NDO;
+               } else {
+                       page = unlikely(frag) ? skb_frag_page(frag)
+                                             : virt_to_page(xdpf->data);
+                       dma_addr = page_pool_get_dma_addr(page);
+                       if (unlikely(frag))
+                               dma_addr += skb_frag_off(frag);
+                       else
+                               dma_addr += sizeof(*xdpf) + xdpf->headroom;
+                       dma_sync_single_for_device(dev, dma_addr, len,
+                                                  DMA_BIDIRECTIONAL);
+                       buf->type = MVNETA_TYPE_XDP_TX;
+               }
+               buf->xdpf = unlikely(i) ? NULL : xdpf;
 
-       tx_desc->command = MVNETA_TXD_FLZ_DESC;
-       tx_desc->buf_phys_addr = dma_addr;
-       tx_desc->data_size = xdpf->len;
+               tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC;
+               tx_desc->buf_phys_addr = dma_addr;
+               tx_desc->data_size = len;
+               *nxmit_byte += len;
 
-       mvneta_txq_inc_put(txq);
-       txq->pending++;
-       txq->count++;
+               mvneta_txq_inc_put(txq);
+       }
+       /*last descriptor */
+       tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
+
+       txq->pending += num_frames;
+       txq->count += num_frames;
 
        return MVNETA_XDP_TX;
+
+unmap:
+       for (i--; i >= 0; i--) {
+               mvneta_txq_desc_put(txq);
+               tx_desc = txq->descs + txq->next_desc_to_proc;
+               dma_unmap_single(dev, tx_desc->buf_phys_addr,
+                                tx_desc->data_size,
+                                DMA_TO_DEVICE);
+       }
+
+       return MVNETA_XDP_DROPPED;
 }
 
 static int
@@ -2123,8 +2166,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
        struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
        struct mvneta_tx_queue *txq;
        struct netdev_queue *nq;
+       int cpu, nxmit_byte = 0;
        struct xdp_frame *xdpf;
-       int cpu;
        u32 ret;
 
        xdpf = xdp_convert_buff_to_frame(xdp);
@@ -2136,10 +2179,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
        nq = netdev_get_tx_queue(pp->dev, txq->id);
 
        __netif_tx_lock(nq, cpu);
-       ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
+       ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false);
        if (ret == MVNETA_XDP_TX) {
                u64_stats_update_begin(&stats->syncp);
-               stats->es.ps.tx_bytes += xdpf->len;
+               stats->es.ps.tx_bytes += nxmit_byte;
                stats->es.ps.tx_packets++;
                stats->es.ps.xdp_tx++;
                u64_stats_update_end(&stats->syncp);
@@ -2178,11 +2221,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,
 
        __netif_tx_lock(nq, cpu);
        for (i = 0; i < num_frame; i++) {
-               ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
+               ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte,
+                                             true);
                if (ret != MVNETA_XDP_TX)
                        break;
 
-               nxmit_byte += frames[i]->len;
                nxmit++;
        }
 
@@ -2205,7 +2248,6 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
               struct bpf_prog *prog, struct xdp_buff *xdp,
               u32 frame_sz, struct mvneta_stats *stats)
 {
-       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        unsigned int len, data_len, sync;
        u32 ret, act;
 
@@ -2226,7 +2268,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 
                err = xdp_do_redirect(pp->dev, xdp, prog);
                if (unlikely(err)) {
-                       mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+                       mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                        ret = MVNETA_XDP_DROPPED;
                } else {
                        ret = MVNETA_XDP_REDIR;
@@ -2237,7 +2279,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
        case XDP_TX:
                ret = mvneta_xdp_xmit_back(pp, xdp);
                if (ret != MVNETA_XDP_TX)
-                       mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+                       mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                break;
        default:
                bpf_warn_invalid_xdp_action(pp->dev, prog, act);
@@ -2246,7 +2288,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
                trace_xdp_exception(pp->dev, prog, act);
                fallthrough;
        case XDP_DROP:
-               mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync);
+               mvneta_xdp_put_buff(pp, rxq, xdp, sync);
                ret = MVNETA_XDP_DROPPED;
                stats->xdp_drop++;
                break;
@@ -2269,7 +2311,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
        int data_len = -MVNETA_MH_SIZE, len;
        struct net_device *dev = pp->dev;
        enum dma_data_direction dma_dir;
-       struct skb_shared_info *sinfo;
 
        if (*size > MVNETA_MAX_RX_BUF_SIZE) {
                len = MVNETA_MAX_RX_BUF_SIZE;
@@ -2289,11 +2330,9 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
 
        /* Prefetch header */
        prefetch(data);
+       xdp_buff_clear_frags_flag(xdp);
        xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE,
                         data_len, false);
-
-       sinfo = xdp_get_shared_info_from_buff(xdp);
-       sinfo->nr_frags = 0;
 }
 
 static void
@@ -2301,9 +2340,9 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
                            struct mvneta_rx_desc *rx_desc,
                            struct mvneta_rx_queue *rxq,
                            struct xdp_buff *xdp, int *size,
-                           struct skb_shared_info *xdp_sinfo,
                            struct page *page)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        struct net_device *dev = pp->dev;
        enum dma_data_direction dma_dir;
        int data_len, len;
@@ -2321,25 +2360,25 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
                                len, dma_dir);
        rx_desc->buf_phys_addr = 0;
 
-       if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) {
-               skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++];
+       if (!xdp_buff_has_frags(xdp))
+               sinfo->nr_frags = 0;
+
+       if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
+               skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];
 
                skb_frag_off_set(frag, pp->rx_offset_correction);
                skb_frag_size_set(frag, data_len);
                __skb_frag_set_page(frag, page);
+
+               if (!xdp_buff_has_frags(xdp)) {
+                       sinfo->xdp_frags_size = *size;
+                       xdp_buff_set_frags_flag(xdp);
+               }
+               if (page_is_pfmemalloc(page))
+                       xdp_buff_set_frag_pfmemalloc(xdp);
        } else {
                page_pool_put_full_page(rxq->page_pool, page, true);
        }
-
-       /* last fragment */
-       if (len == *size) {
-               struct skb_shared_info *sinfo;
-
-               sinfo = xdp_get_shared_info_from_buff(xdp);
-               sinfo->nr_frags = xdp_sinfo->nr_frags;
-               memcpy(sinfo->frags, xdp_sinfo->frags,
-                      sinfo->nr_frags * sizeof(skb_frag_t));
-       }
        *size -= len;
 }
 
@@ -2348,8 +2387,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
                      struct xdp_buff *xdp, u32 desc_status)
 {
        struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
-       int i, num_frags = sinfo->nr_frags;
        struct sk_buff *skb;
+       u8 num_frags;
+
+       if (unlikely(xdp_buff_has_frags(xdp)))
+               num_frags = sinfo->nr_frags;
 
        skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
        if (!skb)
@@ -2361,13 +2403,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
        skb_put(skb, xdp->data_end - xdp->data);
        skb->ip_summed = mvneta_rx_csum(pp, desc_status);
 
-       for (i = 0; i < num_frags; i++) {
-               skb_frag_t *frag = &sinfo->frags[i];
-
-               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-                               skb_frag_page(frag), skb_frag_off(frag),
-                               skb_frag_size(frag), PAGE_SIZE);
-       }
+       if (unlikely(xdp_buff_has_frags(xdp)))
+               xdp_update_skb_shared_info(skb, num_frags,
+                                          sinfo->xdp_frags_size,
+                                          num_frags * xdp->frame_sz,
+                                          xdp_buff_is_frag_pfmemalloc(xdp));
 
        return skb;
 }
@@ -2379,7 +2419,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 {
        int rx_proc = 0, rx_todo, refill, size = 0;
        struct net_device *dev = pp->dev;
-       struct skb_shared_info sinfo;
        struct mvneta_stats ps = {};
        struct bpf_prog *xdp_prog;
        u32 desc_status, frame_sz;
@@ -2388,8 +2427,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
        xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq);
        xdp_buf.data_hard_start = NULL;
 
-       sinfo.nr_frags = 0;
-
        /* Get number of received packets */
        rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
 
@@ -2431,7 +2468,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                        }
 
                        mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
-                                                   &size, &sinfo, page);
+                                                   &size, page);
                } /* Middle or Last descriptor */
 
                if (!(rx_status & MVNETA_RXD_LAST_DESC))
@@ -2439,7 +2476,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                        continue;
 
                if (size) {
-                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
                        goto next;
                }
 
@@ -2451,7 +2488,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                if (IS_ERR(skb)) {
                        struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 
-                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+                       mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
 
                        u64_stats_update_begin(&stats->syncp);
                        stats->es.skb_alloc_error++;
@@ -2468,11 +2505,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                napi_gro_receive(napi, skb);
 next:
                xdp_buf.data_hard_start = NULL;
-               sinfo.nr_frags = 0;
        }
 
        if (xdp_buf.data_hard_start)
-               mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
+               mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1);
 
        if (ps.xdp_redirect)
                xdp_do_flush_map();
@@ -3260,7 +3296,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp,
                return err;
        }
 
-       err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
+       err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
+                                PAGE_SIZE);
        if (err < 0)
                goto err_free_pp;
 
@@ -3740,6 +3777,7 @@ static void mvneta_percpu_disable(void *arg)
 static int mvneta_change_mtu(struct net_device *dev, int mtu)
 {
        struct mvneta_port *pp = netdev_priv(dev);
+       struct bpf_prog *prog = pp->xdp_prog;
        int ret;
 
        if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
@@ -3748,8 +3786,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
                mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
        }
 
-       if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
-               netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
+       if (prog && !prog->aux->xdp_has_frags &&
+           mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n",
+                           mtu);
+
                return -EINVAL;
        }
 
@@ -3969,6 +4010,15 @@ static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = {
        .pcs_an_restart = mvneta_pcs_an_restart,
 };
 
+static struct phylink_pcs *mvneta_mac_select_pcs(struct phylink_config *config,
+                                                phy_interface_t interface)
+{
+       struct net_device *ndev = to_net_dev(config->dev);
+       struct mvneta_port *pp = netdev_priv(ndev);
+
+       return &pp->phylink_pcs;
+}
+
 static int mvneta_mac_prepare(struct phylink_config *config, unsigned int mode,
                              phy_interface_t interface)
 {
@@ -4169,13 +4219,14 @@ static void mvneta_mac_link_up(struct phylink_config *config,
        mvneta_port_up(pp);
 
        if (phy && pp->eee_enabled) {
-               pp->eee_active = phy_init_eee(phy, 0) >= 0;
+               pp->eee_active = phy_init_eee(phy, false) >= 0;
                mvneta_set_eee(pp, pp->eee_active && pp->tx_lpi_enabled);
        }
 }
 
 static const struct phylink_mac_ops mvneta_phylink_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = mvneta_mac_select_pcs,
        .mac_prepare = mvneta_mac_prepare,
        .mac_config = mvneta_mac_config,
        .mac_finish = mvneta_mac_finish,
@@ -4490,8 +4541,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
        struct mvneta_port *pp = netdev_priv(dev);
        struct bpf_prog *old_prog;
 
-       if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
-               NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
+       if (prog && !prog->aux->xdp_has_frags &&
+           dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+               NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags");
                return -EOPNOTSUPP;
        }
 
@@ -5321,26 +5373,62 @@ static int mvneta_probe(struct platform_device *pdev)
        if (!dev)
                return -ENOMEM;
 
-       dev->irq = irq_of_parse_and_map(dn, 0);
-       if (dev->irq == 0)
-               return -EINVAL;
+       dev->tx_queue_len = MVNETA_MAX_TXD;
+       dev->watchdog_timeo = 5 * HZ;
+       dev->netdev_ops = &mvneta_netdev_ops;
+       dev->ethtool_ops = &mvneta_eth_tool_ops;
+
+       pp = netdev_priv(dev);
+       spin_lock_init(&pp->lock);
+       pp->dn = dn;
+
+       pp->rxq_def = rxq_def;
+       pp->indir[0] = rxq_def;
 
        err = of_get_phy_mode(dn, &phy_mode);
        if (err) {
                dev_err(&pdev->dev, "incorrect phy-mode\n");
-               goto err_free_irq;
+               return err;
        }
 
+       pp->phy_interface = phy_mode;
+
        comphy = devm_of_phy_get(&pdev->dev, dn, NULL);
-       if (comphy == ERR_PTR(-EPROBE_DEFER)) {
-               err = -EPROBE_DEFER;
-               goto err_free_irq;
-       } else if (IS_ERR(comphy)) {
+       if (comphy == ERR_PTR(-EPROBE_DEFER))
+               return -EPROBE_DEFER;
+
+       if (IS_ERR(comphy))
                comphy = NULL;
+
+       pp->comphy = comphy;
+
+       pp->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(pp->base))
+               return PTR_ERR(pp->base);
+
+       /* Get special SoC configurations */
+       if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
+               pp->neta_armada3700 = true;
+
+       dev->irq = irq_of_parse_and_map(dn, 0);
+       if (dev->irq == 0)
+               return -EINVAL;
+
+       pp->clk = devm_clk_get(&pdev->dev, "core");
+       if (IS_ERR(pp->clk))
+               pp->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(pp->clk)) {
+               err = PTR_ERR(pp->clk);
+               goto err_free_irq;
        }
 
-       pp = netdev_priv(dev);
-       spin_lock_init(&pp->lock);
+       clk_prepare_enable(pp->clk);
+
+       pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+       if (!IS_ERR(pp->clk_bus))
+               clk_prepare_enable(pp->clk_bus);
+
+       pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
 
        pp->phylink_config.dev = &dev->dev;
        pp->phylink_config.type = PHYLINK_NETDEV;
@@ -5377,55 +5465,16 @@ static int mvneta_probe(struct platform_device *pdev)
                                 phy_mode, &mvneta_phylink_ops);
        if (IS_ERR(phylink)) {
                err = PTR_ERR(phylink);
-               goto err_free_irq;
-       }
-
-       dev->tx_queue_len = MVNETA_MAX_TXD;
-       dev->watchdog_timeo = 5 * HZ;
-       dev->netdev_ops = &mvneta_netdev_ops;
-
-       dev->ethtool_ops = &mvneta_eth_tool_ops;
-
-       pp->phylink = phylink;
-       pp->comphy = comphy;
-       pp->phy_interface = phy_mode;
-       pp->dn = dn;
-
-       pp->rxq_def = rxq_def;
-       pp->indir[0] = rxq_def;
-
-       /* Get special SoC configurations */
-       if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
-               pp->neta_armada3700 = true;
-
-       pp->clk = devm_clk_get(&pdev->dev, "core");
-       if (IS_ERR(pp->clk))
-               pp->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(pp->clk)) {
-               err = PTR_ERR(pp->clk);
-               goto err_free_phylink;
-       }
-
-       clk_prepare_enable(pp->clk);
-
-       pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
-       if (!IS_ERR(pp->clk_bus))
-               clk_prepare_enable(pp->clk_bus);
-
-       pp->base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(pp->base)) {
-               err = PTR_ERR(pp->base);
                goto err_clk;
        }
 
-       pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops;
-       phylink_set_pcs(phylink, &pp->phylink_pcs);
+       pp->phylink = phylink;
 
        /* Alloc per-cpu port structure */
        pp->ports = alloc_percpu(struct mvneta_pcpu_port);
        if (!pp->ports) {
                err = -ENOMEM;
-               goto err_clk;
+               goto err_free_phylink;
        }
 
        /* Alloc per-cpu stats */
@@ -5569,12 +5618,12 @@ err_netdev:
        free_percpu(pp->stats);
 err_free_ports:
        free_percpu(pp->ports);
-err_clk:
-       clk_disable_unprepare(pp->clk_bus);
-       clk_disable_unprepare(pp->clk);
 err_free_phylink:
        if (pp->phylink)
                phylink_destroy(pp->phylink);
+err_clk:
+       clk_disable_unprepare(pp->clk_bus);
+       clk_disable_unprepare(pp->clk);
 err_free_irq:
        irq_dispose_mapping(dev->irq);
        return err;
index 66da31f..92c0ddb 100644 (file)
@@ -222,8 +222,11 @@ EXPORT_SYMBOL(otx2_set_mac_address);
 int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)
 {
        struct nix_frs_cfg *req;
+       u16 maxlen;
        int err;
 
+       maxlen = otx2_get_max_mtu(pfvf) + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN;
+
        mutex_lock(&pfvf->mbox.lock);
        req = otx2_mbox_alloc_msg_nix_set_hw_frs(&pfvf->mbox);
        if (!req) {
@@ -233,6 +236,10 @@ int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)
 
        req->maxlen = pfvf->netdev->mtu + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN;
 
+       /* Use max receive length supported by hardware for loopback devices */
+       if (is_otx2_lbkvf(pfvf->pdev))
+               req->maxlen = maxlen;
+
        err = otx2_sync_mbox_msg(&pfvf->mbox);
        mutex_unlock(&pfvf->mbox.lock);
        return err;
index 14509fc..56be200 100644 (file)
@@ -178,6 +178,9 @@ struct otx2_hw {
        u16                     rqpool_cnt;
        u16                     sqpool_cnt;
 
+#define OTX2_DEFAULT_RBUF_LEN  2048
+       u16                     rbuf_len;
+
        /* NPA */
        u32                     stack_pg_ptrs;  /* No of ptrs per stack page */
        u32                     stack_pg_bytes; /* Size of stack page */
index d85db90..abe5267 100644 (file)
@@ -371,6 +371,7 @@ static void otx2_get_ringparam(struct net_device *netdev,
        ring->rx_pending = qs->rqe_cnt ? qs->rqe_cnt : Q_COUNT(Q_SIZE_256);
        ring->tx_max_pending = Q_COUNT(Q_SIZE_MAX);
        ring->tx_pending = qs->sqe_cnt ? qs->sqe_cnt : Q_COUNT(Q_SIZE_4K);
+       kernel_ring->rx_buf_len = pfvf->hw.rbuf_len;
 }
 
 static int otx2_set_ringparam(struct net_device *netdev,
@@ -379,6 +380,8 @@ static int otx2_set_ringparam(struct net_device *netdev,
                              struct netlink_ext_ack *extack)
 {
        struct otx2_nic *pfvf = netdev_priv(netdev);
+       u32 rx_buf_len = kernel_ring->rx_buf_len;
+       u32 old_rx_buf_len = pfvf->hw.rbuf_len;
        bool if_up = netif_running(netdev);
        struct otx2_qset *qs = &pfvf->qset;
        u32 rx_count, tx_count;
@@ -386,6 +389,15 @@ static int otx2_set_ringparam(struct net_device *netdev,
        if (ring->rx_mini_pending || ring->rx_jumbo_pending)
                return -EINVAL;
 
+       /* Hardware supports max size of 32k for a receive buffer
+        * and 1536 is typical ethernet frame size.
+        */
+       if (rx_buf_len && (rx_buf_len < 1536 || rx_buf_len > 32768)) {
+               netdev_err(netdev,
+                          "Receive buffer range is 1536 - 32768");
+               return -EINVAL;
+       }
+
        /* Permitted lengths are 16 64 256 1K 4K 16K 64K 256K 1M  */
        rx_count = ring->rx_pending;
        /* On some silicon variants a skid or reserved CQEs are
@@ -403,7 +415,8 @@ static int otx2_set_ringparam(struct net_device *netdev,
                           Q_COUNT(Q_SIZE_4K), Q_COUNT(Q_SIZE_MAX));
        tx_count = Q_COUNT(Q_SIZE(tx_count, 3));
 
-       if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt)
+       if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt &&
+           rx_buf_len == old_rx_buf_len)
                return 0;
 
        if (if_up)
@@ -413,6 +426,8 @@ static int otx2_set_ringparam(struct net_device *netdev,
        qs->sqe_cnt = tx_count;
        qs->rqe_cnt = rx_count;
 
+       pfvf->hw.rbuf_len = rx_buf_len;
+
        if (if_up)
                return netdev->netdev_ops->ndo_open(netdev);
 
@@ -1207,6 +1222,7 @@ end:
 static const struct ethtool_ops otx2_ethtool_ops = {
        .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
                                     ETHTOOL_COALESCE_MAX_FRAMES,
+       .supported_ring_params  = ETHTOOL_RING_USE_RX_BUF_LEN,
        .get_link               = otx2_get_link,
        .get_drvinfo            = otx2_get_drvinfo,
        .get_strings            = otx2_get_strings,
@@ -1326,6 +1342,7 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev,
 static const struct ethtool_ops otx2vf_ethtool_ops = {
        .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
                                     ETHTOOL_COALESCE_MAX_FRAMES,
+       .supported_ring_params  = ETHTOOL_RING_USE_RX_BUF_LEN,
        .get_link               = otx2_get_link,
        .get_drvinfo            = otx2vf_get_drvinfo,
        .get_strings            = otx2vf_get_strings,
index d39341e..86c1c2f 100644 (file)
@@ -1311,6 +1311,9 @@ static int otx2_get_rbuf_size(struct otx2_nic *pf, int mtu)
        int total_size;
        int rbuf_size;
 
+       if (pf->hw.rbuf_len)
+               return ALIGN(pf->hw.rbuf_len, OTX2_ALIGN) + OTX2_HEAD_ROOM;
+
        /* The data transferred by NIX to memory consists of actual packet
         * plus additional data which has timestamp and/or EDSA/HIGIG2
         * headers if interface is configured in corresponding modes.
@@ -2625,6 +2628,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        hw->tx_queues = qcount;
        hw->tot_tx_queues = qcount;
        hw->max_queues = qcount;
+       hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN;
 
        num_vec = pci_msix_vec_count(pdev);
        hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE,
index 925b74e..d96c890 100644 (file)
@@ -586,6 +586,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        hw->tx_queues = qcount;
        hw->max_queues = qcount;
        hw->tot_tx_queues = qcount;
+       hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN;
 
        hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE,
                                          GFP_KERNEL);
index 89ca796..4cd0747 100644 (file)
@@ -1556,6 +1556,7 @@ static int mtk_star_probe(struct platform_device *pdev)
        return devm_register_netdev(dev, ndev);
 }
 
+#ifdef CONFIG_OF
 static const struct of_device_id mtk_star_of_match[] = {
        { .compatible = "mediatek,mt8516-eth", },
        { .compatible = "mediatek,mt8518-eth", },
@@ -1563,6 +1564,7 @@ static const struct of_device_id mtk_star_of_match[] = {
        { }
 };
 MODULE_DEVICE_TABLE(of, mtk_star_of_match);
+#endif
 
 static SIMPLE_DEV_PM_OPS(mtk_star_pm_ops,
                         mtk_star_suspend, mtk_star_resume);
index b0de6b9..2b53738 100644 (file)
@@ -7,7 +7,8 @@
 static bool
 tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state,
                          const struct flow_action_entry *act,
-                         int act_index)
+                         int act_index,
+                         struct mlx5_flow_attr *attr)
 {
        return true;
 }
@@ -20,7 +21,7 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
 {
        attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                        MLX5_FLOW_CONTEXT_ACTION_COUNT;
-       attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT;
+       attr->flags |= MLX5_ATTR_FLAG_ACCEPT;
 
        return 0;
 }
index 26efa33..bfbc91c 100644 (file)
@@ -16,12 +16,12 @@ struct mlx5e_tc_act_parse_state {
        unsigned int num_actions;
        struct mlx5e_tc_flow *flow;
        struct netlink_ext_ack *extack;
+       bool ct;
        bool encap;
        bool decap;
        bool mpls_push;
        bool ptype_host;
        const struct ip_tunnel_info *tun_info;
-       struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
        int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
        int if_count;
        struct mlx5_tc_ct_priv *ct_priv;
@@ -30,7 +30,8 @@ struct mlx5e_tc_act_parse_state {
 struct mlx5e_tc_act {
        bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state,
                            const struct flow_action_entry *act,
-                           int act_index);
+                           int act_index,
+                           struct mlx5_flow_attr *attr);
 
        int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state,
                            const struct flow_action_entry *act,
index 29920ef..c0f08ae 100644 (file)
@@ -38,11 +38,12 @@ csum_offload_supported(struct mlx5e_priv *priv,
 static bool
 tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state,
                        const struct flow_action_entry *act,
-                       int act_index)
+                       int act_index,
+                       struct mlx5_flow_attr *attr)
 {
        struct mlx5e_tc_flow *flow = parse_state->flow;
 
-       return csum_offload_supported(flow->priv, flow->attr->action,
+       return csum_offload_supported(flow->priv, attr->action,
                                      act->csum_flags, parse_state->extack);
 }
 
index 06ec30c..85f0cb8 100644 (file)
@@ -8,8 +8,10 @@
 static bool
 tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
                      const struct flow_action_entry *act,
-                     int act_index)
+                     int act_index,
+                     struct mlx5_flow_attr *attr)
 {
+       bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
        struct netlink_ext_ack *extack = parse_state->extack;
 
        if (flow_flag_test(parse_state->flow, SAMPLE)) {
@@ -18,6 +20,11 @@ tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
                return false;
        }
 
+       if (parse_state->ct && !clear_action) {
+               NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supoported");
+               return false;
+       }
+
        return true;
 }
 
@@ -27,6 +34,7 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
                struct mlx5e_priv *priv,
                struct mlx5_flow_attr *attr)
 {
+       bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
        int err;
 
        err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
@@ -35,11 +43,16 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
        if (err)
                return err;
 
-       flow_flag_set(parse_state->flow, CT);
 
        if (mlx5e_is_eswitch_flow(parse_state->flow))
                attr->esw_attr->split_count = attr->esw_attr->out_count;
 
+       if (!clear_action) {
+               attr->flags |= MLX5_ATTR_FLAG_CT;
+               flow_flag_set(parse_state->flow, CT);
+               parse_state->ct = true;
+       }
+
        return 0;
 }
 
index 2e29a23..3d5f236 100644 (file)
@@ -7,7 +7,8 @@
 static bool
 tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state,
                        const struct flow_action_entry *act,
-                       int act_index)
+                       int act_index,
+                       struct mlx5_flow_attr *attr)
 {
        return true;
 }
index f445150..fb1be82 100644 (file)
@@ -8,6 +8,7 @@
 static int
 validate_goto_chain(struct mlx5e_priv *priv,
                    struct mlx5e_tc_flow *flow,
+                   struct mlx5_flow_attr *attr,
                    const struct flow_action_entry *act,
                    struct netlink_ext_ack *extack)
 {
@@ -32,7 +33,7 @@ validate_goto_chain(struct mlx5e_priv *priv,
        }
 
        if (!mlx5_chains_backwards_supported(chains) &&
-           dest_chain <= flow->attr->chain) {
+           dest_chain <= attr->chain) {
                NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported");
                return -EOPNOTSUPP;
        }
@@ -43,8 +44,8 @@ validate_goto_chain(struct mlx5e_priv *priv,
                return -EOPNOTSUPP;
        }
 
-       if (flow->attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
-                                 MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+       if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+                           MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
            !reformat_and_fwd) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Goto chain is not allowed if action has reformat or decap");
@@ -57,12 +58,13 @@ validate_goto_chain(struct mlx5e_priv *priv,
 static bool
 tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state,
                        const struct flow_action_entry *act,
-                       int act_index)
+                       int act_index,
+                       struct mlx5_flow_attr *attr)
 {
        struct netlink_ext_ack *extack = parse_state->extack;
        struct mlx5e_tc_flow *flow = parse_state->flow;
 
-       if (validate_goto_chain(flow->priv, flow, act, extack))
+       if (validate_goto_chain(flow->priv, flow, attr, act, extack))
                return false;
 
        return true;
index d775c3d..e8d2275 100644 (file)
@@ -7,7 +7,8 @@
 static bool
 tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state,
                        const struct flow_action_entry *act,
-                       int act_index)
+                       int act_index,
+                       struct mlx5_flow_attr *attr)
 {
        if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) {
                NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported");
index c614fc7..99fb98b 100644 (file)
@@ -99,7 +99,8 @@ get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev)
 static bool
 tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
                          const struct flow_action_entry *act,
-                         int act_index)
+                         int act_index,
+                         struct mlx5_flow_attr *attr)
 {
        struct netlink_ext_ack *extack = parse_state->extack;
        struct mlx5e_tc_flow *flow = parse_state->flow;
@@ -108,8 +109,8 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
        struct mlx5e_priv *priv = flow->priv;
        struct mlx5_esw_flow_attr *esw_attr;
 
-       parse_attr = flow->attr->parse_attr;
-       esw_attr = flow->attr->esw_attr;
+       parse_attr = attr->parse_attr;
+       esw_attr = attr->esw_attr;
 
        if (!out_dev) {
                /* out_dev is NULL when filters with
index 2c74567..16681cf 100644 (file)
@@ -7,7 +7,8 @@
 static bool
 tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
                              const struct flow_action_entry *act,
-                             int act_index)
+                             int act_index,
+                             struct mlx5_flow_attr *attr)
 {
        struct netlink_ext_ack *extack = parse_state->extack;
        struct mlx5e_tc_flow *flow = parse_state->flow;
index 784fc4f..4033294 100644 (file)
@@ -8,7 +8,8 @@
 static bool
 tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
                             const struct flow_action_entry *act,
-                            int act_index)
+                            int act_index,
+                            struct mlx5_flow_attr *attr)
 {
        struct netlink_ext_ack *extack = parse_state->extack;
        struct mlx5e_priv *priv = parse_state->flow->priv;
@@ -36,13 +37,13 @@ tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
 static bool
 tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
                            const struct flow_action_entry *act,
-                           int act_index)
+                           int act_index,
+                           struct mlx5_flow_attr *attr)
 {
        struct netlink_ext_ack *extack = parse_state->extack;
-       struct mlx5e_tc_flow *flow = parse_state->flow;
        struct net_device *filter_dev;
 
-       filter_dev = flow->attr->parse_attr->filter_dev;
+       filter_dev = attr->parse_attr->filter_dev;
 
        /* we only support mpls pop if it is the first action
         * and the filter net device is bareudp. Subsequent
index 79addbb..39f8f71 100644 (file)
@@ -46,9 +46,9 @@ static int
 parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
                          const struct flow_action_entry *act, int namespace,
                          struct mlx5e_tc_flow_parse_attr *parse_attr,
-                         struct pedit_headers_action *hdrs,
                          struct netlink_ext_ack *extack)
 {
+       struct pedit_headers_action *hdrs = parse_attr->hdrs;
        u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
        u8 htype = act->mangle.htype;
        int err = -EOPNOTSUPP;
@@ -110,20 +110,20 @@ int
 mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
                                const struct flow_action_entry *act, int namespace,
                                struct mlx5e_tc_flow_parse_attr *parse_attr,
-                               struct pedit_headers_action *hdrs,
                                struct mlx5e_tc_flow *flow,
                                struct netlink_ext_ack *extack)
 {
        if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
                return parse_pedit_to_reformat(act, parse_attr, extack);
 
-       return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, hdrs, extack);
+       return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, extack);
 }
 
 static bool
 tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state,
                         const struct flow_action_entry *act,
-                        int act_index)
+                        int act_index,
+                        struct mlx5_flow_attr *attr)
 {
        return true;
 }
@@ -141,8 +141,7 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
 
        ns_type = mlx5e_get_flow_namespace(flow);
 
-       err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type,
-                                             attr->parse_attr, parse_state->hdrs,
+       err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr,
                                              flow, parse_state->extack);
        if (err)
                return err;
index da8ab03..258f030 100644 (file)
@@ -25,7 +25,6 @@ int
 mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
                                const struct flow_action_entry *act, int namespace,
                                struct mlx5e_tc_flow_parse_attr *parse_attr,
-                               struct pedit_headers_action *hdrs,
                                struct mlx5e_tc_flow *flow,
                                struct netlink_ext_ack *extack);
 
index 0819110..6454b03 100644 (file)
@@ -7,7 +7,8 @@
 static bool
 tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
                         const struct flow_action_entry *act,
-                        int act_index)
+                        int act_index,
+                        struct mlx5_flow_attr *attr)
 {
        return true;
 }
index 1c32e24..9dd2441 100644 (file)
@@ -7,16 +7,16 @@
 static bool
 tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
                                    const struct flow_action_entry *act,
-                                   int act_index)
+                                   int act_index,
+                                   struct mlx5_flow_attr *attr)
 {
        struct netlink_ext_ack *extack = parse_state->extack;
-       struct mlx5e_tc_flow *flow = parse_state->flow;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct net_device *out_dev = act->dev;
        struct mlx5_esw_flow_attr *esw_attr;
 
-       parse_attr = flow->attr->parse_attr;
-       esw_attr = flow->attr->esw_attr;
+       parse_attr = attr->parse_attr;
+       esw_attr = attr->esw_attr;
 
        if (!out_dev)
                return false;
index 6699bdf..539fea1 100644 (file)
@@ -8,7 +8,8 @@
 static bool
 tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state,
                          const struct flow_action_entry *act,
-                         int act_index)
+                         int act_index,
+                         struct mlx5_flow_attr *attr)
 {
        struct netlink_ext_ack *extack = parse_state->extack;
 
@@ -27,11 +28,7 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
                    struct mlx5e_priv *priv,
                    struct mlx5_flow_attr *attr)
 {
-       struct mlx5e_sample_attr *sample_attr;
-
-       sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL);
-       if (!sample_attr)
-               return -ENOMEM;
+       struct mlx5e_sample_attr *sample_attr = &attr->sample_attr;
 
        sample_attr->rate = act->sample.rate;
        sample_attr->group_num = act->sample.psample_group->group_num;
@@ -39,7 +36,7 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
        if (act->sample.truncate)
                sample_attr->trunc_size = act->sample.trunc_size;
 
-       attr->sample_attr = sample_attr;
+       attr->flags |= MLX5_ATTR_FLAG_SAMPLE;
        flow_flag_set(parse_state->flow, SAMPLE);
 
        return 0;
index 046b64c..9ea293f 100644 (file)
@@ -7,7 +7,8 @@
 static bool
 tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state,
                        const struct flow_action_entry *act,
-                       int act_index)
+                       int act_index,
+                       struct mlx5_flow_attr *attr)
 {
        struct netlink_ext_ack *extack = parse_state->extack;
 
@@ -27,7 +28,7 @@ tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
 {
        attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                        MLX5_FLOW_CONTEXT_ACTION_COUNT;
-       attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+       attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
 
        return 0;
 }
index 6f4a2cf..b4fa2de 100644 (file)
@@ -8,7 +8,8 @@
 static bool
 tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
                             const struct flow_action_entry *act,
-                            int act_index)
+                            int act_index,
+                            struct mlx5_flow_attr *attr)
 {
        if (!act->tunnel) {
                NL_SET_ERR_MSG_MOD(parse_state->extack,
@@ -34,7 +35,8 @@ tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
 static bool
 tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
                             const struct flow_action_entry *act,
-                            int act_index)
+                            int act_index,
+                            struct mlx5_flow_attr *attr)
 {
        return true;
 }
index 70fc0c2..6378b75 100644 (file)
@@ -9,7 +9,6 @@
 static int
 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
-                                struct pedit_headers_action *hdrs,
                                 u32 *action, struct netlink_ext_ack *extack)
 {
        const struct flow_action_entry prio_tag_act = {
@@ -26,7 +25,7 @@ add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
        };
 
        return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
-                                                   &prio_tag_act, parse_attr, hdrs, action,
+                                                   &prio_tag_act, parse_attr, action,
                                                    extack);
 }
 
@@ -151,7 +150,8 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
 static bool
 tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state,
                        const struct flow_action_entry *act,
-                       int act_index)
+                       int act_index,
+                       struct mlx5_flow_attr *attr)
 {
        return true;
 }
@@ -170,8 +170,8 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
                /* Replace vlan pop+push with vlan modify */
                attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
                err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act,
-                                                          attr->parse_attr, parse_state->hdrs,
-                                                          &attr->action, parse_state->extack);
+                                                          attr->parse_attr, &attr->action,
+                                                          parse_state->extack);
        } else {
                err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action,
                                           parse_state->extack);
@@ -191,7 +191,6 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
                       struct mlx5_flow_attr *attr)
 {
        struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
-       struct pedit_headers_action *hdrs = parse_state->hdrs;
        struct netlink_ext_ack *extack = parse_state->extack;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        int err;
@@ -202,7 +201,7 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
                 * tag rewrite.
                 */
                attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
-               err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
+               err = add_vlan_prio_tag_rewrite_action(priv, parse_attr,
                                                       &attr->action, extack);
                if (err)
                        return err;
index 3d62f13..2fa58c6 100644 (file)
@@ -24,7 +24,6 @@ int
 mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
                                     const struct flow_action_entry *act,
                                     struct mlx5e_tc_flow_parse_attr *parse_attr,
-                                    struct pedit_headers_action *hdrs,
                                     u32 *action, struct netlink_ext_ack *extack);
 
 #endif /* __MLX5_EN_TC_ACT_VLAN_H__ */
index 63e36e7..28444d4 100644 (file)
@@ -12,7 +12,6 @@ int
 mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
                                     const struct flow_action_entry *act,
                                     struct mlx5e_tc_flow_parse_attr *parse_attr,
-                                    struct pedit_headers_action *hdrs,
                                     u32 *action, struct netlink_ext_ack *extack)
 {
        u16 mask16 = VLAN_VID_MASK;
@@ -44,7 +43,7 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
                return -EOPNOTSUPP;
        }
 
-       err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr, hdrs,
+       err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr,
                                              NULL, extack);
        *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 
@@ -54,7 +53,8 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
 static bool
 tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
                               const struct flow_action_entry *act,
-                              int act_index)
+                              int act_index,
+                              struct mlx5_flow_attr *attr)
 {
        return true;
 }
@@ -69,8 +69,7 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
        int err;
 
        ns_type = mlx5e_get_flow_namespace(parse_state->flow);
-       err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act,
-                                                  attr->parse_attr, parse_state->hdrs,
+       err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr,
                                                   &attr->action, parse_state->extack);
        if (err)
                return err;
index 31b4e39..9e0e229 100644 (file)
@@ -101,6 +101,7 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at
        post_attr->inner_match_level = MLX5_MATCH_NONE;
        post_attr->outer_match_level = MLX5_MATCH_NONE;
        post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+       post_attr->flags &= ~MLX5_ATTR_FLAG_SAMPLE;
 
        handle->ns_type = post_act->ns_type;
        /* Splits were handled before post action */
index ff4b4f8..32230e6 100644 (file)
@@ -403,7 +403,7 @@ add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
        post_attr->chain = 0;
        post_attr->prio = 0;
        post_attr->ft = default_tbl;
-       post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+       post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT;
 
        /* When offloading sample and encap action, if there is no valid
         * neigh data struct, a slow path rule is offloaded first. Source
@@ -492,8 +492,7 @@ del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
 struct mlx5_flow_handle *
 mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
                        struct mlx5_flow_spec *spec,
-                       struct mlx5_flow_attr *attr,
-                       u32 tunnel_id)
+                       struct mlx5_flow_attr *attr)
 {
        struct mlx5e_post_act_handle *post_act_handle = NULL;
        struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
@@ -502,6 +501,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
        struct mlx5e_sample_flow *sample_flow;
        struct mlx5e_sample_attr *sample_attr;
        struct mlx5_flow_attr *pre_attr;
+       u32 tunnel_id = attr->tunnel_id;
        struct mlx5_eswitch *esw;
        u32 default_tbl_id;
        u32 obj_id;
@@ -513,7 +513,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
        sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
        if (!sample_flow)
                return ERR_PTR(-ENOMEM);
-       sample_attr = attr->sample_attr;
+       sample_attr = &attr->sample_attr;
        sample_attr->sample_flow = sample_flow;
 
        /* For NICs with reg_c_preserve support or decap action, use
@@ -546,6 +546,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
                err = PTR_ERR(sample_flow->sampler);
                goto err_sampler;
        }
+       sample_attr->sampler_id = sample_flow->sampler->sampler_id;
 
        /* Create an id mapping reg_c0 value to sample object. */
        restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
@@ -580,13 +581,12 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
        if (tunnel_id)
                pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
        pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
-       pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
+       pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE;
        pre_attr->inner_match_level = attr->inner_match_level;
        pre_attr->outer_match_level = attr->outer_match_level;
        pre_attr->chain = attr->chain;
        pre_attr->prio = attr->prio;
-       pre_attr->sample_attr = attr->sample_attr;
-       sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+       pre_attr->sample_attr = *sample_attr;
        pre_esw_attr = pre_attr->esw_attr;
        pre_esw_attr->in_mdev = esw_attr->in_mdev;
        pre_esw_attr->in_rep = esw_attr->in_rep;
@@ -633,11 +633,11 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
         * will hit fw syndromes.
         */
        esw = tc_psample->esw;
-       sample_flow = attr->sample_attr->sample_flow;
+       sample_flow = attr->sample_attr.sample_flow;
        mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
 
        sample_restore_put(tc_psample, sample_flow->restore);
-       mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id);
+       mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id);
        sampler_put(tc_psample, sample_flow->sampler);
        if (sample_flow->post_act_handle)
                mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle);
index 9ef8a49..a569367 100644 (file)
@@ -26,8 +26,7 @@ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
 struct mlx5_flow_handle *
 mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
                        struct mlx5_flow_spec *spec,
-                       struct mlx5_flow_attr *attr,
-                       u32 tunnel_id);
+                       struct mlx5_flow_attr *attr);
 
 void
 mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
@@ -45,8 +44,7 @@ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
 static inline struct mlx5_flow_handle *
 mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
                        struct mlx5_flow_spec *spec,
-                       struct mlx5_flow_attr *attr,
-                       u32 tunnel_id)
+                       struct mlx5_flow_attr *attr)
 { return ERR_PTR(-EOPNOTSUPP); }
 
 static inline void
index 4a0d38d..0f4d3b9 100644 (file)
@@ -809,7 +809,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
        attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        attr->outer_match_level = MLX5_MATCH_L4;
        attr->counter = entry->counter->counter;
-       attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+       attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
        if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
                attr->esw_attr->in_mdev = priv->mdev;
 
@@ -1787,7 +1787,6 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
  */
 static struct mlx5_flow_handle *
 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
-                         struct mlx5e_tc_flow *flow,
                          struct mlx5_flow_spec *orig_spec,
                          struct mlx5_flow_attr *attr)
 {
@@ -1871,12 +1870,10 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
         */
        if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
            attr->chain == 0) {
-               u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
-
                err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
                                                ct_priv->ns_type,
                                                TUNNEL_TO_REG,
-                                               tun_id);
+                                               attr->tunnel_id);
                if (err) {
                        ct_dbg("Failed to set tunnel register mapping");
                        goto err_mapping;
@@ -1926,87 +1923,19 @@ err_ft:
        return ERR_PTR(err);
 }
 
-static struct mlx5_flow_handle *
-__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
-                               struct mlx5_flow_spec *orig_spec,
-                               struct mlx5_flow_attr *attr,
-                               struct mlx5e_tc_mod_hdr_acts *mod_acts)
-{
-       struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
-       u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
-       struct mlx5_flow_attr *pre_ct_attr;
-       struct mlx5_modify_hdr *mod_hdr;
-       struct mlx5_flow_handle *rule;
-       struct mlx5_ct_flow *ct_flow;
-       int err;
-
-       ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
-       if (!ct_flow)
-               return ERR_PTR(-ENOMEM);
-
-       /* Base esw attributes on original rule attribute */
-       pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
-       if (!pre_ct_attr) {
-               err = -ENOMEM;
-               goto err_attr;
-       }
-
-       memcpy(pre_ct_attr, attr, attr_sz);
-
-       mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
-                                          mod_acts->num_actions,
-                                          mod_acts->actions);
-       if (IS_ERR(mod_hdr)) {
-               err = PTR_ERR(mod_hdr);
-               ct_dbg("Failed to add create ct clear mod hdr");
-               goto err_mod_hdr;
-       }
-
-       pre_ct_attr->modify_hdr = mod_hdr;
-
-       rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
-       if (IS_ERR(rule)) {
-               err = PTR_ERR(rule);
-               ct_dbg("Failed to add ct clear rule");
-               goto err_insert;
-       }
-
-       attr->ct_attr.ct_flow = ct_flow;
-       ct_flow->pre_ct_attr = pre_ct_attr;
-       ct_flow->pre_ct_rule = rule;
-       return rule;
-
-err_insert:
-       mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
-err_mod_hdr:
-       netdev_warn(priv->netdev,
-                   "Failed to offload ct clear flow, err %d\n", err);
-       kfree(pre_ct_attr);
-err_attr:
-       kfree(ct_flow);
-
-       return ERR_PTR(err);
-}
-
 struct mlx5_flow_handle *
 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
-                       struct mlx5e_tc_flow *flow,
                        struct mlx5_flow_spec *spec,
                        struct mlx5_flow_attr *attr,
                        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
 {
-       bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
        struct mlx5_flow_handle *rule;
 
        if (!priv)
                return ERR_PTR(-EOPNOTSUPP);
 
        mutex_lock(&priv->control_lock);
-
-       if (clear_action)
-               rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
-       else
-               rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
+       rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
        mutex_unlock(&priv->control_lock);
 
        return rule;
@@ -2014,14 +1943,13 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
 
 static void
 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
-                        struct mlx5e_tc_flow *flow,
-                        struct mlx5_ct_flow *ct_flow)
+                        struct mlx5_ct_flow *ct_flow,
+                        struct mlx5_flow_attr *attr)
 {
        struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
 
-       mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
-                           pre_ct_attr);
+       mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
 
        if (ct_flow->post_act_handle) {
@@ -2036,7 +1964,6 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
 
 void
 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
-                      struct mlx5e_tc_flow *flow,
                       struct mlx5_flow_attr *attr)
 {
        struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
@@ -2048,7 +1975,7 @@ mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
                return;
 
        mutex_lock(&priv->control_lock);
-       __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
+       __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
        mutex_unlock(&priv->control_lock);
 }
 
index 99662af..2b21c7b 100644 (file)
@@ -116,13 +116,11 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
 
 struct mlx5_flow_handle *
 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
-                       struct mlx5e_tc_flow *flow,
                        struct mlx5_flow_spec *spec,
                        struct mlx5_flow_attr *attr,
                        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
 void
 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
-                      struct mlx5e_tc_flow *flow,
                       struct mlx5_flow_attr *attr);
 
 bool
@@ -183,7 +181,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
 
 static inline struct mlx5_flow_handle *
 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
-                       struct mlx5e_tc_flow *flow,
                        struct mlx5_flow_spec *spec,
                        struct mlx5_flow_attr *attr,
                        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
@@ -193,7 +190,6 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
 
 static inline void
 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
-                      struct mlx5e_tc_flow *flow,
                       struct mlx5_flow_attr *attr)
 {
 }
index f832c26..9ffba58 100644 (file)
@@ -37,6 +37,7 @@ struct mlx5e_tc_flow_parse_attr {
        const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
        struct net_device *filter_dev;
        struct mlx5_flow_spec spec;
+       struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
        struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
        int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
        struct ethhdr eth;
@@ -107,10 +108,19 @@ struct mlx5e_tc_flow {
        struct rcu_head rcu_head;
        struct completion init_done;
        struct completion del_hw_done;
-       int tunnel_id; /* the mapped tunnel id of this flow */
        struct mlx5_flow_attr *attr;
 };
 
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+                     struct mlx5_flow_spec *spec,
+                     struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+                       struct mlx5_flow_handle *rule,
+                       struct mlx5_flow_attr *attr);
+
 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer);
 
 struct mlx5_flow_handle *
@@ -173,6 +183,7 @@ struct mlx5_flow_handle *
 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec);
+
 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5_flow_attr *attr);
index 9918ed8..1f8d339 100644 (file)
@@ -488,12 +488,14 @@ static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
                                     int out_index);
 
 void mlx5e_detach_encap(struct mlx5e_priv *priv,
-                       struct mlx5e_tc_flow *flow, int out_index)
+                       struct mlx5e_tc_flow *flow,
+                       struct mlx5_flow_attr *attr,
+                       int out_index)
 {
        struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
-       if (flow->attr->esw_attr->dests[out_index].flags &
+       if (attr->esw_attr->dests[out_index].flags &
            MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
                mlx5e_detach_encap_route(priv, flow, out_index);
 
@@ -733,6 +735,7 @@ static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
 
 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
+                                   struct mlx5_flow_attr *attr,
                                    struct mlx5e_encap_entry *e,
                                    bool new_encap_entry,
                                    unsigned long tbl_time_before,
@@ -740,6 +743,7 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
 
 int mlx5e_attach_encap(struct mlx5e_priv *priv,
                       struct mlx5e_tc_flow *flow,
+                      struct mlx5_flow_attr *attr,
                       struct net_device *mirred_dev,
                       int out_index,
                       struct netlink_ext_ack *extack,
@@ -748,7 +752,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
-       struct mlx5_flow_attr *attr = flow->attr;
        const struct ip_tunnel_info *tun_info;
        unsigned long tbl_time_before = 0;
        struct mlx5e_encap_entry *e;
@@ -834,8 +837,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
        e->compl_result = 1;
 
 attach_flow:
-       err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
-                                      out_index);
+       err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
+                                      tbl_time_before, out_index);
        if (err)
                goto out_err;
 
@@ -1198,6 +1201,7 @@ out:
 
 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
+                                   struct mlx5_flow_attr *attr,
                                    struct mlx5e_encap_entry *e,
                                    bool new_encap_entry,
                                    unsigned long tbl_time_before,
@@ -1206,7 +1210,6 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        unsigned long tbl_time_after = tbl_time_before;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
-       struct mlx5_flow_attr *attr = flow->attr;
        const struct ip_tunnel_info *tun_info;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5e_route_entry *r;
@@ -1377,7 +1380,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
                        continue;
                }
 
-               err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+               err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
                if (err) {
                        mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
                                       err);
index 3391504..d542b84 100644 (file)
@@ -7,15 +7,19 @@
 #include "tc_priv.h"
 
 void mlx5e_detach_encap(struct mlx5e_priv *priv,
-                       struct mlx5e_tc_flow *flow, int out_index);
+                       struct mlx5e_tc_flow *flow,
+                       struct mlx5_flow_attr *attr,
+                       int out_index);
 
 int mlx5e_attach_encap(struct mlx5e_priv *priv,
                       struct mlx5e_tc_flow *flow,
+                      struct mlx5_flow_attr *attr,
                       struct net_device *mirred_dev,
                       int out_index,
                       struct netlink_ext_ack *extack,
                       struct net_device **encap_dev,
                       bool *encap_valid);
+
 int mlx5e_attach_decap(struct mlx5e_priv *priv,
                       struct mlx5e_tc_flow *flow,
                       struct netlink_ext_ack *extack);
index 2022fa4..099d4ce 100644 (file)
@@ -295,13 +295,62 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv,
 
        if (is_mdev_switchdev_mode(priv->mdev)) {
                mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
-
                return;
        }
 
        mlx5e_del_offloaded_nic_rule(priv, rule, attr);
 }
 
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+                     struct mlx5_flow_spec *spec,
+                     struct mlx5_flow_attr *attr)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+       if (attr->flags & MLX5_ATTR_FLAG_CT) {
+               struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
+                       &attr->parse_attr->mod_hdr_acts;
+
+               return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
+                                              spec, attr,
+                                              mod_hdr_acts);
+       }
+
+       if (!is_mdev_switchdev_mode(priv->mdev))
+               return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
+
+       if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
+               return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
+
+       return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+}
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+                       struct mlx5_flow_handle *rule,
+                       struct mlx5_flow_attr *attr)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+       if (attr->flags & MLX5_ATTR_FLAG_CT) {
+               mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
+               return;
+       }
+
+       if (!is_mdev_switchdev_mode(priv->mdev)) {
+               mlx5e_del_offloaded_nic_rule(priv, rule, attr);
+               return;
+       }
+
+       if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
+               mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
+               return;
+       }
+
+       mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+}
+
 int
 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
@@ -1039,6 +1088,21 @@ err_ft_get:
 }
 
 static int
+alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
+                       struct mlx5_flow_attr *attr)
+
+{
+       struct mlx5_fc *counter;
+
+       counter = mlx5_fc_create(counter_dev, true);
+       if (IS_ERR(counter))
+               return PTR_ERR(counter);
+
+       attr->counter = counter;
+       return 0;
+}
+
+static int
 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
@@ -1046,7 +1110,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_core_dev *dev = priv->mdev;
-       struct mlx5_fc *counter;
        int err;
 
        parse_attr = attr->parse_attr;
@@ -1058,11 +1121,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
        }
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
-               counter = mlx5_fc_create(dev, true);
-               if (IS_ERR(counter))
-                       return PTR_ERR(counter);
-
-               attr->counter = counter;
+               err = alloc_flow_attr_counter(dev, attr);
+               if (err)
+                       return err;
        }
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
@@ -1072,8 +1133,8 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                        return err;
        }
 
-       if (flow_flag_test(flow, CT))
-               flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
+       if (attr->flags & MLX5_ATTR_FLAG_CT)
+               flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
                                                        attr, &parse_attr->mod_hdr_acts);
        else
                flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
@@ -1107,8 +1168,8 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
 
        flow_flag_clear(flow, OFFLOADED);
 
-       if (flow_flag_test(flow, CT))
-               mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
+       if (attr->flags & MLX5_ATTR_FLAG_CT)
+               mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
        else if (!IS_ERR_OR_NULL(flow->rule[0]))
                mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
 
@@ -1142,40 +1203,27 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5_flow_spec *spec,
                           struct mlx5_flow_attr *attr)
 {
-       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
        struct mlx5_flow_handle *rule;
 
-       if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
+       if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
                return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 
-       if (flow_flag_test(flow, CT)) {
-               mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
-
-               rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
-                                              flow, spec, attr,
-                                              mod_hdr_acts);
-       } else if (flow_flag_test(flow, SAMPLE)) {
-               rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
-                                              mlx5e_tc_get_flow_tun_id(flow));
-       } else {
-               rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
-       }
+       rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
 
        if (IS_ERR(rule))
                return rule;
 
        if (attr->esw_attr->split_count) {
                flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
-               if (IS_ERR(flow->rule[1])) {
-                       if (flow_flag_test(flow, CT))
-                               mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
-                       else
-                               mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
-                       return flow->rule[1];
-               }
+               if (IS_ERR(flow->rule[1]))
+                       goto err_rule1;
        }
 
        return rule;
+
+err_rule1:
+       mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
+       return flow->rule[1];
 }
 
 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
@@ -1184,19 +1232,13 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
 {
        flow_flag_clear(flow, OFFLOADED);
 
-       if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
-               goto offload_rule_0;
+       if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
+               return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
 
        if (attr->esw_attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
 
-       if (flow_flag_test(flow, CT))
-               mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
-       else if (flow_flag_test(flow, SAMPLE))
-               mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
-       else
-offload_rule_0:
-               mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+       mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
 }
 
 struct mlx5_flow_handle *
@@ -1214,7 +1256,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
-       slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+       slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
 
        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
        if (!IS_ERR(rule))
@@ -1239,7 +1281,7 @@ void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
-       slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+       slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
        mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
        flow_flag_clear(flow, SLOW);
        kfree(slow_attr);
@@ -1348,10 +1390,10 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
 }
 
 int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
-                             struct mlx5e_tc_flow_parse_attr *parse_attr,
-                             struct mlx5e_tc_flow *flow)
+                             struct mlx5e_tc_flow *flow,
+                             struct mlx5_flow_attr *attr)
 {
-       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts;
+       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
        struct mlx5_modify_hdr *mod_hdr;
 
        mod_hdr = mlx5_modify_header_alloc(priv->mdev,
@@ -1361,13 +1403,101 @@ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
        if (IS_ERR(mod_hdr))
                return PTR_ERR(mod_hdr);
 
-       WARN_ON(flow->attr->modify_hdr);
-       flow->attr->modify_hdr = mod_hdr;
+       WARN_ON(attr->modify_hdr);
+       attr->modify_hdr = mod_hdr;
 
        return 0;
 }
 
 static int
+set_encap_dests(struct mlx5e_priv *priv,
+               struct mlx5e_tc_flow *flow,
+               struct mlx5_flow_attr *attr,
+               struct netlink_ext_ack *extack,
+               bool *encap_valid,
+               bool *vf_tun)
+{
+       struct mlx5e_tc_flow_parse_attr *parse_attr;
+       struct mlx5_esw_flow_attr *esw_attr;
+       struct net_device *encap_dev = NULL;
+       struct mlx5e_rep_priv *rpriv;
+       struct mlx5e_priv *out_priv;
+       int out_index;
+       int err = 0;
+
+       parse_attr = attr->parse_attr;
+       esw_attr = attr->esw_attr;
+       *vf_tun = false;
+       *encap_valid = true;
+
+       for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+               struct net_device *out_dev;
+               int mirred_ifindex;
+
+               if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+                       continue;
+
+               mirred_ifindex = parse_attr->mirred_ifindex[out_index];
+               out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
+               if (!out_dev) {
+                       NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
+                       err = -ENODEV;
+                       goto out;
+               }
+               err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
+                                        extack, &encap_dev, encap_valid);
+               dev_put(out_dev);
+               if (err)
+                       goto out;
+
+               if (esw_attr->dests[out_index].flags &
+                   MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+                   !esw_attr->dest_int_port)
+                       *vf_tun = true;
+
+               out_priv = netdev_priv(encap_dev);
+               rpriv = out_priv->ppriv;
+               esw_attr->dests[out_index].rep = rpriv->rep;
+               esw_attr->dests[out_index].mdev = out_priv->mdev;
+       }
+
+       if (*vf_tun && esw_attr->out_count > 1) {
+               NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+
+out:
+       return err;
+}
+
+static void
+clean_encap_dests(struct mlx5e_priv *priv,
+                 struct mlx5e_tc_flow *flow,
+                 struct mlx5_flow_attr *attr,
+                 bool *vf_tun)
+{
+       struct mlx5_esw_flow_attr *esw_attr;
+       int out_index;
+
+       esw_attr = attr->esw_attr;
+       *vf_tun = false;
+
+       for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+               if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+                       continue;
+
+               if (esw_attr->dests[out_index].flags &
+                   MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+                   !esw_attr->dest_int_port)
+                       *vf_tun = true;
+
+               mlx5e_detach_encap(priv, flow, attr, out_index);
+               kfree(attr->parse_attr->tun_info[out_index]);
+       }
+}
+
+static int
 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
@@ -1375,15 +1505,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
-       bool vf_tun = false, encap_valid = true;
-       struct net_device *encap_dev = NULL;
        struct mlx5_esw_flow_attr *esw_attr;
-       struct mlx5e_rep_priv *rpriv;
-       struct mlx5e_priv *out_priv;
-       struct mlx5_fc *counter;
+       bool vf_tun, encap_valid;
        u32 max_prio, max_chain;
        int err = 0;
-       int out_index;
 
        parse_attr = attr->parse_attr;
        esw_attr = attr->esw_attr;
@@ -1472,50 +1597,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                esw_attr->int_port = int_port;
        }
 
-       for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
-               struct net_device *out_dev;
-               int mirred_ifindex;
-
-               if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
-                       continue;
-
-               mirred_ifindex = parse_attr->mirred_ifindex[out_index];
-               out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
-               if (!out_dev) {
-                       NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
-                       err = -ENODEV;
-                       goto err_out;
-               }
-               err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
-                                        extack, &encap_dev, &encap_valid);
-               dev_put(out_dev);
-               if (err)
-                       goto err_out;
-
-               if (esw_attr->dests[out_index].flags &
-                   MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
-                   !esw_attr->dest_int_port)
-                       vf_tun = true;
-               out_priv = netdev_priv(encap_dev);
-               rpriv = out_priv->ppriv;
-               esw_attr->dests[out_index].rep = rpriv->rep;
-               esw_attr->dests[out_index].mdev = out_priv->mdev;
-       }
-
-       if (vf_tun && esw_attr->out_count > 1) {
-               NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
-               err = -EOPNOTSUPP;
+       err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun);
+       if (err)
                goto err_out;
-       }
 
        err = mlx5_eswitch_add_vlan_action(esw, attr);
        if (err)
                goto err_out;
 
-       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
-           !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                if (vf_tun) {
-                       err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+                       err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
                        if (err)
                                goto err_out;
                } else {
@@ -1526,13 +1618,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
        }
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
-               counter = mlx5_fc_create(esw_attr->counter_dev, true);
-               if (IS_ERR(counter)) {
-                       err = PTR_ERR(counter);
+               err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
+               if (err)
                        goto err_out;
-               }
-
-               attr->counter = counter;
        }
 
        /* we get here if one of the following takes place:
@@ -1576,8 +1664,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_esw_flow_attr *esw_attr;
-       bool vf_tun = false;
-       int out_index;
+       bool vf_tun;
 
        esw_attr = attr->esw_attr;
        mlx5e_put_flow_tunnel_id(flow);
@@ -1601,16 +1688,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
        if (flow->decap_route)
                mlx5e_detach_decap_route(priv, flow);
 
-       for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
-               if (esw_attr->dests[out_index].flags &
-                   MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
-                   !esw_attr->dest_int_port)
-                       vf_tun = true;
-               if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
-                       mlx5e_detach_encap(priv, flow, out_index);
-                       kfree(attr->parse_attr->tun_info[out_index]);
-               }
-       }
+       clean_encap_dests(priv, flow, attr, &vf_tun);
 
        mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
 
@@ -1634,7 +1712,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
        if (flow_flag_test(flow, L3_TO_L2_DECAP))
                mlx5e_detach_decap(priv, flow);
 
-       kfree(attr->sample_attr);
        kvfree(attr->esw_attr->rx_tun_attr);
        kvfree(attr->parse_attr);
        kfree(flow->attr);
@@ -1854,7 +1931,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
                attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        }
 
-       flow->tunnel_id = value;
+       flow->attr->tunnel_id = value;
        return 0;
 
 err_set:
@@ -1868,8 +1945,8 @@ err_enc_opts:
 
 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
 {
-       u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
-       u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
+       u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
+       u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct mlx5_eswitch *esw;
@@ -1885,11 +1962,6 @@ static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
                               enc_opts_id);
 }
 
-u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
-{
-       return flow->tunnel_id;
-}
-
 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
                            struct flow_match_basic *match, bool outer,
                            void *headers_c, void *headers_v)
@@ -2811,14 +2883,15 @@ static unsigned long mask_to_le(unsigned long mask, int size)
 
        return mask;
 }
+
 static int offload_pedit_fields(struct mlx5e_priv *priv,
                                int namespace,
-                               struct pedit_headers_action *hdrs,
                                struct mlx5e_tc_flow_parse_attr *parse_attr,
                                u32 *action_flags,
                                struct netlink_ext_ack *extack)
 {
        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
+       struct pedit_headers_action *hdrs = parse_attr->hdrs;
        void *headers_c, *headers_v, *action, *vals_p;
        u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
        struct mlx5e_tc_mod_hdr_acts *mod_acts;
@@ -2944,35 +3017,43 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
 
 static const struct pedit_headers zero_masks = {};
 
-static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
-                                struct mlx5e_tc_flow_parse_attr *parse_attr,
-                                struct pedit_headers_action *hdrs,
-                                u32 *action_flags,
-                                struct netlink_ext_ack *extack)
+static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
+                                      struct mlx5e_tc_flow_parse_attr *parse_attr,
+                                      struct netlink_ext_ack *extack)
 {
        struct pedit_headers *cmd_masks;
-       int err;
        u8 cmd;
 
-       err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
-                                  action_flags, extack);
-       if (err < 0)
-               goto out_dealloc_parsed_actions;
-
        for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
-               cmd_masks = &hdrs[cmd].masks;
+               cmd_masks = &parse_attr->hdrs[cmd].masks;
                if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
-                       NL_SET_ERR_MSG_MOD(extack,
-                                          "attempt to offload an unsupported field");
+                       NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
                        netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
                        print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
                                       16, 1, cmd_masks, sizeof(zero_masks), true);
-                       err = -EOPNOTSUPP;
-                       goto out_dealloc_parsed_actions;
+                       return -EOPNOTSUPP;
                }
        }
 
        return 0;
+}
+
+static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
+                                struct mlx5e_tc_flow_parse_attr *parse_attr,
+                                u32 *action_flags,
+                                struct netlink_ext_ack *extack)
+{
+       int err;
+
+       err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
+       if (err)
+               goto out_dealloc_parsed_actions;
+
+       err = verify_offload_pedit_fields(priv, parse_attr, extack);
+       if (err)
+               goto out_dealloc_parsed_actions;
+
+       return 0;
 
 out_dealloc_parsed_actions:
        mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
@@ -3257,7 +3338,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
                        return -EOPNOTSUPP;
                }
 
-               if (!tc_act->can_offload(parse_state, act, i))
+               if (!tc_act->can_offload(parse_state, act, i, attr))
                        return -EOPNOTSUPP;
 
                err = tc_act->parse_action(parse_state, act, priv, attr);
@@ -3268,7 +3349,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
        flow_action_for_each(i, act, flow_action) {
                tc_act = mlx5e_tc_act_get(act->id, ns_type);
                if (!tc_act || !tc_act->post_parse ||
-                   !tc_act->can_offload(parse_state, act, i))
+                   !tc_act->can_offload(parse_state, act, i, attr))
                        continue;
 
                err = tc_act->post_parse(parse_state, priv, attr);
@@ -3283,10 +3364,10 @@ static int
 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
                                struct mlx5e_tc_flow *flow,
                                struct mlx5_flow_attr *attr,
-                               struct pedit_headers_action *hdrs,
                                struct netlink_ext_ack *extack)
 {
        struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+       struct pedit_headers_action *hdrs = parse_attr->hdrs;
        enum mlx5_flow_namespace_type ns_type;
        int err;
 
@@ -3296,8 +3377,7 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
 
        ns_type = mlx5e_get_flow_namespace(flow);
 
-       err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs,
-                                   &attr->action, extack);
+       err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
        if (err)
                return err;
 
@@ -3345,7 +3425,6 @@ parse_tc_nic_actions(struct mlx5e_priv *priv,
        struct mlx5e_tc_act_parse_state *parse_state;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
-       struct pedit_headers_action *hdrs;
        int err;
 
        err = flow_action_supported(flow_action, extack);
@@ -3357,13 +3436,12 @@ parse_tc_nic_actions(struct mlx5e_priv *priv,
        parse_state = &parse_attr->parse_state;
        mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
        parse_state->ct_priv = get_ct_priv(priv);
-       hdrs = parse_state->hdrs;
 
        err = parse_tc_actions(parse_state, flow_action);
        if (err)
                return err;
 
-       err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack);
+       err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
        if (err)
                return err;
 
@@ -3468,7 +3546,6 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv,
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_esw_flow_attr *esw_attr;
-       struct pedit_headers_action *hdrs;
        int err;
 
        err = flow_action_supported(flow_action, extack);
@@ -3480,7 +3557,6 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv,
        parse_state = &parse_attr->parse_state;
        mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
        parse_state->ct_priv = get_ct_priv(priv);
-       hdrs = parse_state->hdrs;
 
        err = parse_tc_actions(parse_state, flow_action);
        if (err)
@@ -3494,7 +3570,7 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv,
                return -EOPNOTSUPP;
        }
 
-       err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack);
+       err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
        if (err)
                return err;
 
index 5ffae9b..c622172 100644 (file)
@@ -71,7 +71,7 @@ struct mlx5_flow_attr {
        struct mlx5_fc *counter;
        struct mlx5_modify_hdr *modify_hdr;
        struct mlx5_ct_attr ct_attr;
-       struct mlx5e_sample_attr *sample_attr;
+       struct mlx5e_sample_attr sample_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        u32 chain;
        u16 prio;
@@ -82,6 +82,7 @@ struct mlx5_flow_attr {
        u8 outer_match_level;
        u8 ip_version;
        u8 tun_ip_version;
+       int tunnel_id; /* mapped tunnel id */
        u32 flags;
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
@@ -89,6 +90,23 @@ struct mlx5_flow_attr {
        };
 };
 
+enum {
+       MLX5_ATTR_FLAG_VLAN_HANDLED  = BIT(0),
+       MLX5_ATTR_FLAG_SLOW_PATH     = BIT(1),
+       MLX5_ATTR_FLAG_NO_IN_PORT    = BIT(2),
+       MLX5_ATTR_FLAG_SRC_REWRITE   = BIT(3),
+       MLX5_ATTR_FLAG_SAMPLE        = BIT(4),
+       MLX5_ATTR_FLAG_ACCEPT        = BIT(5),
+       MLX5_ATTR_FLAG_CT            = BIT(6),
+};
+
+/* Returns true if any of the flags that require skipping further TC/NF processing are set. */
+static inline bool
+mlx5e_tc_attr_flags_skip(u32 attr_flags)
+{
+       return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT);
+}
+
 struct mlx5_rx_tun_attr {
        u16 decap_vport;
        union {
@@ -243,11 +261,8 @@ int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
                                         u32 data);
 
 int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
-                             struct mlx5e_tc_flow_parse_attr *parse_attr,
-                             struct mlx5e_tc_flow *flow);
-
-struct mlx5e_tc_flow;
-u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow);
+                             struct mlx5e_tc_flow *flow,
+                             struct mlx5_flow_attr *attr);
 
 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
                            struct flow_match_basic *match, bool outer,
index c275fe0..0abef71 100644 (file)
@@ -86,7 +86,7 @@ mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
                mlx5_eswitch_is_vf_vport(esw, vport_num) &&
                esw->dev == dest_mdev &&
                attr->ip_version &&
-               attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
+               attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
 }
 
 u16
index ead5e8a..44321cd 100644 (file)
@@ -448,22 +448,6 @@ enum {
        MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE  = BIT(2),
 };
 
-enum {
-       MLX5_ESW_ATTR_FLAG_VLAN_HANDLED  = BIT(0),
-       MLX5_ESW_ATTR_FLAG_SLOW_PATH     = BIT(1),
-       MLX5_ESW_ATTR_FLAG_NO_IN_PORT    = BIT(2),
-       MLX5_ESW_ATTR_FLAG_SRC_REWRITE   = BIT(3),
-       MLX5_ESW_ATTR_FLAG_SAMPLE        = BIT(4),
-       MLX5_ESW_ATTR_FLAG_ACCEPT        = BIT(5),
-};
-
-/* Returns true if any of the flags that require skipping further TC/NF processing are set. */
-static inline bool
-mlx5_esw_attr_flags_skip(u32 attr_flags)
-{
-       return attr_flags & (MLX5_ESW_ATTR_FLAG_SLOW_PATH | MLX5_ESW_ATTR_FLAG_ACCEPT);
-}
-
 struct mlx5_esw_flow_attr {
        struct mlx5_eswitch_rep *in_rep;
        struct mlx5_core_dev    *in_mdev;
index 9a7b256..2b31d8b 100644 (file)
@@ -180,7 +180,7 @@ esw_setup_decap_indir(struct mlx5_eswitch *esw,
 {
        struct mlx5_flow_table *ft;
 
-       if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+       if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
                return -EOPNOTSUPP;
 
        ft = mlx5_esw_indir_table_get(esw, attr, spec,
@@ -201,12 +201,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
 static int
 esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
                       struct mlx5_flow_act *flow_act,
-                      struct mlx5_flow_attr *attr,
+                      u32 sampler_id,
                       int i)
 {
        flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
        dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
-       dest[i].sampler_id = attr->sample_attr->sampler_id;
+       dest[i].sampler_id = sampler_id;
 
        return 0;
 }
@@ -297,7 +297,7 @@ esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest,
        struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
        int err;
 
-       if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+       if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
                return -EOPNOTSUPP;
 
        /* flow steering cannot handle more than one dest with the same ft
@@ -364,7 +364,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest,
        struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
        int j, err;
 
-       if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+       if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
                return -EOPNOTSUPP;
 
        for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
@@ -463,15 +463,16 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
 
        if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
            esw_src_port_rewrite_supported(esw))
-               attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
+               attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE;
 
-       if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) {
-               esw_setup_sampler_dest(dest, flow_act, attr, *i);
+       if (attr->flags & MLX5_ATTR_FLAG_SAMPLE &&
+           !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) {
+               esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i);
                (*i)++;
        } else if (attr->dest_ft) {
                esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
                (*i)++;
-       } else if (mlx5_esw_attr_flags_skip(attr->flags)) {
+       } else if (mlx5e_tc_attr_flags_skip(attr->flags)) {
                esw_setup_slow_path_dest(dest, flow_act, chains, *i);
                (*i)++;
        } else if (attr->dest_chain) {
@@ -498,7 +499,7 @@ esw_cleanup_dests(struct mlx5_eswitch *esw,
 
        if (attr->dest_ft) {
                esw_cleanup_decap_indir(esw, attr);
-       } else if (!mlx5_esw_attr_flags_skip(attr->flags)) {
+       } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
                if (attr->dest_chain)
                        esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
                else if (esw_is_indir_table(esw, attr))
@@ -589,7 +590,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                else
                        fdb = attr->ft;
 
-               if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT))
+               if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT))
                        mlx5_eswitch_set_rule_source_port(esw, spec, attr,
                                                          esw_attr->in_mdev->priv.eswitch,
                                                          esw_attr->in_rep->vport);
@@ -721,7 +722,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
 
        mlx5_del_flow_rules(rule);
 
-       if (!mlx5_esw_attr_flags_skip(attr->flags)) {
+       if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
                /* unref the term table */
                for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
                        if (esw_attr->dests[i].termtbl)
@@ -863,7 +864,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
        if (err)
                goto unlock;
 
-       attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+       attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED;
 
        vport = esw_vlan_action_get_vport(esw_attr, push, pop);
 
@@ -871,7 +872,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
                /* tracks VF --> wire rules without vlan push action */
                if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
                        vport->vlan_refcount++;
-                       attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+                       attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
                }
 
                goto unlock;
@@ -902,7 +903,7 @@ skip_set_push:
        }
 out:
        if (!err)
-               attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
+               attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
 unlock:
        mutex_unlock(&esw->state_lock);
        return err;
@@ -921,7 +922,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
        if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
                return 0;
 
-       if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED))
+       if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED))
                return 0;
 
        push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
index 182306b..ee568bf 100644 (file)
@@ -219,12 +219,14 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
 
        if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) ||
            !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) ||
-           mlx5_esw_attr_flags_skip(attr->flags) ||
+           mlx5e_tc_attr_flags_skip(attr->flags) ||
            (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port))
                return false;
 
        /* push vlan on RX */
-       if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)
+       if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+           !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) &
+             MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX))
                return true;
 
        /* hairpin */
index dafe341..a0ac17c 100644 (file)
@@ -152,6 +152,12 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
        return 0;
 }
 
+static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns,
+                                         enum fs_flow_table_type ft_type)
+{
+       return 0;
+}
+
 static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
                                       struct mlx5_core_dev *slave,
                                       bool ft_id_valid,
@@ -971,6 +977,12 @@ static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
        return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
 }
 
+static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns,
+                                    enum fs_flow_table_type ft_type)
+{
+       return 0;
+}
+
 static const struct mlx5_flow_cmds mlx5_flow_cmds = {
        .create_flow_table = mlx5_cmd_create_flow_table,
        .destroy_flow_table = mlx5_cmd_destroy_flow_table,
@@ -990,6 +1002,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = {
        .set_peer = mlx5_cmd_stub_set_peer,
        .create_ns = mlx5_cmd_stub_create_ns,
        .destroy_ns = mlx5_cmd_stub_destroy_ns,
+       .get_capabilities = mlx5_cmd_get_capabilities,
 };
 
 static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
@@ -1011,6 +1024,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
        .set_peer = mlx5_cmd_stub_set_peer,
        .create_ns = mlx5_cmd_stub_create_ns,
        .destroy_ns = mlx5_cmd_stub_destroy_ns,
+       .get_capabilities = mlx5_cmd_stub_get_capabilities,
 };
 
 const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
index 220ec63..274004e 100644 (file)
@@ -101,6 +101,9 @@ struct mlx5_flow_cmds {
                                    u16 format_id, u32 *match_mask);
        int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns,
                                     int definer_id);
+
+       u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns,
+                               enum fs_flow_table_type ft_type);
 };
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
index b628917..42f878e 100644 (file)
@@ -3040,6 +3040,22 @@ void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev)
        steering->esw_ingress_root_ns = NULL;
 }
 
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type)
+{
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+
+       ns = mlx5_get_flow_namespace(dev, type);
+       if (!ns)
+               return 0;
+
+       root = find_root(&ns->node);
+       if (!root)
+               return 0;
+
+       return root->cmds->get_capabilities(root, root->table_type);
+}
+
 static int init_egress_root_ns(struct mlx5_flow_steering *steering)
 {
        int err;
index 5469b08..c488a7c 100644 (file)
@@ -120,6 +120,11 @@ enum mlx5_flow_steering_mode {
        MLX5_FLOW_STEERING_MODE_SMFS
 };
 
+enum mlx5_flow_steering_capabilty {
+       MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0,
+       MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1,
+};
+
 struct mlx5_flow_steering {
        struct mlx5_core_dev *dev;
        enum   mlx5_flow_steering_mode  mode;
@@ -301,6 +306,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
 int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
 void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
 
+u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type);
+
 struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
 
 #define fs_get_obj(v, _node)  {v = container_of((_node), typeof(*v), node); }
index a476da2..033757b 100644 (file)
@@ -735,6 +735,16 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns)
        return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain);
 }
 
+static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns,
+                                       enum fs_flow_table_type ft_type)
+{
+       if (ft_type != FS_FT_FDB ||
+           MLX5_CAP_GEN(ns->dev, steering_format_version) != MLX5_STEERING_FORMAT_CONNECTX_6DX)
+               return 0;
+
+       return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX;
+}
+
 bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
 {
        return mlx5dr_is_supported(dev);
@@ -759,6 +769,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = {
        .set_peer = mlx5_cmd_dr_set_peer,
        .create_ns = mlx5_cmd_dr_create_ns,
        .destroy_ns = mlx5_cmd_dr_destroy_ns,
+       .get_capabilities = mlx5_cmd_dr_get_capabilities,
 };
 
 const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
index 866b935..f45df5f 100644 (file)
@@ -212,6 +212,29 @@ struct mlxsw_event_listener_item {
        void *priv;
 };
 
+static const u8 mlxsw_core_trap_groups[] = {
+       MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+       MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT,
+};
+
+static int mlxsw_core_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+       char htgt_pl[MLXSW_REG_HTGT_LEN];
+       int err;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(mlxsw_core_trap_groups); i++) {
+               mlxsw_reg_htgt_pack(htgt_pl, mlxsw_core_trap_groups[i],
+                                   MLXSW_REG_HTGT_INVALID_POLICER,
+                                   MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+                                   MLXSW_REG_HTGT_DEFAULT_TC);
+               err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
 /******************
  * EMAD processing
  ******************/
@@ -777,16 +800,10 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
        if (err)
                goto err_trap_register;
 
-       err = mlxsw_core->driver->basic_trap_groups_set(mlxsw_core);
-       if (err)
-               goto err_emad_trap_set;
        mlxsw_core->emad.use_emad = true;
 
        return 0;
 
-err_emad_trap_set:
-       mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
-                                  mlxsw_core);
 err_trap_register:
        destroy_workqueue(mlxsw_core->emad_wq);
        return err;
@@ -1706,7 +1723,7 @@ static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg,
 }
 
 static const struct mlxsw_listener mlxsw_core_health_listener =
-       MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE);
+       MLXSW_CORE_EVENTL(mlxsw_core_health_listener_func, MFDE);
 
 static int
 mlxsw_core_health_fw_fatal_dump_fatal_cause(const char *mfde_pl,
@@ -2122,6 +2139,10 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                }
        }
 
+       err = mlxsw_core_trap_groups_set(mlxsw_core);
+       if (err)
+               goto err_trap_groups_set;
+
        err = mlxsw_emad_init(mlxsw_core);
        if (err)
                goto err_emad_init;
@@ -2181,6 +2202,7 @@ err_fw_rev_validate:
 err_register_params:
        mlxsw_emad_fini(mlxsw_core);
 err_emad_init:
+err_trap_groups_set:
        kfree(mlxsw_core->lag.mapping);
 err_alloc_lag_mapping:
        mlxsw_ports_fini(mlxsw_core, reload);
@@ -2540,6 +2562,45 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
 }
 EXPORT_SYMBOL(mlxsw_core_trap_unregister);
 
+int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core,
+                             const struct mlxsw_listener *listeners,
+                             size_t listeners_count, void *priv)
+{
+       int i, err;
+
+       for (i = 0; i < listeners_count; i++) {
+               err = mlxsw_core_trap_register(mlxsw_core,
+                                              &listeners[i],
+                                              priv);
+               if (err)
+                       goto err_listener_register;
+       }
+       return 0;
+
+err_listener_register:
+       for (i--; i >= 0; i--) {
+               mlxsw_core_trap_unregister(mlxsw_core,
+                                          &listeners[i],
+                                          priv);
+       }
+       return err;
+}
+EXPORT_SYMBOL(mlxsw_core_traps_register);
+
+void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core,
+                                const struct mlxsw_listener *listeners,
+                                size_t listeners_count, void *priv)
+{
+       int i;
+
+       for (i = 0; i < listeners_count; i++) {
+               mlxsw_core_trap_unregister(mlxsw_core,
+                                          &listeners[i],
+                                          priv);
+       }
+}
+EXPORT_SYMBOL(mlxsw_core_traps_unregister);
+
 int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core,
                              const struct mlxsw_listener *listener,
                              bool enabled)
index f30bb86..6d30409 100644 (file)
@@ -163,6 +163,9 @@ struct mlxsw_listener {
                .enabled_on_register = true,                                    \
        }
 
+#define MLXSW_CORE_EVENTL(_func, _trap_id)             \
+       MLXSW_EVENTL(_func, _trap_id, CORE_EVENT)
+
 int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core,
                                    const struct mlxsw_rx_listener *rxl,
                                    void *priv, bool enabled);
@@ -181,6 +184,12 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core,
 void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
                                const struct mlxsw_listener *listener,
                                void *priv);
+int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core,
+                             const struct mlxsw_listener *listeners,
+                             size_t listeners_count, void *priv);
+void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core,
+                                const struct mlxsw_listener *listeners,
+                                size_t listeners_count, void *priv);
 int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core,
                              const struct mlxsw_listener *listener,
                              bool enabled);
@@ -315,7 +324,6 @@ struct mlxsw_driver {
                    const struct mlxsw_bus_info *mlxsw_bus_info,
                    struct netlink_ext_ack *extack);
        void (*fini)(struct mlxsw_core *mlxsw_core);
-       int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core);
        int (*port_type_set)(struct mlxsw_core *mlxsw_core, u16 local_port,
                             enum devlink_port_type new_type);
        int (*port_split)(struct mlxsw_core *mlxsw_core, u16 local_port,
index 6dd4ae2..6ea4bf8 100644 (file)
@@ -18,6 +18,7 @@ struct mlxsw_env_module_info {
        int num_ports_mapped;
        int num_ports_up;
        enum ethtool_module_power_mode_policy power_mode_policy;
+       enum mlxsw_reg_pmtm_module_type type;
 };
 
 struct mlxsw_env {
@@ -27,14 +28,47 @@ struct mlxsw_env {
        struct mlxsw_env_module_info module_info[];
 };
 
-static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
-                                         bool *qsfp, bool *cmis)
+static int __mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+       struct mlxsw_env *mlxsw_env = mlxsw_core_env(core);
+       int err;
+
+       switch (mlxsw_env->module_info[module].type) {
+       case MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR:
+               err = -EINVAL;
+               break;
+       default:
+               err = 0;
+       }
+
+       return err;
+}
+
+static int mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module)
+{
+       struct mlxsw_env *mlxsw_env = mlxsw_core_env(core);
+       int err;
+
+       mutex_lock(&mlxsw_env->module_info_lock);
+       err = __mlxsw_env_validate_module_type(core, module);
+       mutex_unlock(&mlxsw_env->module_info_lock);
+
+       return err;
+}
+
+static int
+mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp,
+                              bool *cmis)
 {
        char mcia_pl[MLXSW_REG_MCIA_LEN];
        char *eeprom_tmp;
        u8 ident;
        int err;
 
+       err = mlxsw_env_validate_module_type(core, id);
+       if (err)
+               return err;
+
        mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1,
                            MLXSW_REG_MCIA_I2C_ADDR_LOW);
        err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
@@ -206,7 +240,8 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
        return 0;
 }
 
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+                             struct mlxsw_core *mlxsw_core, int module,
                              struct ethtool_modinfo *modinfo)
 {
        u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE];
@@ -215,6 +250,13 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
        unsigned int read_size;
        int err;
 
+       err = mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               netdev_err(netdev,
+                          "EEPROM is not equipped on port module type");
+               return err;
+       }
+
        err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset,
                                            module_info, false, &read_size);
        if (err)
@@ -356,6 +398,13 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
 {
        u32 bytes_read = 0;
        u16 device_addr;
+       int err;
+
+       err = mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type");
+               return err;
+       }
 
        /* Offset cannot be larger than 2 * ETH_MODULE_EEPROM_PAGE_LEN */
        device_addr = page->offset;
@@ -364,7 +413,6 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
                char mcia_pl[MLXSW_REG_MCIA_LEN];
                char *eeprom_tmp;
                u8 size;
-               int err;
 
                size = min_t(u8, page->length - bytes_read,
                             MLXSW_REG_MCIA_EEPROM_SIZE);
@@ -419,6 +467,12 @@ int mlxsw_env_reset_module(struct net_device *netdev,
 
        mutex_lock(&mlxsw_env->module_info_lock);
 
+       err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               netdev_err(netdev, "Reset module is not supported on port module type\n");
+               goto out;
+       }
+
        if (mlxsw_env->module_info[module].num_ports_up) {
                netdev_err(netdev, "Cannot reset module when ports using it are administratively up\n");
                err = -EINVAL;
@@ -461,6 +515,12 @@ mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module,
 
        mutex_lock(&mlxsw_env->module_info_lock);
 
+       err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Power mode is not supported on port module type");
+               goto out;
+       }
+
        params->policy = mlxsw_env->module_info[module].power_mode_policy;
 
        mlxsw_reg_mcion_pack(mcion_pl, module);
@@ -571,6 +631,13 @@ mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module,
 
        mutex_lock(&mlxsw_env->module_info_lock);
 
+       err = __mlxsw_env_validate_module_type(mlxsw_core, module);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Power mode set is not supported on port module type");
+               goto out;
+       }
+
        if (mlxsw_env->module_info[module].power_mode_policy == policy)
                goto out;
 
@@ -661,13 +728,12 @@ static int mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core,
        return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl);
 }
 
-static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core,
-                                             u8 module_count)
+static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core)
 {
        int i, err, sensor_index;
        bool has_temp_sensor;
 
-       for (i = 0; i < module_count; i++) {
+       for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
                err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i,
                                                       &has_temp_sensor);
                if (err)
@@ -759,7 +825,7 @@ mlxsw_env_mtwe_listener_func(const struct mlxsw_reg_info *reg, char *mtwe_pl,
 }
 
 static const struct mlxsw_listener mlxsw_env_temp_warn_listener =
-       MLXSW_EVENTL(mlxsw_env_mtwe_listener_func, MTWE, MTWE);
+       MLXSW_CORE_EVENTL(mlxsw_env_mtwe_listener_func, MTWE);
 
 static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core)
 {
@@ -849,7 +915,7 @@ mlxsw_env_pmpe_listener_func(const struct mlxsw_reg_info *reg, char *pmpe_pl,
 }
 
 static const struct mlxsw_listener mlxsw_env_module_plug_listener =
-       MLXSW_EVENTL(mlxsw_env_pmpe_listener_func, PMPE, PMPE);
+       MLXSW_CORE_EVENTL(mlxsw_env_pmpe_listener_func, PMPE);
 
 static int
 mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core)
@@ -876,12 +942,11 @@ mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env)
 }
 
 static int
-mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core,
-                                        u8 module_count)
+mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core)
 {
        int i, err;
 
-       for (i = 0; i < module_count; i++) {
+       for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
                char pmaos_pl[MLXSW_REG_PMAOS_LEN];
 
                mlxsw_reg_pmaos_pack(pmaos_pl, i);
@@ -999,6 +1064,28 @@ out_unlock:
 }
 EXPORT_SYMBOL(mlxsw_env_module_port_down);
 
+static int
+mlxsw_env_module_type_set(struct mlxsw_core *mlxsw_core)
+{
+       struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
+       int i;
+
+       for (i = 0; i < mlxsw_env->module_count; i++) {
+               char pmtm_pl[MLXSW_REG_PMTM_LEN];
+               int err;
+
+               mlxsw_reg_pmtm_pack(pmtm_pl, 0, i);
+               err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl);
+               if (err)
+                       return err;
+
+               mlxsw_env->module_info[i].type =
+                       mlxsw_reg_pmtm_module_type_get(pmtm_pl);
+       }
+
+       return 0;
+}
+
 int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
 {
        char mgpir_pl[MLXSW_REG_MGPIR_LEN];
@@ -1037,17 +1124,21 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
        if (err)
                goto err_module_plug_event_register;
 
-       err = mlxsw_env_module_oper_state_event_enable(mlxsw_core,
-                                                      env->module_count);
+       err = mlxsw_env_module_oper_state_event_enable(mlxsw_core);
        if (err)
                goto err_oper_state_event_enable;
 
-       err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count);
+       err = mlxsw_env_module_temp_event_enable(mlxsw_core);
        if (err)
                goto err_temp_event_enable;
 
+       err = mlxsw_env_module_type_set(mlxsw_core);
+       if (err)
+               goto err_type_set;
+
        return 0;
 
+err_type_set:
 err_temp_event_enable:
 err_oper_state_event_enable:
        mlxsw_env_module_plug_event_unregister(env);
index da121b1..ec6564e 100644 (file)
@@ -12,7 +12,8 @@ struct ethtool_eeprom;
 int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
                                         int off, int *temp);
 
-int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
+int mlxsw_env_get_module_info(struct net_device *netdev,
+                             struct mlxsw_core *mlxsw_core, int module,
                              struct ethtool_modinfo *modinfo);
 
 int mlxsw_env_get_module_eeprom(struct net_device *netdev,
index 10d13f5..9ac8ce0 100644 (file)
@@ -110,7 +110,8 @@ static int mlxsw_m_get_module_info(struct net_device *netdev,
        struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
        struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
 
-       return mlxsw_env_get_module_info(core, mlxsw_m_port->module, modinfo);
+       return mlxsw_env_get_module_info(netdev, core, mlxsw_m_port->module,
+                                        modinfo);
 }
 
 static int
index 24cc650..eebd047 100644 (file)
@@ -4482,6 +4482,8 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4          BIT(21)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4          BIT(22)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4      BIT(23)
+#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T             BIT(24)
+#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T            BIT(25)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR            BIT(27)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR            BIT(28)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR            BIT(29)
@@ -6062,6 +6064,58 @@ static inline void mlxsw_reg_pllp_unpack(char *payload, u8 *label_port,
        *slot_index = mlxsw_reg_pllp_slot_index_get(payload);
 }
 
+/* PMTM - Port Module Type Mapping Register
+ * ----------------------------------------
+ * The PMTM register allows query or configuration of module types.
+ * The register can only be set when the module is disabled by PMAOS register
+ */
+#define MLXSW_REG_PMTM_ID 0x5067
+#define MLXSW_REG_PMTM_LEN 0x10
+
+MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN);
+
+/* reg_pmtm_slot_index
+ * Slot index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, slot_index, 0x00, 24, 4);
+
+/* reg_pmtm_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8);
+
+enum mlxsw_reg_pmtm_module_type {
+       MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_4_LANES = 0,
+       MLXSW_REG_PMTM_MODULE_TYPE_QSFP = 1,
+       MLXSW_REG_PMTM_MODULE_TYPE_SFP = 2,
+       MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_SINGLE_LANE = 4,
+       MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_2_LANES = 8,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP4X = 10,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP2X = 11,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP1X = 12,
+       MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14,
+       MLXSW_REG_PMTM_MODULE_TYPE_OSFP = 15,
+       MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD = 16,
+       MLXSW_REG_PMTM_MODULE_TYPE_DSFP = 17,
+       MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP8X = 18,
+       MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR = 19,
+};
+
+/* reg_pmtm_module_type
+ * Module type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 5);
+
+static inline void mlxsw_reg_pmtm_pack(char *payload, u8 slot_index, u8 module)
+{
+       MLXSW_REG_ZERO(pmtm, payload);
+       mlxsw_reg_pmtm_slot_index_set(payload, slot_index);
+       mlxsw_reg_pmtm_module_set(payload, module);
+}
+
 /* HTGT - Host Trap Group Table
  * ----------------------------
  * Configures the properties for forwarding to CPU.
@@ -6087,9 +6141,7 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4);
 
 enum mlxsw_reg_htgt_trap_group {
        MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
-       MLXSW_REG_HTGT_TRAP_GROUP_MFDE,
-       MLXSW_REG_HTGT_TRAP_GROUP_MTWE,
-       MLXSW_REG_HTGT_TRAP_GROUP_PMPE,
+       MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
@@ -12568,6 +12620,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(pddr),
        MLXSW_REG(pmmp),
        MLXSW_REG(pllp),
+       MLXSW_REG(pmtm),
        MLXSW_REG(htgt),
        MLXSW_REG(hpkt),
        MLXSW_REG(rgcr),
index c7fc650..daacf62 100644 (file)
@@ -33,6 +33,7 @@ enum mlxsw_res_id {
        MLXSW_RES_ID_ACL_MAX_REGIONS,
        MLXSW_RES_ID_ACL_MAX_GROUPS,
        MLXSW_RES_ID_ACL_MAX_GROUP_SIZE,
+       MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS,
        MLXSW_RES_ID_ACL_FLEX_KEYS,
        MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE,
        MLXSW_RES_ID_ACL_ACTIONS_PER_SET,
@@ -90,6 +91,7 @@ static u16 mlxsw_res_ids[] = {
        [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903,
        [MLXSW_RES_ID_ACL_MAX_GROUPS] = 0x2904,
        [MLXSW_RES_ID_ACL_MAX_GROUP_SIZE] = 0x2905,
+       [MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS] = 0x2908,
        [MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910,
        [MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911,
        [MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912,
index aa411de..a4b94ee 100644 (file)
@@ -2148,13 +2148,11 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
        struct mlxsw_sp *mlxsw_sp = priv;
        struct mlxsw_sp_port *mlxsw_sp_port;
        enum mlxsw_reg_pude_oper_status status;
-       unsigned int max_ports;
        u16 local_port;
 
-       max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
        local_port = mlxsw_reg_pude_local_port_get(pude_pl);
 
-       if (WARN_ON_ONCE(!local_port || local_port >= max_ports))
+       if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port)))
                return;
        mlxsw_sp_port = mlxsw_sp->ports[local_port];
        if (!mlxsw_sp_port)
@@ -2393,45 +2391,6 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
        return 0;
 }
 
-static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp,
-                                  const struct mlxsw_listener listeners[],
-                                  size_t listeners_count)
-{
-       int i;
-       int err;
-
-       for (i = 0; i < listeners_count; i++) {
-               err = mlxsw_core_trap_register(mlxsw_sp->core,
-                                              &listeners[i],
-                                              mlxsw_sp);
-               if (err)
-                       goto err_listener_register;
-
-       }
-       return 0;
-
-err_listener_register:
-       for (i--; i >= 0; i--) {
-               mlxsw_core_trap_unregister(mlxsw_sp->core,
-                                          &listeners[i],
-                                          mlxsw_sp);
-       }
-       return err;
-}
-
-static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp,
-                                     const struct mlxsw_listener listeners[],
-                                     size_t listeners_count)
-{
-       int i;
-
-       for (i = 0; i < listeners_count; i++) {
-               mlxsw_core_trap_unregister(mlxsw_sp->core,
-                                          &listeners[i],
-                                          mlxsw_sp);
-       }
-}
-
 static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
 {
        struct mlxsw_sp_trap *trap;
@@ -2456,21 +2415,23 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
        if (err)
                goto err_trap_groups_set;
 
-       err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener,
-                                     ARRAY_SIZE(mlxsw_sp_listener));
+       err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp_listener,
+                                       ARRAY_SIZE(mlxsw_sp_listener),
+                                       mlxsw_sp);
        if (err)
                goto err_traps_register;
 
-       err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners,
-                                     mlxsw_sp->listeners_count);
+       err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp->listeners,
+                                       mlxsw_sp->listeners_count, mlxsw_sp);
        if (err)
                goto err_extra_traps_init;
 
        return 0;
 
 err_extra_traps_init:
-       mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
-                                 ARRAY_SIZE(mlxsw_sp_listener));
+       mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener,
+                                   ARRAY_SIZE(mlxsw_sp_listener),
+                                   mlxsw_sp);
 err_traps_register:
 err_trap_groups_set:
 err_cpu_policers_set:
@@ -2480,10 +2441,11 @@ err_cpu_policers_set:
 
 static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
 {
-       mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners,
-                                 mlxsw_sp->listeners_count);
-       mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
-                                 ARRAY_SIZE(mlxsw_sp_listener));
+       mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp->listeners,
+                                   mlxsw_sp->listeners_count,
+                                   mlxsw_sp);
+       mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener,
+                                   ARRAY_SIZE(mlxsw_sp_listener), mlxsw_sp);
        kfree(mlxsw_sp->trap);
 }
 
@@ -2528,42 +2490,6 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
        kfree(mlxsw_sp->lags);
 }
 
-static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
-{
-       char htgt_pl[MLXSW_REG_HTGT_LEN];
-       int err;
-
-       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
-                           MLXSW_REG_HTGT_INVALID_POLICER,
-                           MLXSW_REG_HTGT_DEFAULT_PRIORITY,
-                           MLXSW_REG_HTGT_DEFAULT_TC);
-       err =  mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
-       if (err)
-               return err;
-
-       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MFDE,
-                           MLXSW_REG_HTGT_INVALID_POLICER,
-                           MLXSW_REG_HTGT_DEFAULT_PRIORITY,
-                           MLXSW_REG_HTGT_DEFAULT_TC);
-       err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
-       if (err)
-               return err;
-
-       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MTWE,
-                           MLXSW_REG_HTGT_INVALID_POLICER,
-                           MLXSW_REG_HTGT_DEFAULT_PRIORITY,
-                           MLXSW_REG_HTGT_DEFAULT_TC);
-       err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
-       if (err)
-               return err;
-
-       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_PMPE,
-                           MLXSW_REG_HTGT_INVALID_POLICER,
-                           MLXSW_REG_HTGT_DEFAULT_PRIORITY,
-                           MLXSW_REG_HTGT_DEFAULT_TC);
-       return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
-}
-
 static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
        .clock_init     = mlxsw_sp1_ptp_clock_init,
        .clock_fini     = mlxsw_sp1_ptp_clock_fini,
@@ -3677,7 +3603,6 @@ static struct mlxsw_driver mlxsw_sp1_driver = {
        .fw_filename                    = MLXSW_SP1_FW_FILENAME,
        .init                           = mlxsw_sp1_init,
        .fini                           = mlxsw_sp_fini,
-       .basic_trap_groups_set          = mlxsw_sp_basic_trap_groups_set,
        .port_split                     = mlxsw_sp_port_split,
        .port_unsplit                   = mlxsw_sp_port_unsplit,
        .sb_pool_get                    = mlxsw_sp_sb_pool_get,
@@ -3717,7 +3642,6 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
        .fw_filename                    = MLXSW_SP2_FW_FILENAME,
        .init                           = mlxsw_sp2_init,
        .fini                           = mlxsw_sp_fini,
-       .basic_trap_groups_set          = mlxsw_sp_basic_trap_groups_set,
        .port_split                     = mlxsw_sp_port_split,
        .port_unsplit                   = mlxsw_sp_port_unsplit,
        .sb_pool_get                    = mlxsw_sp_sb_pool_get,
@@ -3758,7 +3682,6 @@ static struct mlxsw_driver mlxsw_sp3_driver = {
        .fw_filename                    = MLXSW_SP3_FW_FILENAME,
        .init                           = mlxsw_sp3_init,
        .fini                           = mlxsw_sp_fini,
-       .basic_trap_groups_set          = mlxsw_sp_basic_trap_groups_set,
        .port_split                     = mlxsw_sp_port_split,
        .port_unsplit                   = mlxsw_sp_port_unsplit,
        .sb_pool_get                    = mlxsw_sp_sb_pool_get,
@@ -3797,7 +3720,6 @@ static struct mlxsw_driver mlxsw_sp4_driver = {
        .priv_size                      = sizeof(struct mlxsw_sp),
        .init                           = mlxsw_sp4_init,
        .fini                           = mlxsw_sp_fini,
-       .basic_trap_groups_set          = mlxsw_sp_basic_trap_groups_set,
        .port_split                     = mlxsw_sp_port_split,
        .port_unsplit                   = mlxsw_sp_port_unsplit,
        .sb_pool_get                    = mlxsw_sp_sb_pool_get,
index bb2442e..30942b6 100644 (file)
@@ -481,6 +481,13 @@ int
 mlxsw_sp_port_vlan_classification_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                      bool is_8021ad_tagged,
                                      bool is_8021q_tagged);
+static inline bool
+mlxsw_sp_local_port_is_valid(struct mlxsw_sp *mlxsw_sp, u16 local_port)
+{
+       unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+
+       return local_port < max_ports && local_port;
+}
 
 /* spectrum_buffers.c */
 struct mlxsw_sp_hdroom_prio {
index a9fff8a..d20e794 100644 (file)
@@ -213,7 +213,6 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
        struct mlxsw_sp1_kvdl_part *part;
        bool need_update = true;
        unsigned int nr_entries;
-       size_t usage_size;
        u64 resource_size;
        int err;
 
@@ -225,8 +224,8 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
        }
 
        nr_entries = div_u64(resource_size, info->alloc_size);
-       usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
-       part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+       part = kzalloc(struct_size(part, usage, BITS_TO_LONGS(nr_entries)),
+                      GFP_KERNEL);
        if (!part)
                return ERR_PTR(-ENOMEM);
 
index ad69913..5b02108 100644 (file)
@@ -77,7 +77,14 @@ static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
        int i;
        int err;
 
+       /* Some TCAM regions are not exposed to the host and used internally
+        * by the device. Allocate KVDL entries for the default actions of
+        * these regions to avoid the host from overwriting them.
+        */
        tcam->kvdl_count = _tcam->max_regions;
+       if (MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_DEFAULT_ACTIONS))
+               tcam->kvdl_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+                                                     ACL_MAX_DEFAULT_ACTIONS);
        err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
                                  tcam->kvdl_count, &tcam->kvdl_index);
        if (err)
@@ -97,7 +104,10 @@ static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
                goto err_afa_block_continue;
        enc_actions = mlxsw_afa_block_cur_set(afa_block);
 
-       for (i = 0; i < tcam->kvdl_count; i++) {
+       /* Only write to KVDL entries used by TCAM regions exposed to the
+        * host.
+        */
+       for (i = 0; i < _tcam->max_regions; i++) {
                mlxsw_reg_pefa_pack(pefa_pl, tcam->kvdl_index + i,
                                    true, enc_actions);
                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
index 2053071..8b5d7f8 100644 (file)
@@ -1034,13 +1034,10 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev,
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       int err;
-
-       err = mlxsw_env_get_module_info(mlxsw_sp->core,
-                                       mlxsw_sp_port->mapping.module,
-                                       modinfo);
 
-       return err;
+       return mlxsw_env_get_module_info(netdev, mlxsw_sp->core,
+                                        mlxsw_sp_port->mapping.module,
+                                        modinfo);
 }
 
 static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
@@ -1048,13 +1045,10 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       int err;
-
-       err = mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
-                                         mlxsw_sp_port->mapping.module, ee,
-                                         data);
 
-       return err;
+       return mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core,
+                                          mlxsw_sp_port->mapping.module, ee,
+                                          data);
 }
 
 static int
@@ -1273,12 +1267,22 @@ struct mlxsw_sp1_port_link_mode {
 
 static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
        {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+               .speed          = SPEED_100,
+       },
+       {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
                                  MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
                .mask_ethtool   = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
                .speed          = SPEED_1000,
        },
        {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+               .speed          = SPEED_1000,
+       },
+       {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
                                  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
                .mask_ethtool   = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
index 0ff163f..35422e6 100644 (file)
@@ -568,12 +568,11 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
                                 u8 domain_number, u16 sequence_id,
                                 u64 timestamp)
 {
-       unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct mlxsw_sp1_ptp_key key;
        u8 types;
 
-       if (WARN_ON_ONCE(local_port >= max_ports))
+       if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port)))
                return;
        mlxsw_sp_port = mlxsw_sp->ports[local_port];
        if (!mlxsw_sp_port)
index 65c1724..bffdb41 100644 (file)
@@ -2616,7 +2616,6 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
                                            char *sfn_pl, int rec_index,
                                            bool adding)
 {
-       unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
        struct mlxsw_sp_bridge_device *bridge_device;
        struct mlxsw_sp_bridge_port *bridge_port;
@@ -2630,7 +2629,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
 
        mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port);
 
-       if (WARN_ON_ONCE(local_port >= max_ports))
+       if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port)))
                return;
        mlxsw_sp_port = mlxsw_sp->ports[local_port];
        if (!mlxsw_sp_port) {
index 91a755e..5f1e7b8 100644 (file)
@@ -750,7 +750,7 @@ static int lan743x_ethtool_set_eee(struct net_device *netdev,
        }
 
        if (eee->eee_enabled) {
-               ret = phy_init_eee(phydev, 0);
+               ret = phy_init_eee(phydev, false);
                if (ret) {
                        netif_err(adapter, drv, adapter->netdev,
                                  "EEE initialization failed\n");
index 040cfff..a9ffc71 100644 (file)
@@ -7,4 +7,5 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o
 
 lan966x-switch-objs  := lan966x_main.o lan966x_phylink.o lan966x_port.o \
                        lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \
-                       lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o
+                       lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \
+                       lan966x_ptp.o
index 614f12c..e58a27f 100644 (file)
@@ -545,6 +545,39 @@ static int lan966x_set_pauseparam(struct net_device *dev,
        return phylink_ethtool_set_pauseparam(port->phylink, pause);
 }
 
+static int lan966x_get_ts_info(struct net_device *dev,
+                              struct ethtool_ts_info *info)
+{
+       struct lan966x_port *port = netdev_priv(dev);
+       struct lan966x *lan966x = port->lan966x;
+       struct lan966x_phc *phc;
+
+       if (!lan966x->ptp)
+               return ethtool_op_get_ts_info(dev, info);
+
+       phc = &lan966x->phc[LAN966X_PHC_PORT];
+
+       info->phc_index = phc->clock ? ptp_clock_index(phc->clock) : -1;
+       if (info->phc_index == -1) {
+               info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
+                                        SOF_TIMESTAMPING_RX_SOFTWARE |
+                                        SOF_TIMESTAMPING_SOFTWARE;
+               return 0;
+       }
+       info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
+                                SOF_TIMESTAMPING_RX_SOFTWARE |
+                                SOF_TIMESTAMPING_SOFTWARE |
+                                SOF_TIMESTAMPING_TX_HARDWARE |
+                                SOF_TIMESTAMPING_RX_HARDWARE |
+                                SOF_TIMESTAMPING_RAW_HARDWARE;
+       info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
+                        BIT(HWTSTAMP_TX_ONESTEP_SYNC);
+       info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+                          BIT(HWTSTAMP_FILTER_ALL);
+
+       return 0;
+}
+
 const struct ethtool_ops lan966x_ethtool_ops = {
        .get_link_ksettings     = lan966x_get_link_ksettings,
        .set_link_ksettings     = lan966x_set_link_ksettings,
@@ -556,6 +589,7 @@ const struct ethtool_ops lan966x_ethtool_ops = {
        .get_eth_mac_stats      = lan966x_get_eth_mac_stats,
        .get_rmon_stats         = lan966x_get_eth_rmon_stats,
        .get_link               = ethtool_op_get_link,
+       .get_ts_info            = lan966x_get_ts_info,
 };
 
 static void lan966x_check_stats_work(struct work_struct *work)
index 1f60fd1..e62758b 100644 (file)
@@ -44,6 +44,7 @@ static const struct lan966x_main_io_resource lan966x_main_iomap[] =  {
        { TARGET_ORG,                         0, 1 }, /* 0xe2000000 */
        { TARGET_GCB,                    0x4000, 1 }, /* 0xe2004000 */
        { TARGET_QS,                     0x8000, 1 }, /* 0xe2008000 */
+       { TARGET_PTP,                    0xc000, 1 }, /* 0xe200c000 */
        { TARGET_CHIP_TOP,              0x10000, 1 }, /* 0xe2010000 */
        { TARGET_REW,                   0x14000, 1 }, /* 0xe2014000 */
        { TARGET_SYS,                   0x28000, 1 }, /* 0xe2028000 */
@@ -201,7 +202,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
        val = lan_rd(lan966x, QS_INJ_STATUS);
        if (!(QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp)) ||
            (QS_INJ_STATUS_WMARK_REACHED_GET(val) & BIT(grp)))
-               return NETDEV_TX_BUSY;
+               goto err;
 
        /* Write start of frame */
        lan_wr(QS_INJ_CTRL_GAP_SIZE_SET(1) |
@@ -213,7 +214,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
                /* Wait until the fifo is ready */
                err = lan966x_port_inj_ready(lan966x, grp);
                if (err)
-                       return NETDEV_TX_BUSY;
+                       goto err;
 
                lan_wr((__force u32)ifh[i], lan966x, QS_INJ_WR(grp));
        }
@@ -225,7 +226,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
                /* Wait until the fifo is ready */
                err = lan966x_port_inj_ready(lan966x, grp);
                if (err)
-                       return NETDEV_TX_BUSY;
+                       goto err;
 
                lan_wr(((u32 *)skb->data)[i], lan966x, QS_INJ_WR(grp));
        }
@@ -235,7 +236,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
                /* Wait until the fifo is ready */
                err = lan966x_port_inj_ready(lan966x, grp);
                if (err)
-                       return NETDEV_TX_BUSY;
+                       goto err;
 
                lan_wr(0, lan966x, QS_INJ_WR(grp));
                ++i;
@@ -255,8 +256,19 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
        dev->stats.tx_packets++;
        dev->stats.tx_bytes += skb->len;
 
+       if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+           LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+               return NETDEV_TX_OK;
+
        dev_consume_skb_any(skb);
        return NETDEV_TX_OK;
+
+err:
+       if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+           LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+               lan966x_ptp_txtstamp_release(port, skb);
+
+       return NETDEV_TX_BUSY;
 }
 
 static void lan966x_ifh_set_bypass(void *ifh, u64 bypass)
@@ -289,10 +301,23 @@ static void lan966x_ifh_set_vid(void *ifh, u64 vid)
                IFH_POS_TCI, IFH_LEN * 4, PACK, 0);
 }
 
+static void lan966x_ifh_set_rew_op(void *ifh, u64 rew_op)
+{
+       packing(ifh, &rew_op, IFH_POS_REW_CMD + IFH_WID_REW_CMD - 1,
+               IFH_POS_REW_CMD, IFH_LEN * 4, PACK, 0);
+}
+
+static void lan966x_ifh_set_timestamp(void *ifh, u64 timestamp)
+{
+       packing(ifh, &timestamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1,
+               IFH_POS_TIMESTAMP, IFH_LEN * 4, PACK, 0);
+}
+
 static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct lan966x_port *port = netdev_priv(dev);
        __be32 ifh[IFH_LEN];
+       int err;
 
        memset(ifh, 0x0, sizeof(__be32) * IFH_LEN);
 
@@ -302,6 +327,15 @@ static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev)
        lan966x_ifh_set_ipv(ifh, skb->priority >= 7 ? 0x7 : skb->priority);
        lan966x_ifh_set_vid(ifh, skb_vlan_tag_get(skb));
 
+       if (port->lan966x->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+               err = lan966x_ptp_txtstamp_request(port, skb);
+               if (err)
+                       return err;
+
+               lan966x_ifh_set_rew_op(ifh, LAN966X_SKB_CB(skb)->rew_op);
+               lan966x_ifh_set_timestamp(ifh, LAN966X_SKB_CB(skb)->ts_id);
+       }
+
        return lan966x_port_ifh_xmit(skb, ifh, dev);
 }
 
@@ -350,6 +384,23 @@ static int lan966x_port_get_parent_id(struct net_device *dev,
        return 0;
 }
 
+static int lan966x_port_ioctl(struct net_device *dev, struct ifreq *ifr,
+                             int cmd)
+{
+       struct lan966x_port *port = netdev_priv(dev);
+
+       if (!phy_has_hwtstamp(dev->phydev) && port->lan966x->ptp) {
+               switch (cmd) {
+               case SIOCSHWTSTAMP:
+                       return lan966x_ptp_hwtstamp_set(port, ifr);
+               case SIOCGHWTSTAMP:
+                       return lan966x_ptp_hwtstamp_get(port, ifr);
+               }
+       }
+
+       return phy_mii_ioctl(dev->phydev, ifr, cmd);
+}
+
 static const struct net_device_ops lan966x_port_netdev_ops = {
        .ndo_open                       = lan966x_port_open,
        .ndo_stop                       = lan966x_port_stop,
@@ -360,6 +411,7 @@ static const struct net_device_ops lan966x_port_netdev_ops = {
        .ndo_get_stats64                = lan966x_stats_get,
        .ndo_set_mac_address            = lan966x_port_set_mac_address,
        .ndo_get_port_parent_id         = lan966x_port_get_parent_id,
+       .ndo_eth_ioctl                  = lan966x_port_ioctl,
 };
 
 bool lan966x_netdevice_check(const struct net_device *dev)
@@ -434,6 +486,12 @@ static void lan966x_ifh_get_len(void *ifh, u64 *len)
                IFH_POS_LEN, IFH_LEN * 4, UNPACK, 0);
 }
 
+static void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp)
+{
+       packing(ifh, timestamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1,
+               IFH_POS_TIMESTAMP, IFH_LEN * 4, UNPACK, 0);
+}
+
 static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args)
 {
        struct lan966x *lan966x = args;
@@ -443,10 +501,10 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args)
                return IRQ_NONE;
 
        do {
+               u64 src_port, len, timestamp;
                struct net_device *dev;
                struct sk_buff *skb;
                int sz = 0, buf_len;
-               u64 src_port, len;
                u32 ifh[IFH_LEN];
                u32 *buf;
                u32 val;
@@ -461,6 +519,7 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args)
 
                lan966x_ifh_get_src_port(ifh, &src_port);
                lan966x_ifh_get_len(ifh, &len);
+               lan966x_ifh_get_timestamp(ifh, &timestamp);
 
                WARN_ON(src_port >= lan966x->num_phys_ports);
 
@@ -501,6 +560,7 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args)
                        *buf = val;
                }
 
+               lan966x_ptp_rxtstamp(lan966x, skb, timestamp);
                skb->protocol = eth_type_trans(skb, dev);
 
                if (lan966x->bridge_mask & BIT(src_port))
@@ -897,6 +957,17 @@ static int lan966x_probe(struct platform_device *pdev)
                        return dev_err_probe(&pdev->dev, err, "Unable to use ana irq");
        }
 
+       lan966x->ptp_irq = platform_get_irq_byname(pdev, "ptp");
+       if (lan966x->ptp_irq > 0) {
+               err = devm_request_threaded_irq(&pdev->dev, lan966x->ptp_irq, NULL,
+                                               lan966x_ptp_irq_handler, IRQF_ONESHOT,
+                                               "ptp irq", lan966x);
+               if (err)
+                       return dev_err_probe(&pdev->dev, err, "Unable to use ptp irq");
+
+               lan966x->ptp = 1;
+       }
+
        /* init switch */
        lan966x_init(lan966x);
        lan966x_stats_init(lan966x);
@@ -931,8 +1002,15 @@ static int lan966x_probe(struct platform_device *pdev)
        if (err)
                goto cleanup_ports;
 
+       err = lan966x_ptp_init(lan966x);
+       if (err)
+               goto cleanup_fdb;
+
        return 0;
 
+cleanup_fdb:
+       lan966x_fdb_deinit(lan966x);
+
 cleanup_ports:
        fwnode_handle_put(portnp);
 
@@ -958,6 +1036,7 @@ static int lan966x_remove(struct platform_device *pdev)
        lan966x_mac_purge_entries(lan966x);
        lan966x_mdb_deinit(lan966x);
        lan966x_fdb_deinit(lan966x);
+       lan966x_ptp_deinit(lan966x);
 
        return 0;
 }
index 99c6d0a..026474c 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/jiffies.h>
 #include <linux/phy.h>
 #include <linux/phylink.h>
+#include <linux/ptp_clock_kernel.h>
 #include <net/switchdev.h>
 
 #include "lan966x_regs.h"
 #define LAN966X_SPEED_100              2
 #define LAN966X_SPEED_10               3
 
+#define LAN966X_PHC_COUNT              3
+#define LAN966X_PHC_PORT               0
+
+#define IFH_REW_OP_NOOP                        0x0
+#define IFH_REW_OP_ONE_STEP_PTP                0x3
+#define IFH_REW_OP_TWO_STEP_PTP                0x4
+
 /* MAC table entry types.
  * ENTRYTYPE_NORMAL is subject to aging.
  * ENTRYTYPE_LOCKED is not subject to aging.
@@ -70,6 +78,24 @@ struct lan966x_stat_layout {
        char name[ETH_GSTRING_LEN];
 };
 
+struct lan966x_phc {
+       struct ptp_clock *clock;
+       struct ptp_clock_info info;
+       struct hwtstamp_config hwtstamp_config;
+       struct lan966x *lan966x;
+       u8 index;
+};
+
+struct lan966x_skb_cb {
+       u8 rew_op;
+       u16 ts_id;
+       unsigned long jiffies;
+};
+
+#define LAN966X_PTP_TIMEOUT            msecs_to_jiffies(10)
+#define LAN966X_SKB_CB(skb) \
+       ((struct lan966x_skb_cb *)((skb)->cb))
+
 struct lan966x {
        struct device *dev;
 
@@ -105,6 +131,7 @@ struct lan966x {
        /* interrupts */
        int xtr_irq;
        int ana_irq;
+       int ptp_irq;
 
        /* worqueue for fdb */
        struct workqueue_struct *fdb_work;
@@ -113,6 +140,14 @@ struct lan966x {
        /* mdb */
        struct list_head mdb_entries;
        struct list_head pgid_entries;
+
+       /* ptp */
+       bool ptp;
+       struct lan966x_phc phc[LAN966X_PHC_COUNT];
+       spinlock_t ptp_clock_lock; /* lock for phc */
+       spinlock_t ptp_ts_id_lock; /* lock for ts_id */
+       struct mutex ptp_lock; /* lock for ptp interface state */
+       u16 ptp_skbs;
 };
 
 struct lan966x_port_config {
@@ -142,6 +177,10 @@ struct lan966x_port {
        struct phylink *phylink;
        struct phy *serdes;
        struct fwnode_handle *fwnode;
+
+       u8 ptp_cmd;
+       u16 ts_id;
+       struct sk_buff_head tx_skbs;
 };
 
 extern const struct phylink_mac_ops lan966x_phylink_mac_ops;
@@ -228,6 +267,18 @@ int lan966x_handle_port_mdb_del(struct lan966x_port *port,
 void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid);
 void lan966x_mdb_write_entries(struct lan966x *lan966x, u16 vid);
 
+int lan966x_ptp_init(struct lan966x *lan966x);
+void lan966x_ptp_deinit(struct lan966x *lan966x);
+int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr);
+int lan966x_ptp_hwtstamp_get(struct lan966x_port *port, struct ifreq *ifr);
+void lan966x_ptp_rxtstamp(struct lan966x *lan966x, struct sk_buff *skb,
+                         u64 timestamp);
+int lan966x_ptp_txtstamp_request(struct lan966x_port *port,
+                                struct sk_buff *skb);
+void lan966x_ptp_txtstamp_release(struct lan966x_port *port,
+                                 struct sk_buff *skb);
+irqreturn_t lan966x_ptp_irq_handler(int irq, void *args);
+
 static inline void __iomem *lan_addr(void __iomem *base[],
                                     int id, int tinst, int tcnt,
                                     int gbase, int ginst,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
new file mode 100644 (file)
index 0000000..ae78277
--- /dev/null
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/ptp_classify.h>
+
+#include "lan966x_main.h"
+
+#define LAN966X_MAX_PTP_ID     512
+
+/* Represents 1ppm adjustment in 2^59 format with 6.037735849ns as reference
+ * The value is calculated as follows: (1/1000000)/((2^-59)/6.037735849)
+ */
+#define LAN966X_1PPM_FORMAT            3480517749723LL
+
+/* Represents 1ppb adjustment in 2^59 format with 6.037735849ns as reference
+ * The value is calculated as follows: (1/1000000000)/((2^-59)/6.037735849)
+ */
+#define LAN966X_1PPB_FORMAT            3480517749LL
+
+#define TOD_ACC_PIN            0x5
+
+enum {
+       PTP_PIN_ACTION_IDLE = 0,
+       PTP_PIN_ACTION_LOAD,
+       PTP_PIN_ACTION_SAVE,
+       PTP_PIN_ACTION_CLOCK,
+       PTP_PIN_ACTION_DELTA,
+       PTP_PIN_ACTION_TOD
+};
+
+static u64 lan966x_ptp_get_nominal_value(void)
+{
+       u64 res = 0x304d2df1;
+
+       res <<= 32;
+       return res;
+}
+
+int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr)
+{
+       struct lan966x *lan966x = port->lan966x;
+       struct hwtstamp_config cfg;
+       struct lan966x_phc *phc;
+
+       /* For now don't allow to run ptp on ports that are part of a bridge,
+        * because in case of transparent clock the HW will still forward the
+        * frames, so there would be duplicate frames
+        */
+       if (lan966x->bridge_mask & BIT(port->chip_port))
+               return -EINVAL;
+
+       if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+               return -EFAULT;
+
+       switch (cfg.tx_type) {
+       case HWTSTAMP_TX_ON:
+               port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP;
+               break;
+       case HWTSTAMP_TX_ONESTEP_SYNC:
+               port->ptp_cmd = IFH_REW_OP_ONE_STEP_PTP;
+               break;
+       case HWTSTAMP_TX_OFF:
+               port->ptp_cmd = IFH_REW_OP_NOOP;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       switch (cfg.rx_filter) {
+       case HWTSTAMP_FILTER_NONE:
+               break;
+       case HWTSTAMP_FILTER_ALL:
+       case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+       case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+       case HWTSTAMP_FILTER_NTP_ALL:
+               cfg.rx_filter = HWTSTAMP_FILTER_ALL;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       /* Commit back the result & save it */
+       mutex_lock(&lan966x->ptp_lock);
+       phc = &lan966x->phc[LAN966X_PHC_PORT];
+       memcpy(&phc->hwtstamp_config, &cfg, sizeof(cfg));
+       mutex_unlock(&lan966x->ptp_lock);
+
+       return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+int lan966x_ptp_hwtstamp_get(struct lan966x_port *port, struct ifreq *ifr)
+{
+       struct lan966x *lan966x = port->lan966x;
+       struct lan966x_phc *phc;
+
+       phc = &lan966x->phc[LAN966X_PHC_PORT];
+       return copy_to_user(ifr->ifr_data, &phc->hwtstamp_config,
+                           sizeof(phc->hwtstamp_config)) ? -EFAULT : 0;
+}
+
+static int lan966x_ptp_classify(struct lan966x_port *port, struct sk_buff *skb)
+{
+       struct ptp_header *header;
+       u8 msgtype;
+       int type;
+
+       if (port->ptp_cmd == IFH_REW_OP_NOOP)
+               return IFH_REW_OP_NOOP;
+
+       type = ptp_classify_raw(skb);
+       if (type == PTP_CLASS_NONE)
+               return IFH_REW_OP_NOOP;
+
+       header = ptp_parse_header(skb, type);
+       if (!header)
+               return IFH_REW_OP_NOOP;
+
+       if (port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
+               return IFH_REW_OP_TWO_STEP_PTP;
+
+       /* If it is sync and run 1 step then set the correct operation,
+        * otherwise run as 2 step
+        */
+       msgtype = ptp_get_msgtype(header, type);
+       if ((msgtype & 0xf) == 0)
+               return IFH_REW_OP_ONE_STEP_PTP;
+
+       return IFH_REW_OP_TWO_STEP_PTP;
+}
+
+static void lan966x_ptp_txtstamp_old_release(struct lan966x_port *port)
+{
+       struct sk_buff *skb, *skb_tmp;
+       unsigned long flags;
+
+       spin_lock_irqsave(&port->tx_skbs.lock, flags);
+       skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) {
+               if (time_after(LAN966X_SKB_CB(skb)->jiffies + LAN966X_PTP_TIMEOUT,
+                              jiffies))
+                       break;
+
+               __skb_unlink(skb, &port->tx_skbs);
+               dev_kfree_skb_any(skb);
+       }
+       spin_unlock_irqrestore(&port->tx_skbs.lock, flags);
+}
+
+int lan966x_ptp_txtstamp_request(struct lan966x_port *port,
+                                struct sk_buff *skb)
+{
+       struct lan966x *lan966x = port->lan966x;
+       unsigned long flags;
+       u8 rew_op;
+
+       rew_op = lan966x_ptp_classify(port, skb);
+       LAN966X_SKB_CB(skb)->rew_op = rew_op;
+
+       if (rew_op != IFH_REW_OP_TWO_STEP_PTP)
+               return 0;
+
+       lan966x_ptp_txtstamp_old_release(port);
+
+       spin_lock_irqsave(&lan966x->ptp_ts_id_lock, flags);
+       if (lan966x->ptp_skbs == LAN966X_MAX_PTP_ID) {
+               spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags);
+               return -EBUSY;
+       }
+
+       skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+       skb_queue_tail(&port->tx_skbs, skb);
+       LAN966X_SKB_CB(skb)->ts_id = port->ts_id;
+       LAN966X_SKB_CB(skb)->jiffies = jiffies;
+
+       lan966x->ptp_skbs++;
+       port->ts_id++;
+       if (port->ts_id == LAN966X_MAX_PTP_ID)
+               port->ts_id = 0;
+
+       spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags);
+
+       return 0;
+}
+
+void lan966x_ptp_txtstamp_release(struct lan966x_port *port,
+                                 struct sk_buff *skb)
+{
+       struct lan966x *lan966x = port->lan966x;
+       unsigned long flags;
+
+       spin_lock_irqsave(&lan966x->ptp_ts_id_lock, flags);
+       port->ts_id--;
+       lan966x->ptp_skbs--;
+       skb_unlink(skb, &port->tx_skbs);
+       spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags);
+}
+
+static void lan966x_get_hwtimestamp(struct lan966x *lan966x,
+                                   struct timespec64 *ts,
+                                   u32 nsec)
+{
+       /* Read current PTP time to get seconds */
+       unsigned long flags;
+       u32 curr_nsec;
+
+       spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+       lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) |
+               PTP_PIN_CFG_PIN_DOM_SET(LAN966X_PHC_PORT) |
+               PTP_PIN_CFG_PIN_SYNC_SET(0),
+               PTP_PIN_CFG_PIN_ACTION |
+               PTP_PIN_CFG_PIN_DOM |
+               PTP_PIN_CFG_PIN_SYNC,
+               lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+       ts->tv_sec = lan_rd(lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN));
+       curr_nsec = lan_rd(lan966x, PTP_TOD_NSEC(TOD_ACC_PIN));
+
+       ts->tv_nsec = nsec;
+
+       /* Sec has incremented since the ts was registered */
+       if (curr_nsec < nsec)
+               ts->tv_sec--;
+
+       spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+}
+
+irqreturn_t lan966x_ptp_irq_handler(int irq, void *args)
+{
+       int budget = LAN966X_MAX_PTP_ID;
+       struct lan966x *lan966x = args;
+
+       while (budget--) {
+               struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
+               struct skb_shared_hwtstamps shhwtstamps;
+               struct lan966x_port *port;
+               struct timespec64 ts;
+               unsigned long flags;
+               u32 val, id, txport;
+               u32 delay;
+
+               val = lan_rd(lan966x, PTP_TWOSTEP_CTRL);
+
+               /* Check if a timestamp can be retrieved */
+               if (!(val & PTP_TWOSTEP_CTRL_VLD))
+                       break;
+
+               WARN_ON(val & PTP_TWOSTEP_CTRL_OVFL);
+
+               if (!(val & PTP_TWOSTEP_CTRL_STAMP_TX))
+                       continue;
+
+               /* Retrieve the ts Tx port */
+               txport = PTP_TWOSTEP_CTRL_STAMP_PORT_GET(val);
+
+               /* Retrieve its associated skb */
+               port = lan966x->ports[txport];
+
+               /* Retrieve the delay */
+               delay = lan_rd(lan966x, PTP_TWOSTEP_STAMP);
+               delay = PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(delay);
+
+               /* Get next timestamp from fifo, which needs to be the
+                * rx timestamp which represents the id of the frame
+                */
+               lan_rmw(PTP_TWOSTEP_CTRL_NXT_SET(1),
+                       PTP_TWOSTEP_CTRL_NXT,
+                       lan966x, PTP_TWOSTEP_CTRL);
+
+               val = lan_rd(lan966x, PTP_TWOSTEP_CTRL);
+
+               /* Check if a timestamp can be retrieved */
+               if (!(val & PTP_TWOSTEP_CTRL_VLD))
+                       break;
+
+               /* Read RX timestamping to get the ID */
+               id = lan_rd(lan966x, PTP_TWOSTEP_STAMP);
+
+               spin_lock_irqsave(&port->tx_skbs.lock, flags);
+               skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) {
+                       if (LAN966X_SKB_CB(skb)->ts_id != id)
+                               continue;
+
+                       __skb_unlink(skb, &port->tx_skbs);
+                       skb_match = skb;
+                       break;
+               }
+               spin_unlock_irqrestore(&port->tx_skbs.lock, flags);
+
+               /* Next ts */
+               lan_rmw(PTP_TWOSTEP_CTRL_NXT_SET(1),
+                       PTP_TWOSTEP_CTRL_NXT,
+                       lan966x, PTP_TWOSTEP_CTRL);
+
+               if (WARN_ON(!skb_match))
+                       continue;
+
+               spin_lock(&lan966x->ptp_ts_id_lock);
+               lan966x->ptp_skbs--;
+               spin_unlock(&lan966x->ptp_ts_id_lock);
+
+               /* Get the h/w timestamp */
+               lan966x_get_hwtimestamp(lan966x, &ts, delay);
+
+               /* Set the timestamp into the skb */
+               shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+               skb_tstamp_tx(skb_match, &shhwtstamps);
+
+               dev_kfree_skb_any(skb_match);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static int lan966x_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+       struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info);
+       struct lan966x *lan966x = phc->lan966x;
+       unsigned long flags;
+       bool neg_adj = 0;
+       u64 tod_inc;
+       u64 ref;
+
+       if (!scaled_ppm)
+               return 0;
+
+       if (scaled_ppm < 0) {
+               neg_adj = 1;
+               scaled_ppm = -scaled_ppm;
+       }
+
+       tod_inc = lan966x_ptp_get_nominal_value();
+
+       /* The multiplication is split in 2 separate additions because of
+        * overflow issues. If scaled_ppm with 16bit fractional part was bigger
+        * than 20ppm then we got overflow.
+        */
+       ref = LAN966X_1PPM_FORMAT * (scaled_ppm >> 16);
+       ref += (LAN966X_1PPM_FORMAT * (0xffff & scaled_ppm)) >> 16;
+       tod_inc = neg_adj ? tod_inc - ref : tod_inc + ref;
+
+       spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+       lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(1 << BIT(phc->index)),
+               PTP_DOM_CFG_CLKCFG_DIS,
+               lan966x, PTP_DOM_CFG);
+
+       lan_wr((u32)tod_inc & 0xFFFFFFFF, lan966x,
+              PTP_CLK_PER_CFG(phc->index, 0));
+       lan_wr((u32)(tod_inc >> 32), lan966x,
+              PTP_CLK_PER_CFG(phc->index, 1));
+
+       lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0),
+               PTP_DOM_CFG_CLKCFG_DIS,
+               lan966x, PTP_DOM_CFG);
+
+       spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+
+       return 0;
+}
+
+static int lan966x_ptp_settime64(struct ptp_clock_info *ptp,
+                                const struct timespec64 *ts)
+{
+       struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info);
+       struct lan966x *lan966x = phc->lan966x;
+       unsigned long flags;
+
+       spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+       /* Must be in IDLE mode before the time can be loaded */
+       lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) |
+               PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+               PTP_PIN_CFG_PIN_SYNC_SET(0),
+               PTP_PIN_CFG_PIN_ACTION |
+               PTP_PIN_CFG_PIN_DOM |
+               PTP_PIN_CFG_PIN_SYNC,
+               lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+       /* Set new value */
+       lan_wr(PTP_TOD_SEC_MSB_TOD_SEC_MSB_SET(upper_32_bits(ts->tv_sec)),
+              lan966x, PTP_TOD_SEC_MSB(TOD_ACC_PIN));
+       lan_wr(lower_32_bits(ts->tv_sec),
+              lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN));
+       lan_wr(ts->tv_nsec, lan966x, PTP_TOD_NSEC(TOD_ACC_PIN));
+
+       /* Apply new values */
+       lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_LOAD) |
+               PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+               PTP_PIN_CFG_PIN_SYNC_SET(0),
+               PTP_PIN_CFG_PIN_ACTION |
+               PTP_PIN_CFG_PIN_DOM |
+               PTP_PIN_CFG_PIN_SYNC,
+               lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+       spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+
+       return 0;
+}
+
+static int lan966x_ptp_gettime64(struct ptp_clock_info *ptp,
+                                struct timespec64 *ts)
+{
+       struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info);
+       struct lan966x *lan966x = phc->lan966x;
+       unsigned long flags;
+       time64_t s;
+       s64 ns;
+
+       spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+       lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) |
+               PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+               PTP_PIN_CFG_PIN_SYNC_SET(0),
+               PTP_PIN_CFG_PIN_ACTION |
+               PTP_PIN_CFG_PIN_DOM |
+               PTP_PIN_CFG_PIN_SYNC,
+               lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+       s = lan_rd(lan966x, PTP_TOD_SEC_MSB(TOD_ACC_PIN));
+       s <<= 32;
+       s |= lan_rd(lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN));
+       ns = lan_rd(lan966x, PTP_TOD_NSEC(TOD_ACC_PIN));
+       ns &= PTP_TOD_NSEC_TOD_NSEC;
+
+       spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+
+       /* Deal with negative values */
+       if ((ns & 0xFFFFFFF0) == 0x3FFFFFF0) {
+               s--;
+               ns &= 0xf;
+               ns += 999999984;
+       }
+
+       set_normalized_timespec64(ts, s, ns);
+       return 0;
+}
+
+static int lan966x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+       struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info);
+       struct lan966x *lan966x = phc->lan966x;
+
+       if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&lan966x->ptp_clock_lock, flags);
+
+               /* Must be in IDLE mode before the time can be loaded */
+               lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) |
+                       PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+                       PTP_PIN_CFG_PIN_SYNC_SET(0),
+                       PTP_PIN_CFG_PIN_ACTION |
+                       PTP_PIN_CFG_PIN_DOM |
+                       PTP_PIN_CFG_PIN_SYNC,
+                       lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+               lan_wr(PTP_TOD_NSEC_TOD_NSEC_SET(delta),
+                      lan966x, PTP_TOD_NSEC(TOD_ACC_PIN));
+
+               /* Adjust time with the value of PTP_TOD_NSEC */
+               lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_DELTA) |
+                       PTP_PIN_CFG_PIN_DOM_SET(phc->index) |
+                       PTP_PIN_CFG_PIN_SYNC_SET(0),
+                       PTP_PIN_CFG_PIN_ACTION |
+                       PTP_PIN_CFG_PIN_DOM |
+                       PTP_PIN_CFG_PIN_SYNC,
+                       lan966x, PTP_PIN_CFG(TOD_ACC_PIN));
+
+               spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags);
+       } else {
+               /* Fall back using lan966x_ptp_settime64 which is not exact */
+               struct timespec64 ts;
+               u64 now;
+
+               lan966x_ptp_gettime64(ptp, &ts);
+
+               now = ktime_to_ns(timespec64_to_ktime(ts));
+               ts = ns_to_timespec64(now + delta);
+
+               lan966x_ptp_settime64(ptp, &ts);
+       }
+
+       return 0;
+}
+
+static struct ptp_clock_info lan966x_ptp_clock_info = {
+       .owner          = THIS_MODULE,
+       .name           = "lan966x ptp",
+       .max_adj        = 200000,
+       .gettime64      = lan966x_ptp_gettime64,
+       .settime64      = lan966x_ptp_settime64,
+       .adjtime        = lan966x_ptp_adjtime,
+       .adjfine        = lan966x_ptp_adjfine,
+};
+
+static int lan966x_ptp_phc_init(struct lan966x *lan966x,
+                               int index,
+                               struct ptp_clock_info *clock_info)
+{
+       struct lan966x_phc *phc = &lan966x->phc[index];
+
+       phc->info = *clock_info;
+       phc->clock = ptp_clock_register(&phc->info, lan966x->dev);
+       if (IS_ERR(phc->clock))
+               return PTR_ERR(phc->clock);
+
+       phc->index = index;
+       phc->lan966x = lan966x;
+
+       /* PTP Rx stamping is always enabled.  */
+       phc->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+
+       return 0;
+}
+
+int lan966x_ptp_init(struct lan966x *lan966x)
+{
+       u64 tod_adj = lan966x_ptp_get_nominal_value();
+       struct lan966x_port *port;
+       int err, i;
+
+       if (!lan966x->ptp)
+               return 0;
+
+       for (i = 0; i < LAN966X_PHC_COUNT; ++i) {
+               err = lan966x_ptp_phc_init(lan966x, i, &lan966x_ptp_clock_info);
+               if (err)
+                       return err;
+       }
+
+       spin_lock_init(&lan966x->ptp_clock_lock);
+       spin_lock_init(&lan966x->ptp_ts_id_lock);
+       mutex_init(&lan966x->ptp_lock);
+
+       /* Disable master counters */
+       lan_wr(PTP_DOM_CFG_ENA_SET(0), lan966x, PTP_DOM_CFG);
+
+       /* Configure the nominal TOD increment per clock cycle */
+       lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0x7),
+               PTP_DOM_CFG_CLKCFG_DIS,
+               lan966x, PTP_DOM_CFG);
+
+       for (i = 0; i < LAN966X_PHC_COUNT; ++i) {
+               lan_wr((u32)tod_adj & 0xFFFFFFFF, lan966x,
+                      PTP_CLK_PER_CFG(i, 0));
+               lan_wr((u32)(tod_adj >> 32), lan966x,
+                      PTP_CLK_PER_CFG(i, 1));
+       }
+
+       lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0),
+               PTP_DOM_CFG_CLKCFG_DIS,
+               lan966x, PTP_DOM_CFG);
+
+       /* Enable master counters */
+       lan_wr(PTP_DOM_CFG_ENA_SET(0x7), lan966x, PTP_DOM_CFG);
+
+       for (i = 0; i < lan966x->num_phys_ports; i++) {
+               port = lan966x->ports[i];
+               if (!port)
+                       continue;
+
+               skb_queue_head_init(&port->tx_skbs);
+       }
+
+       return 0;
+}
+
+void lan966x_ptp_deinit(struct lan966x *lan966x)
+{
+       struct lan966x_port *port;
+       int i;
+
+       for (i = 0; i < lan966x->num_phys_ports; i++) {
+               port = lan966x->ports[i];
+               if (!port)
+                       continue;
+
+               skb_queue_purge(&port->tx_skbs);
+       }
+
+       for (i = 0; i < LAN966X_PHC_COUNT; ++i)
+               ptp_clock_unregister(lan966x->phc[i].clock);
+}
+
+void lan966x_ptp_rxtstamp(struct lan966x *lan966x, struct sk_buff *skb,
+                         u64 timestamp)
+{
+       struct skb_shared_hwtstamps *shhwtstamps;
+       struct lan966x_phc *phc;
+       struct timespec64 ts;
+       u64 full_ts_in_ns;
+
+       if (!lan966x->ptp)
+               return;
+
+       phc = &lan966x->phc[LAN966X_PHC_PORT];
+       lan966x_ptp_gettime64(&phc->info, &ts);
+
+       /* Drop the sub-ns precision */
+       timestamp = timestamp >> 2;
+       if (ts.tv_nsec < timestamp)
+               ts.tv_sec--;
+       ts.tv_nsec = timestamp;
+       full_ts_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec);
+
+       shhwtstamps = skb_hwtstamps(skb);
+       shhwtstamps->hwtstamp = full_ts_in_ns;
+}
index 7975601..37a5d7e 100644 (file)
@@ -19,6 +19,7 @@ enum lan966x_target {
        TARGET_DEV = 13,
        TARGET_GCB = 27,
        TARGET_ORG = 36,
+       TARGET_PTP = 41,
        TARGET_QS = 42,
        TARGET_QSYS = 46,
        TARGET_REW = 47,
@@ -559,6 +560,108 @@ enum lan966x_target {
 #define DEV_PCS1G_STICKY_LINK_DOWN_STICKY_GET(x)\
        FIELD_GET(DEV_PCS1G_STICKY_LINK_DOWN_STICKY, x)
 
+/*      PTP:PTP_CFG:PTP_DOM_CFG */
+#define PTP_DOM_CFG               __REG(TARGET_PTP, 0, 1, 512, 0, 1, 16, 12, 0, 1, 4)
+
+#define PTP_DOM_CFG_ENA                          GENMASK(11, 9)
+#define PTP_DOM_CFG_ENA_SET(x)\
+       FIELD_PREP(PTP_DOM_CFG_ENA, x)
+#define PTP_DOM_CFG_ENA_GET(x)\
+       FIELD_GET(PTP_DOM_CFG_ENA, x)
+
+#define PTP_DOM_CFG_CLKCFG_DIS                   GENMASK(2, 0)
+#define PTP_DOM_CFG_CLKCFG_DIS_SET(x)\
+       FIELD_PREP(PTP_DOM_CFG_CLKCFG_DIS, x)
+#define PTP_DOM_CFG_CLKCFG_DIS_GET(x)\
+       FIELD_GET(PTP_DOM_CFG_CLKCFG_DIS, x)
+
+/*      PTP:PTP_TOD_DOMAINS:CLK_PER_CFG */
+#define PTP_CLK_PER_CFG(g, r)     __REG(TARGET_PTP, 0, 1, 528, g, 3, 28, 0, r, 2, 4)
+
+/*      PTP:PTP_PINS:PTP_PIN_CFG */
+#define PTP_PIN_CFG(g)            __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 0, 0, 1, 4)
+
+#define PTP_PIN_CFG_PIN_ACTION                   GENMASK(29, 27)
+#define PTP_PIN_CFG_PIN_ACTION_SET(x)\
+       FIELD_PREP(PTP_PIN_CFG_PIN_ACTION, x)
+#define PTP_PIN_CFG_PIN_ACTION_GET(x)\
+       FIELD_GET(PTP_PIN_CFG_PIN_ACTION, x)
+
+#define PTP_PIN_CFG_PIN_SYNC                     GENMASK(26, 25)
+#define PTP_PIN_CFG_PIN_SYNC_SET(x)\
+       FIELD_PREP(PTP_PIN_CFG_PIN_SYNC, x)
+#define PTP_PIN_CFG_PIN_SYNC_GET(x)\
+       FIELD_GET(PTP_PIN_CFG_PIN_SYNC, x)
+
+#define PTP_PIN_CFG_PIN_DOM                      GENMASK(17, 16)
+#define PTP_PIN_CFG_PIN_DOM_SET(x)\
+       FIELD_PREP(PTP_PIN_CFG_PIN_DOM, x)
+#define PTP_PIN_CFG_PIN_DOM_GET(x)\
+       FIELD_GET(PTP_PIN_CFG_PIN_DOM, x)
+
+/*      PTP:PTP_PINS:PTP_TOD_SEC_MSB */
+#define PTP_TOD_SEC_MSB(g)        __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 4, 0, 1, 4)
+
+#define PTP_TOD_SEC_MSB_TOD_SEC_MSB              GENMASK(15, 0)
+#define PTP_TOD_SEC_MSB_TOD_SEC_MSB_SET(x)\
+       FIELD_PREP(PTP_TOD_SEC_MSB_TOD_SEC_MSB, x)
+#define PTP_TOD_SEC_MSB_TOD_SEC_MSB_GET(x)\
+       FIELD_GET(PTP_TOD_SEC_MSB_TOD_SEC_MSB, x)
+
+/*      PTP:PTP_PINS:PTP_TOD_SEC_LSB */
+#define PTP_TOD_SEC_LSB(g)        __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 8, 0, 1, 4)
+
+/*      PTP:PTP_PINS:PTP_TOD_NSEC */
+#define PTP_TOD_NSEC(g)           __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 12, 0, 1, 4)
+
+#define PTP_TOD_NSEC_TOD_NSEC                    GENMASK(29, 0)
+#define PTP_TOD_NSEC_TOD_NSEC_SET(x)\
+       FIELD_PREP(PTP_TOD_NSEC_TOD_NSEC, x)
+#define PTP_TOD_NSEC_TOD_NSEC_GET(x)\
+       FIELD_GET(PTP_TOD_NSEC_TOD_NSEC, x)
+
+/*      PTP:PTP_TS_FIFO:PTP_TWOSTEP_CTRL */
+#define PTP_TWOSTEP_CTRL          __REG(TARGET_PTP, 0, 1, 612, 0, 1, 12, 0, 0, 1, 4)
+
+#define PTP_TWOSTEP_CTRL_NXT                     BIT(11)
+#define PTP_TWOSTEP_CTRL_NXT_SET(x)\
+       FIELD_PREP(PTP_TWOSTEP_CTRL_NXT, x)
+#define PTP_TWOSTEP_CTRL_NXT_GET(x)\
+       FIELD_GET(PTP_TWOSTEP_CTRL_NXT, x)
+
+#define PTP_TWOSTEP_CTRL_VLD                     BIT(10)
+#define PTP_TWOSTEP_CTRL_VLD_SET(x)\
+       FIELD_PREP(PTP_TWOSTEP_CTRL_VLD, x)
+#define PTP_TWOSTEP_CTRL_VLD_GET(x)\
+       FIELD_GET(PTP_TWOSTEP_CTRL_VLD, x)
+
+#define PTP_TWOSTEP_CTRL_STAMP_TX                BIT(9)
+#define PTP_TWOSTEP_CTRL_STAMP_TX_SET(x)\
+       FIELD_PREP(PTP_TWOSTEP_CTRL_STAMP_TX, x)
+#define PTP_TWOSTEP_CTRL_STAMP_TX_GET(x)\
+       FIELD_GET(PTP_TWOSTEP_CTRL_STAMP_TX, x)
+
+#define PTP_TWOSTEP_CTRL_STAMP_PORT              GENMASK(8, 1)
+#define PTP_TWOSTEP_CTRL_STAMP_PORT_SET(x)\
+       FIELD_PREP(PTP_TWOSTEP_CTRL_STAMP_PORT, x)
+#define PTP_TWOSTEP_CTRL_STAMP_PORT_GET(x)\
+       FIELD_GET(PTP_TWOSTEP_CTRL_STAMP_PORT, x)
+
+#define PTP_TWOSTEP_CTRL_OVFL                    BIT(0)
+#define PTP_TWOSTEP_CTRL_OVFL_SET(x)\
+       FIELD_PREP(PTP_TWOSTEP_CTRL_OVFL, x)
+#define PTP_TWOSTEP_CTRL_OVFL_GET(x)\
+       FIELD_GET(PTP_TWOSTEP_CTRL_OVFL, x)
+
+/*      PTP:PTP_TS_FIFO:PTP_TWOSTEP_STAMP */
+#define PTP_TWOSTEP_STAMP         __REG(TARGET_PTP, 0, 1, 612, 0, 1, 12, 4, 0, 1, 4)
+
+#define PTP_TWOSTEP_STAMP_STAMP_NSEC             GENMASK(31, 2)
+#define PTP_TWOSTEP_STAMP_STAMP_NSEC_SET(x)\
+       FIELD_PREP(PTP_TWOSTEP_STAMP_STAMP_NSEC, x)
+#define PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(x)\
+       FIELD_GET(PTP_TWOSTEP_STAMP_STAMP_NSEC, x)
+
 /*      DEVCPU_QS:XTR:XTR_GRP_CFG */
 #define QS_XTR_GRP_CFG(r)         __REG(TARGET_QS, 0, 1, 0, 0, 1, 36, 0, r, 2, 4)
 
index 1626627..35689b5 100644 (file)
@@ -328,7 +328,6 @@ static int sparx5_create_port(struct sparx5 *sparx5,
                return PTR_ERR(phylink);
 
        spx5_port->phylink = phylink;
-       phylink_set_pcs(phylink, &spx5_port->phylink_pcs);
 
        return 0;
 }
index 8ba33bc..830da0e 100644 (file)
@@ -26,6 +26,15 @@ static bool port_conf_has_changed(struct sparx5_port_config *a, struct sparx5_po
        return false;
 }
 
+static struct phylink_pcs *
+sparx5_phylink_mac_select_pcs(struct phylink_config *config,
+                             phy_interface_t interface)
+{
+       struct sparx5_port *port = netdev_priv(to_net_dev(config->dev));
+
+       return &port->phylink_pcs;
+}
+
 static void sparx5_phylink_mac_config(struct phylink_config *config,
                                      unsigned int mode,
                                      const struct phylink_link_state *state)
@@ -130,6 +139,7 @@ const struct phylink_pcs_ops sparx5_phylink_pcs_ops = {
 
 const struct phylink_mac_ops sparx5_phylink_mac_ops = {
        .validate = phylink_generic_validate,
+       .mac_select_pcs = sparx5_phylink_mac_select_pcs,
        .mac_config = sparx5_phylink_mac_config,
        .mac_link_down = sparx5_phylink_mac_link_down,
        .mac_link_up = sparx5_phylink_mac_link_up,
index 636dfef..49b85ca 100644 (file)
@@ -663,7 +663,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd,
        struct gdma_context *gc = gd->gdma_context;
        struct hw_channel_context *hwc;
        u32 length = gmi->length;
-       u32 req_msg_size;
+       size_t req_msg_size;
        int err;
        int i;
 
@@ -674,7 +674,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd,
                return -EINVAL;
 
        hwc = gc->hwc.driver_data;
-       req_msg_size = sizeof(*req) + num_page * sizeof(u64);
+       req_msg_size = struct_size(req, page_addr_list, num_page);
        if (req_msg_size > hwc->max_req_msg_size)
                return -EINVAL;
 
index 9a12607..d36405a 100644 (file)
@@ -48,7 +48,15 @@ enum TRI_STATE {
 
 #define MAX_PORTS_IN_MANA_DEV 256
 
-struct mana_stats {
+struct mana_stats_rx {
+       u64 packets;
+       u64 bytes;
+       u64 xdp_drop;
+       u64 xdp_tx;
+       struct u64_stats_sync syncp;
+};
+
+struct mana_stats_tx {
        u64 packets;
        u64 bytes;
        struct u64_stats_sync syncp;
@@ -76,7 +84,7 @@ struct mana_txq {
 
        atomic_t pending_sends;
 
-       struct mana_stats stats;
+       struct mana_stats_tx stats;
 };
 
 /* skb data and frags dma mappings */
@@ -298,10 +306,11 @@ struct mana_rxq {
 
        u32 buf_index;
 
-       struct mana_stats stats;
+       struct mana_stats_rx stats;
 
        struct bpf_prog __rcu *bpf_prog;
        struct xdp_rxq_info xdp_rxq;
+       struct page *xdp_save_page;
 
        /* MUST BE THE LAST MEMBER:
         * Each receive buffer has an associated mana_recv_buf_oob.
index 498d0f9..69e791e 100644 (file)
@@ -136,7 +136,7 @@ int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        bool ipv4 = false, ipv6 = false;
        struct mana_tx_package pkg = {};
        struct netdev_queue *net_txq;
-       struct mana_stats *tx_stats;
+       struct mana_stats_tx *tx_stats;
        struct gdma_queue *gdma_sq;
        unsigned int csum_type;
        struct mana_txq *txq;
@@ -299,7 +299,8 @@ static void mana_get_stats64(struct net_device *ndev,
 {
        struct mana_port_context *apc = netdev_priv(ndev);
        unsigned int num_queues = apc->num_queues;
-       struct mana_stats *stats;
+       struct mana_stats_rx *rx_stats;
+       struct mana_stats_tx *tx_stats;
        unsigned int start;
        u64 packets, bytes;
        int q;
@@ -310,26 +311,26 @@ static void mana_get_stats64(struct net_device *ndev,
        netdev_stats_to_stats64(st, &ndev->stats);
 
        for (q = 0; q < num_queues; q++) {
-               stats = &apc->rxqs[q]->stats;
+               rx_stats = &apc->rxqs[q]->stats;
 
                do {
-                       start = u64_stats_fetch_begin_irq(&stats->syncp);
-                       packets = stats->packets;
-                       bytes = stats->bytes;
-               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+                       start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+                       packets = rx_stats->packets;
+                       bytes = rx_stats->bytes;
+               } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
 
                st->rx_packets += packets;
                st->rx_bytes += bytes;
        }
 
        for (q = 0; q < num_queues; q++) {
-               stats = &apc->tx_qp[q].txq.stats;
+               tx_stats = &apc->tx_qp[q].txq.stats;
 
                do {
-                       start = u64_stats_fetch_begin_irq(&stats->syncp);
-                       packets = stats->packets;
-                       bytes = stats->bytes;
-               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+                       start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+                       packets = tx_stats->packets;
+                       bytes = tx_stats->bytes;
+               } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
 
                st->tx_packets += packets;
                st->tx_bytes += bytes;
@@ -986,7 +987,7 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
 static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
                        struct mana_rxq *rxq)
 {
-       struct mana_stats *rx_stats = &rxq->stats;
+       struct mana_stats_rx *rx_stats = &rxq->stats;
        struct net_device *ndev = rxq->ndev;
        uint pkt_len = cqe->ppi[0].pkt_len;
        u16 rxq_idx = rxq->rxq_idx;
@@ -1007,7 +1008,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
        act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
 
        if (act != XDP_PASS && act != XDP_TX)
-               goto drop;
+               goto drop_xdp;
 
        skb = mana_build_skb(buf_va, pkt_len, &xdp);
 
@@ -1034,6 +1035,14 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
                        skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
        }
 
+       u64_stats_update_begin(&rx_stats->syncp);
+       rx_stats->packets++;
+       rx_stats->bytes += pkt_len;
+
+       if (act == XDP_TX)
+               rx_stats->xdp_tx++;
+       u64_stats_update_end(&rx_stats->syncp);
+
        if (act == XDP_TX) {
                skb_set_queue_mapping(skb, rxq_idx);
                mana_xdp_tx(skb, ndev);
@@ -1042,15 +1051,19 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 
        napi_gro_receive(napi, skb);
 
+       return;
+
+drop_xdp:
        u64_stats_update_begin(&rx_stats->syncp);
-       rx_stats->packets++;
-       rx_stats->bytes += pkt_len;
+       rx_stats->xdp_drop++;
        u64_stats_update_end(&rx_stats->syncp);
-       return;
 
 drop:
-       free_page((unsigned long)buf_va);
+       WARN_ON_ONCE(rxq->xdp_save_page);
+       rxq->xdp_save_page = virt_to_page(buf_va);
+
        ++ndev->stats.rx_dropped;
+
        return;
 }
 
@@ -1105,7 +1118,13 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
        rxbuf_oob = &rxq->rx_oobs[curr];
        WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
 
-       new_page = alloc_page(GFP_ATOMIC);
+       /* Reuse XDP dropped page if available */
+       if (rxq->xdp_save_page) {
+               new_page = rxq->xdp_save_page;
+               rxq->xdp_save_page = NULL;
+       } else {
+               new_page = alloc_page(GFP_ATOMIC);
+       }
 
        if (new_page) {
                da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
@@ -1392,6 +1411,9 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 
        mana_deinit_cq(apc, &rxq->rx_cq);
 
+       if (rxq->xdp_save_page)
+               __free_page(rxq->xdp_save_page);
+
        for (i = 0; i < rxq->num_rx_buf; i++) {
                rx_oob = &rxq->rx_oobs[i];
 
index c3c81ae..e13f245 100644 (file)
@@ -23,7 +23,7 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset)
        if (stringset != ETH_SS_STATS)
                return -EINVAL;
 
-       return ARRAY_SIZE(mana_eth_stats) + num_queues * 4;
+       return ARRAY_SIZE(mana_eth_stats) + num_queues * 6;
 }
 
 static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
@@ -46,6 +46,10 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
                p += ETH_GSTRING_LEN;
                sprintf(p, "rx_%d_bytes", i);
                p += ETH_GSTRING_LEN;
+               sprintf(p, "rx_%d_xdp_drop", i);
+               p += ETH_GSTRING_LEN;
+               sprintf(p, "rx_%d_xdp_tx", i);
+               p += ETH_GSTRING_LEN;
        }
 
        for (i = 0; i < num_queues; i++) {
@@ -62,9 +66,12 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
        struct mana_port_context *apc = netdev_priv(ndev);
        unsigned int num_queues = apc->num_queues;
        void *eth_stats = &apc->eth_stats;
-       struct mana_stats *stats;
+       struct mana_stats_rx *rx_stats;
+       struct mana_stats_tx *tx_stats;
        unsigned int start;
        u64 packets, bytes;
+       u64 xdp_drop;
+       u64 xdp_tx;
        int q, i = 0;
 
        if (!apc->port_is_up)
@@ -74,26 +81,30 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
                data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset);
 
        for (q = 0; q < num_queues; q++) {
-               stats = &apc->rxqs[q]->stats;
+               rx_stats = &apc->rxqs[q]->stats;
 
                do {
-                       start = u64_stats_fetch_begin_irq(&stats->syncp);
-                       packets = stats->packets;
-                       bytes = stats->bytes;
-               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+                       start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+                       packets = rx_stats->packets;
+                       bytes = rx_stats->bytes;
+                       xdp_drop = rx_stats->xdp_drop;
+                       xdp_tx = rx_stats->xdp_tx;
+               } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
 
                data[i++] = packets;
                data[i++] = bytes;
+               data[i++] = xdp_drop;
+               data[i++] = xdp_tx;
        }
 
        for (q = 0; q < num_queues; q++) {
-               stats = &apc->tx_qp[q].txq.stats;
+               tx_stats = &apc->tx_qp[q].txq.stats;
 
                do {
-                       start = u64_stats_fetch_begin_irq(&stats->syncp);
-                       packets = stats->packets;
-                       bytes = stats->bytes;
-               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+                       start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+                       packets = tx_stats->packets;
+                       bytes = tx_stats->bytes;
+               } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
 
                data[i++] = packets;
                data[i++] = bytes;
index dfb4468..ce865e6 100644 (file)
@@ -356,7 +356,7 @@ __nfp_tun_add_route_to_cache(struct list_head *route_list,
                        return 0;
                }
 
-       entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC);
+       entry = kmalloc(struct_size(entry, ip_add, add_len), GFP_ATOMIC);
        if (!entry) {
                spin_unlock_bh(list_lock);
                return -ENOMEM;
index 3d61a8c..50007cc 100644 (file)
@@ -1,8 +1,7 @@
 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
 /* Copyright (C) 2015-2018 Netronome Systems, Inc. */
 
-/*
- * nfp_net_ctrl.h
+/* nfp_net_ctrl.h
  * Netronome network device driver: Control BAR layout
  * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
  *          Jason McMullan <jason.mcmullan@netronome.com>
 
 #include <linux/types.h>
 
-/**
- * Configuration BAR size.
+/* Configuration BAR size.
  *
  * The configuration BAR is 8K in size, but due to
  * THB-350, 32k needs to be reserved.
  */
 #define NFP_NET_CFG_BAR_SZ             (32 * 1024)
 
-/**
- * Offset in Freelist buffer where packet starts on RX
- */
+/* Offset in Freelist buffer where packet starts on RX */
 #define NFP_NET_RX_OFFSET              32
 
-/**
- * LSO parameters
+/* LSO parameters
  * %NFP_NET_LSO_MAX_HDR_SZ:    Maximum header size supported for LSO frames
  * %NFP_NET_LSO_MAX_SEGS:      Maximum number of segments LSO frame can produce
  */
 #define NFP_NET_LSO_MAX_HDR_SZ         255
 #define NFP_NET_LSO_MAX_SEGS           64
 
-/**
- * Prepend field types
- */
+/* Prepend field types */
 #define NFP_NET_META_FIELD_SIZE                4
 #define NFP_NET_META_HASH              1 /* next field carries hash type */
 #define NFP_NET_META_MARK              2
@@ -49,9 +42,7 @@
 
 #define NFP_META_PORT_ID_CTRL          ~0U
 
-/**
- * Hash type pre-pended when a RSS hash was computed
- */
+/* Hash type pre-pended when a RSS hash was computed */
 #define NFP_NET_RSS_NONE               0
 #define NFP_NET_RSS_IPV4               1
 #define NFP_NET_RSS_IPV6               2
 #define NFP_NET_RSS_IPV6_UDP           8
 #define NFP_NET_RSS_IPV6_EX_UDP                9
 
-/**
- * Ring counts
+/* Ring counts
  * %NFP_NET_TXR_MAX:        Maximum number of TX rings
  * %NFP_NET_RXR_MAX:        Maximum number of RX rings
  */
 #define NFP_NET_TXR_MAX                        64
 #define NFP_NET_RXR_MAX                        64
 
-/**
- * Read/Write config words (0x0000 - 0x002c)
+/* Read/Write config words (0x0000 - 0x002c)
  * %NFP_NET_CFG_CTRL:       Global control
  * %NFP_NET_CFG_UPDATE:      Indicate which fields are updated
  * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
 #define NFP_NET_CFG_LSC                        0x0020
 #define NFP_NET_CFG_MACADDR            0x0024
 
-/**
- * Read-only words (0x0030 - 0x0050):
+/* Read-only words (0x0030 - 0x0050):
  * %NFP_NET_CFG_VERSION:     Firmware version number
  * %NFP_NET_CFG_STS:        Status
  * %NFP_NET_CFG_CAP:        Capabilities (same bits as %NFP_NET_CFG_CTRL)
 #define NFP_NET_CFG_START_TXQ          0x0048
 #define NFP_NET_CFG_START_RXQ          0x004c
 
-/**
- * Prepend configuration
+/* Prepend configuration
  */
 #define NFP_NET_CFG_RX_OFFSET          0x0050
 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC          0       /* Prepend mode */
 
-/**
- * RSS capabilities
+/* RSS capabilities
  * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as
  *                             %NFP_NET_CFG_RSS_HFUNC)
  */
 #define NFP_NET_CFG_RSS_CAP            0x0054
 #define   NFP_NET_CFG_RSS_CAP_HFUNC      0xff000000
 
-/**
- * TLV area start
+/* TLV area start
  * %NFP_NET_CFG_TLV_BASE:      start anchor of the TLV area
  */
 #define NFP_NET_CFG_TLV_BASE           0x0058
 
-/**
- * VXLAN/UDP encap configuration
+/* VXLAN/UDP encap configuration
  * %NFP_NET_CFG_VXLAN_PORT:    Base address of table of tunnels' UDP dst ports
  * %NFP_NET_CFG_VXLAN_SZ:      Size of the UDP port table in bytes
  */
 #define NFP_NET_CFG_VXLAN_PORT         0x0060
 #define NFP_NET_CFG_VXLAN_SZ             0x0008
 
-/**
- * BPF section
+/* BPF section
  * %NFP_NET_CFG_BPF_ABI:       BPF ABI version
  * %NFP_NET_CFG_BPF_CAP:       BPF capabilities
  * %NFP_NET_CFG_BPF_MAX_LEN:   Maximum size of JITed BPF code in bytes
 #define   NFP_NET_CFG_BPF_CFG_MASK     7ULL
 #define   NFP_NET_CFG_BPF_ADDR_MASK    (~NFP_NET_CFG_BPF_CFG_MASK)
 
-/**
- * 40B reserved for future use (0x0098 - 0x00c0)
+/* 40B reserved for future use (0x0098 - 0x00c0)
  */
 #define NFP_NET_CFG_RESERVED           0x0098
 #define NFP_NET_CFG_RESERVED_SZ                0x0028
 
-/**
- * RSS configuration (0x0100 - 0x01ac):
+/* RSS configuration (0x0100 - 0x01ac):
  * Used only when NFP_NET_CFG_CTRL_RSS is enabled
  * %NFP_NET_CFG_RSS_CFG:     RSS configuration word
  * %NFP_NET_CFG_RSS_KEY:     RSS "secret" key
                                         NFP_NET_CFG_RSS_KEY_SZ)
 #define NFP_NET_CFG_RSS_ITBL_SZ                0x80
 
-/**
- * TX ring configuration (0x200 - 0x800)
+/* TX ring configuration (0x200 - 0x800)
  * %NFP_NET_CFG_TXR_BASE:    Base offset for TX ring configuration
  * %NFP_NET_CFG_TXR_ADDR:    Per TX ring DMA address (8B entries)
  * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries)
 #define NFP_NET_CFG_TXR_IRQ_MOD(_x)    (NFP_NET_CFG_TXR_BASE + 0x500 + \
                                         ((_x) * 0x4))
 
-/**
- * RX ring configuration (0x0800 - 0x0c00)
+/* RX ring configuration (0x0800 - 0x0c00)
  * %NFP_NET_CFG_RXR_BASE:    Base offset for RX ring configuration
  * %NFP_NET_CFG_RXR_ADDR:    Per RX ring DMA address (8B entries)
  * %NFP_NET_CFG_RXR_SZ:      Per RX ring ring size (1B entries)
 #define NFP_NET_CFG_RXR_IRQ_MOD(_x)    (NFP_NET_CFG_RXR_BASE + 0x300 + \
                                         ((_x) * 0x4))
 
-/**
- * Interrupt Control/Cause registers (0x0c00 - 0x0d00)
+/* Interrupt Control/Cause registers (0x0c00 - 0x0d00)
  * These registers are only used when MSI-X auto-masking is not
  * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set).  The array is index
  * by MSI-X entry and are 1B in size.  If an entry is zero, the
 #define   NFP_NET_CFG_ICR_RXTX         0x1
 #define   NFP_NET_CFG_ICR_LSC          0x2
 
-/**
- * General device stats (0x0d00 - 0x0d90)
+/* General device stats (0x0d00 - 0x0d90)
  * all counters are 64bit.
  */
 #define NFP_NET_CFG_STATS_BASE         0x0d00
 #define NFP_NET_CFG_STATS_APP3_FRAMES  (NFP_NET_CFG_STATS_BASE + 0xc0)
 #define NFP_NET_CFG_STATS_APP3_BYTES   (NFP_NET_CFG_STATS_BASE + 0xc8)
 
-/**
- * Per ring stats (0x1000 - 0x1800)
+/* Per ring stats (0x1000 - 0x1800)
  * options, 64bit per entry
  * %NFP_NET_CFG_TXR_STATS:   TX ring statistics (Packet and Byte count)
  * %NFP_NET_CFG_RXR_STATS:   RX ring statistics (Packet and Byte count)
 #define NFP_NET_CFG_RXR_STATS(_x)      (NFP_NET_CFG_RXR_STATS_BASE + \
                                         ((_x) * 0x10))
 
-/**
- * General use mailbox area (0x1800 - 0x19ff)
+/* General use mailbox area (0x1800 - 0x19ff)
  * 4B used for update command and 4B return code
  * followed by a max of 504B of variable length value
  */
 #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET      5
 #define NFP_NET_CFG_MBOX_CMD_TLV_CMSG                  6
 
-/**
- * VLAN filtering using general use mailbox
+/* VLAN filtering using general use mailbox
  * %NFP_NET_CFG_VLAN_FILTER:           Base address of VLAN filter mailbox
  * %NFP_NET_CFG_VLAN_FILTER_VID:       VLAN ID to filter
  * %NFP_NET_CFG_VLAN_FILTER_PROTO:     VLAN proto to filter
 #define  NFP_NET_CFG_VLAN_FILTER_PROTO  (NFP_NET_CFG_VLAN_FILTER + 2)
 #define NFP_NET_CFG_VLAN_FILTER_SZ      0x0004
 
-/**
- * TLV capabilities
+/* TLV capabilities
  * %NFP_NET_CFG_TLV_TYPE:      Offset of type within the TLV
  * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
  * %NFP_NET_CFG_TLV_LENGTH:    Offset of length within the TLV
 #define NFP_NET_CFG_TLV_HEADER_TYPE    0x7fff0000
 #define NFP_NET_CFG_TLV_HEADER_LENGTH  0x0000ffff
 
-/**
- * Capability TLV types
+/* Capability TLV types
  *
  * %NFP_NET_CFG_TLV_TYPE_UNKNOWN:
  * Special TLV type to catch bugs, should never be encountered.  Drivers should
 
 struct device;
 
-/**
- * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
+/* struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
  * @me_freq_mhz:       ME clock_freq (MHz)
  * @mbox_off:          vNIC mailbox area offset
  * @mbox_len:          vNIC mailbox area length
index a3db0cb..786be58 100644 (file)
@@ -4,8 +4,7 @@
 #ifndef _NFP_NET_SRIOV_H_
 #define _NFP_NET_SRIOV_H_
 
-/**
- * SRIOV VF configuration.
+/* SRIOV VF configuration.
  * The configuration memory begins with a mailbox region for communication with
  * the firmware followed by individual VF entries.
  */
index ae4da18..df316b9 100644 (file)
@@ -132,8 +132,7 @@ void nfp_devlink_port_unregister(struct nfp_port *port);
 void nfp_devlink_port_type_eth_set(struct nfp_port *port);
 void nfp_devlink_port_type_clear(struct nfp_port *port);
 
-/**
- * Mac stats (0x0000 - 0x0200)
+/* Mac stats (0x0000 - 0x0200)
  * all counters are 64bit.
  */
 #define NFP_MAC_STATS_BASE                0x0000
index 10e7d8b..730fea2 100644 (file)
@@ -513,7 +513,7 @@ nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp,
        dma_size = BIT_ULL(dma_order);
        nseg = DIV_ROUND_UP(max_size, chunk_size);
 
-       chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL);
+       chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL);
        if (!chunks)
                return -ENOMEM;
 
index 5e25411..602f4d4 100644 (file)
@@ -18,7 +18,7 @@ struct ionic_lif;
 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF    0x1002
 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF    0x1003
 
-#define DEVCMD_TIMEOUT  10
+#define DEVCMD_TIMEOUT                 5
 #define IONIC_ADMINQ_TIME_SLICE                msecs_to_jiffies(100)
 
 #define IONIC_PHC_UPDATE_NS    10000000000         /* 10s in nanoseconds */
@@ -78,6 +78,9 @@ void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
                                   u8 status, int err);
 
 int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_wait);
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+                                int err);
 int ionic_set_dma_mask(struct ionic *ionic);
 int ionic_setup(struct ionic *ionic);
 
@@ -89,4 +92,6 @@ int ionic_port_identify(struct ionic *ionic);
 int ionic_port_init(struct ionic *ionic);
 int ionic_port_reset(struct ionic *ionic);
 
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr);
+
 #endif /* _IONIC_H_ */
index 7e296fa..6ffc62c 100644 (file)
@@ -109,8 +109,8 @@ void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page)
 
 static void ionic_vf_dealloc_locked(struct ionic *ionic)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
        struct ionic_vf *v;
-       dma_addr_t dma = 0;
        int i;
 
        if (!ionic->vfs)
@@ -120,9 +120,8 @@ static void ionic_vf_dealloc_locked(struct ionic *ionic)
                v = &ionic->vfs[i];
 
                if (v->stats_pa) {
-                       (void)ionic_set_vf_config(ionic, i,
-                                                 IONIC_VF_ATTR_STATSADDR,
-                                                 (u8 *)&dma);
+                       vfc.stats_pa = 0;
+                       (void)ionic_set_vf_config(ionic, i, &vfc);
                        dma_unmap_single(ionic->dev, v->stats_pa,
                                         sizeof(v->stats), DMA_FROM_DEVICE);
                        v->stats_pa = 0;
@@ -143,6 +142,7 @@ static void ionic_vf_dealloc(struct ionic *ionic)
 
 static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR };
        struct ionic_vf *v;
        int err = 0;
        int i;
@@ -166,9 +166,10 @@ static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
                }
 
                ionic->num_vfs++;
+
                /* ignore failures from older FW, we just won't get stats */
-               (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR,
-                                         (u8 *)&v->stats_pa);
+               vfc.stats_pa = cpu_to_le64(v->stats_pa);
+               (void)ionic_set_vf_config(ionic, i, &vfc);
        }
 
 out:
@@ -331,6 +332,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_out_deregister_lifs;
        }
 
+       mod_timer(&ionic->watchdog_timer,
+                 round_jiffies(jiffies + ionic->watchdog_period));
+
        return 0;
 
 err_out_deregister_lifs:
@@ -348,7 +352,6 @@ err_out_port_reset:
 err_out_reset:
        ionic_reset(ionic);
 err_out_teardown:
-       del_timer_sync(&ionic->watchdog_timer);
        pci_clear_master(pdev);
        /* Don't fail the probe for these errors, keep
         * the hw interface around for inspection
index d57e80d..52a1b5c 100644 (file)
@@ -33,7 +33,8 @@ static void ionic_watchdog_cb(struct timer_list *t)
            !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
 
-       if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state)) {
+       if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state) &&
+           !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
                work = kzalloc(sizeof(*work), GFP_ATOMIC);
                if (!work) {
                        netdev_err(lif->netdev, "rxmode change dropped\n");
@@ -46,6 +47,24 @@ static void ionic_watchdog_cb(struct timer_list *t)
        }
 }
 
+static void ionic_watchdog_init(struct ionic *ionic)
+{
+       struct ionic_dev *idev = &ionic->idev;
+
+       timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
+       ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
+
+       /* set times to ensure the first check will proceed */
+       atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
+       idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
+       /* init as ready, so no transition if the first check succeeds */
+       idev->last_fw_hb = 0;
+       idev->fw_hb_ready = true;
+       idev->fw_status_ready = true;
+       idev->fw_generation = IONIC_FW_STS_F_GENERATION &
+                             ioread8(&idev->dev_info_regs->fw_status);
+}
+
 void ionic_init_devinfo(struct ionic *ionic)
 {
        struct ionic_dev *idev = &ionic->idev;
@@ -109,21 +128,7 @@ int ionic_dev_setup(struct ionic *ionic)
                return -EFAULT;
        }
 
-       timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
-       ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
-
-       /* set times to ensure the first check will proceed */
-       atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
-       idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
-       /* init as ready, so no transition if the first check succeeds */
-       idev->last_fw_hb = 0;
-       idev->fw_hb_ready = true;
-       idev->fw_status_ready = true;
-       idev->fw_generation = IONIC_FW_STS_F_GENERATION &
-                             ioread8(&idev->dev_info_regs->fw_status);
-
-       mod_timer(&ionic->watchdog_timer,
-                 round_jiffies(jiffies + ionic->watchdog_period));
+       ionic_watchdog_init(ionic);
 
        idev->db_pages = bar->vaddr;
        idev->phy_db_pages = bar->bus_addr;
@@ -132,10 +137,21 @@ int ionic_dev_setup(struct ionic *ionic)
 }
 
 /* Devcmd Interface */
+bool ionic_is_fw_running(struct ionic_dev *idev)
+{
+       u8 fw_status = ioread8(&idev->dev_info_regs->fw_status);
+
+       /* firmware is useful only if the running bit is set and
+        * fw_status != 0xff (bad PCI read)
+        */
+       return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
+}
+
 int ionic_heartbeat_check(struct ionic *ionic)
 {
-       struct ionic_dev *idev = &ionic->idev;
        unsigned long check_time, last_check_time;
+       struct ionic_dev *idev = &ionic->idev;
+       struct ionic_lif *lif = ionic->lif;
        bool fw_status_ready = true;
        bool fw_hb_ready;
        u8 fw_generation;
@@ -155,13 +171,10 @@ do_check_time:
                goto do_check_time;
        }
 
-       /* firmware is useful only if the running bit is set and
-        * fw_status != 0xff (bad PCI read)
-        * If fw_status is not ready don't bother with the generation.
-        */
        fw_status = ioread8(&idev->dev_info_regs->fw_status);
 
-       if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) {
+       /* If fw_status is not ready don't bother with the generation */
+       if (!ionic_is_fw_running(idev)) {
                fw_status_ready = false;
        } else {
                fw_generation = fw_status & IONIC_FW_STS_F_GENERATION;
@@ -176,31 +189,41 @@ do_check_time:
                         * the down, the next watchdog will see the fw is up
                         * and the generation value stable, so will trigger
                         * the fw-up activity.
+                        *
+                        * If we had already moved to FW_RESET from a RESET event,
+                        * it is possible that we never saw the fw_status go to 0,
+                        * so we fake the current idev->fw_status_ready here to
+                        * force the transition and get FW up again.
                         */
-                       fw_status_ready = false;
+                       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+                               idev->fw_status_ready = false;  /* go to running */
+                       else
+                               fw_status_ready = false;        /* go to down */
                }
        }
 
        /* is this a transition? */
        if (fw_status_ready != idev->fw_status_ready) {
-               struct ionic_lif *lif = ionic->lif;
                bool trigger = false;
 
-               idev->fw_status_ready = fw_status_ready;
-
-               if (!fw_status_ready) {
-                       dev_info(ionic->dev, "FW stopped %u\n", fw_status);
-                       if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
-                               trigger = true;
-               } else {
-                       dev_info(ionic->dev, "FW running %u\n", fw_status);
-                       if (lif && test_bit(IONIC_LIF_F_FW_RESET, lif->state))
-                               trigger = true;
+               if (!fw_status_ready && lif &&
+                   !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                   !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+                       dev_info(ionic->dev, "FW stopped 0x%02x\n", fw_status);
+                       trigger = true;
+
+               } else if (fw_status_ready && lif &&
+                          test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                          !test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+                       dev_info(ionic->dev, "FW running 0x%02x\n", fw_status);
+                       trigger = true;
                }
 
                if (trigger) {
                        struct ionic_deferred_work *work;
 
+                       idev->fw_status_ready = fw_status_ready;
+
                        work = kzalloc(sizeof(*work), GFP_ATOMIC);
                        if (work) {
                                work->type = IONIC_DW_TYPE_LIF_RESET;
@@ -210,12 +233,14 @@ do_check_time:
                }
        }
 
-       if (!fw_status_ready)
+       if (!idev->fw_status_ready)
                return -ENXIO;
 
-       /* wait at least one watchdog period since the last heartbeat */
+       /* Because of some variability in the actual FW heartbeat, we
+        * wait longer than the DEVCMD_TIMEOUT before checking again.
+        */
        last_check_time = idev->last_hb_time;
-       if (time_before(check_time, last_check_time + ionic->watchdog_period))
+       if (time_before(check_time, last_check_time + DEVCMD_TIMEOUT * 2 * HZ))
                return 0;
 
        fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
@@ -392,60 +417,63 @@ void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type)
 }
 
 /* VF commands */
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data)
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+                       struct ionic_vf_setattr_cmd *vfc)
 {
        union ionic_dev_cmd cmd = {
                .vf_setattr.opcode = IONIC_CMD_VF_SETATTR,
-               .vf_setattr.attr = attr,
+               .vf_setattr.attr = vfc->attr,
                .vf_setattr.vf_index = cpu_to_le16(vf),
        };
        int err;
 
+       memcpy(cmd.vf_setattr.pad, vfc->pad, sizeof(vfc->pad));
+
+       mutex_lock(&ionic->dev_cmd_lock);
+       ionic_dev_cmd_go(&ionic->idev, &cmd);
+       err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+       mutex_unlock(&ionic->dev_cmd_lock);
+
+       return err;
+}
+
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+                            struct ionic_vf_getattr_comp *comp)
+{
+       union ionic_dev_cmd cmd = {
+               .vf_getattr.opcode = IONIC_CMD_VF_GETATTR,
+               .vf_getattr.attr = attr,
+               .vf_getattr.vf_index = cpu_to_le16(vf),
+       };
+       int err;
+
+       if (vf >= ionic->num_vfs)
+               return -EINVAL;
+
        switch (attr) {
        case IONIC_VF_ATTR_SPOOFCHK:
-               cmd.vf_setattr.spoofchk = *data;
-               dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
-                       __func__, vf, *data);
-               break;
        case IONIC_VF_ATTR_TRUST:
-               cmd.vf_setattr.trust = *data;
-               dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
-                       __func__, vf, *data);
-               break;
        case IONIC_VF_ATTR_LINKSTATE:
-               cmd.vf_setattr.linkstate = *data;
-               dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
-                       __func__, vf, *data);
-               break;
        case IONIC_VF_ATTR_MAC:
-               ether_addr_copy(cmd.vf_setattr.macaddr, data);
-               dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
-                       __func__, vf, data);
-               break;
        case IONIC_VF_ATTR_VLAN:
-               cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data);
-               dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
-                       __func__, vf, *(u16 *)data);
-               break;
        case IONIC_VF_ATTR_RATE:
-               cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data);
-               dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
-                       __func__, vf, *(u32 *)data);
                break;
        case IONIC_VF_ATTR_STATSADDR:
-               cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data);
-               dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n",
-                       __func__, vf, *(u64 *)data);
-               break;
        default:
                return -EINVAL;
        }
 
        mutex_lock(&ionic->dev_cmd_lock);
        ionic_dev_cmd_go(&ionic->idev, &cmd);
-       err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+       err = ionic_dev_cmd_wait_nomsg(ionic, DEVCMD_TIMEOUT);
+       memcpy_fromio(comp, &ionic->idev.dev_cmd_regs->comp.vf_getattr,
+                     sizeof(*comp));
        mutex_unlock(&ionic->dev_cmd_lock);
 
+       if (err && comp->status != IONIC_RC_ENOSUPP)
+               ionic_dev_cmd_dev_err_print(ionic, cmd.vf_getattr.opcode,
+                                           comp->status, err);
+
        return err;
 }
 
index e5acf3b..563c302 100644 (file)
@@ -318,7 +318,10 @@ void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable);
 void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
 void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
 
-int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data);
+int ionic_set_vf_config(struct ionic *ionic, int vf,
+                       struct ionic_vf_setattr_cmd *vfc);
+int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr,
+                            struct ionic_vf_getattr_comp *comp);
 void ionic_dev_cmd_queue_identify(struct ionic_dev *idev,
                                  u16 lif_type, u8 qtype, u8 qver);
 void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
@@ -353,5 +356,6 @@ void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start);
 void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
                     unsigned int stop_index);
 int ionic_heartbeat_check(struct ionic *ionic);
+bool ionic_is_fw_running(struct ionic_dev *idev);
 
 #endif /* _IONIC_DEV_H_ */
index 2ff7be1..542e395 100644 (file)
@@ -1112,12 +1112,17 @@ static bool ionic_notifyq_service(struct ionic_cq *cq,
                ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
                break;
        case IONIC_EVENT_RESET:
-               work = kzalloc(sizeof(*work), GFP_ATOMIC);
-               if (!work) {
-                       netdev_err(lif->netdev, "Reset event dropped\n");
-               } else {
-                       work->type = IONIC_DW_TYPE_LIF_RESET;
-                       ionic_lif_deferred_enqueue(&lif->deferred, work);
+               if (lif->ionic->idev.fw_status_ready &&
+                   !test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                   !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
+                       work = kzalloc(sizeof(*work), GFP_ATOMIC);
+                       if (!work) {
+                               netdev_err(lif->netdev, "Reset event dropped\n");
+                               clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
+                       } else {
+                               work->type = IONIC_DW_TYPE_LIF_RESET;
+                               ionic_lif_deferred_enqueue(&lif->deferred, work);
+                       }
                }
                break;
        default:
@@ -1782,7 +1787,7 @@ static void ionic_lif_quiesce(struct ionic_lif *lif)
 
        err = ionic_adminq_post_wait(lif, &ctx);
        if (err)
-               netdev_err(lif->netdev, "lif quiesce failed %d\n", err);
+               netdev_dbg(lif->netdev, "lif quiesce failed %d\n", err);
 }
 
 static void ionic_txrx_disable(struct ionic_lif *lif)
@@ -2152,6 +2157,76 @@ static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd
        }
 }
 
+static int ionic_update_cached_vf_config(struct ionic *ionic, int vf)
+{
+       struct ionic_vf_getattr_comp comp = { 0 };
+       int err;
+       u8 attr;
+
+       attr = IONIC_VF_ATTR_VLAN;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].vlanid = comp.vlanid;
+
+       attr = IONIC_VF_ATTR_SPOOFCHK;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].spoofchk = comp.spoofchk;
+
+       attr = IONIC_VF_ATTR_LINKSTATE;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err) {
+               switch (comp.linkstate) {
+               case IONIC_VF_LINK_STATUS_UP:
+                       ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_ENABLE;
+                       break;
+               case IONIC_VF_LINK_STATUS_DOWN:
+                       ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_DISABLE;
+                       break;
+               case IONIC_VF_LINK_STATUS_AUTO:
+                       ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_AUTO;
+                       break;
+               default:
+                       dev_warn(ionic->dev, "Unexpected link state %u\n", comp.linkstate);
+                       break;
+               }
+       }
+
+       attr = IONIC_VF_ATTR_RATE;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].maxrate = comp.maxrate;
+
+       attr = IONIC_VF_ATTR_TRUST;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ionic->vfs[vf].trusted = comp.trust;
+
+       attr = IONIC_VF_ATTR_MAC;
+       err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp);
+       if (err && comp.status != IONIC_RC_ENOSUPP)
+               goto err_out;
+       if (!err)
+               ether_addr_copy(ionic->vfs[vf].macaddr, comp.macaddr);
+
+err_out:
+       if (err)
+               dev_err(ionic->dev, "Failed to get %s for VF %d\n",
+                       ionic_vf_attr_to_str(attr), vf);
+
+       return err;
+}
+
 static int ionic_get_vf_config(struct net_device *netdev,
                               int vf, struct ifla_vf_info *ivf)
 {
@@ -2167,14 +2242,18 @@ static int ionic_get_vf_config(struct net_device *netdev,
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ivf->vf           = vf;
-               ivf->vlan         = le16_to_cpu(ionic->vfs[vf].vlanid);
-               ivf->qos          = 0;
-               ivf->spoofchk     = ionic->vfs[vf].spoofchk;
-               ivf->linkstate    = ionic->vfs[vf].linkstate;
-               ivf->max_tx_rate  = le32_to_cpu(ionic->vfs[vf].maxrate);
-               ivf->trusted      = ionic->vfs[vf].trusted;
-               ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+               ivf->vf = vf;
+               ivf->qos = 0;
+
+               ret = ionic_update_cached_vf_config(ionic, vf);
+               if (!ret) {
+                       ivf->vlan         = le16_to_cpu(ionic->vfs[vf].vlanid);
+                       ivf->spoofchk     = ionic->vfs[vf].spoofchk;
+                       ivf->linkstate    = ionic->vfs[vf].linkstate;
+                       ivf->max_tx_rate  = le32_to_cpu(ionic->vfs[vf].maxrate);
+                       ivf->trusted      = ionic->vfs[vf].trusted;
+                       ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+               }
        }
 
        up_read(&ionic->vf_op_lock);
@@ -2220,6 +2299,7 @@ static int ionic_get_vf_stats(struct net_device *netdev, int vf,
 
 static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_MAC };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        int ret;
@@ -2235,7 +2315,11 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac);
+               ether_addr_copy(vfc.macaddr, mac);
+               dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
+                       __func__, vf, vfc.macaddr);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        ether_addr_copy(ionic->vfs[vf].macaddr, mac);
        }
@@ -2247,6 +2331,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
                             u8 qos, __be16 proto)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_VLAN };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        int ret;
@@ -2269,8 +2354,11 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_VLAN, (u8 *)&vlan);
+               vfc.vlanid = cpu_to_le16(vlan);
+               dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
+                       __func__, vf, le16_to_cpu(vfc.vlanid));
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        ionic->vfs[vf].vlanid = cpu_to_le16(vlan);
        }
@@ -2282,6 +2370,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 static int ionic_set_vf_rate(struct net_device *netdev, int vf,
                             int tx_min, int tx_max)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_RATE };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
        int ret;
@@ -2298,8 +2387,11 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf,
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_RATE, (u8 *)&tx_max);
+               vfc.maxrate = cpu_to_le32(tx_max);
+               dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
+                       __func__, vf, le32_to_cpu(vfc.maxrate));
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        lif->ionic->vfs[vf].maxrate = cpu_to_le32(tx_max);
        }
@@ -2310,9 +2402,9 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf,
 
 static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_SPOOFCHK };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
-       u8 data = set;  /* convert to u8 for config */
        int ret;
 
        if (!netif_device_present(netdev))
@@ -2323,10 +2415,13 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_SPOOFCHK, &data);
+               vfc.spoofchk = set;
+               dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
+                       __func__, vf, vfc.spoofchk);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
-                       ionic->vfs[vf].spoofchk = data;
+                       ionic->vfs[vf].spoofchk = set;
        }
 
        up_write(&ionic->vf_op_lock);
@@ -2335,9 +2430,9 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
 
 static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_TRUST };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
-       u8 data = set;  /* convert to u8 for config */
        int ret;
 
        if (!netif_device_present(netdev))
@@ -2348,10 +2443,13 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_TRUST, &data);
+               vfc.trust = set;
+               dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
+                       __func__, vf, vfc.trust);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
-                       ionic->vfs[vf].trusted = data;
+                       ionic->vfs[vf].trusted = set;
        }
 
        up_write(&ionic->vf_op_lock);
@@ -2360,20 +2458,21 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
 
 static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
 {
+       struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_LINKSTATE };
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic *ionic = lif->ionic;
-       u8 data;
+       u8 vfls;
        int ret;
 
        switch (set) {
        case IFLA_VF_LINK_STATE_ENABLE:
-               data = IONIC_VF_LINK_STATUS_UP;
+               vfls = IONIC_VF_LINK_STATUS_UP;
                break;
        case IFLA_VF_LINK_STATE_DISABLE:
-               data = IONIC_VF_LINK_STATUS_DOWN;
+               vfls = IONIC_VF_LINK_STATUS_DOWN;
                break;
        case IFLA_VF_LINK_STATE_AUTO:
-               data = IONIC_VF_LINK_STATUS_AUTO;
+               vfls = IONIC_VF_LINK_STATUS_AUTO;
                break;
        default:
                return -EINVAL;
@@ -2387,8 +2486,11 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
        if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
                ret = -EINVAL;
        } else {
-               ret = ionic_set_vf_config(ionic, vf,
-                                         IONIC_VF_ATTR_LINKSTATE, &data);
+               vfc.linkstate = vfls;
+               dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
+                       __func__, vf, vfc.linkstate);
+
+               ret = ionic_set_vf_config(ionic, vf, &vfc);
                if (!ret)
                        ionic->vfs[vf].linkstate = set;
        }
@@ -2835,6 +2937,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
 
        mutex_unlock(&lif->queue_lock);
 
+       clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state);
        dev_info(ionic->dev, "FW Down: LIFs stopped\n");
 }
 
@@ -2934,8 +3037,6 @@ void ionic_lif_free(struct ionic_lif *lif)
        /* unmap doorbell page */
        ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
        lif->kern_dbpage = NULL;
-       kfree(lif->dbid_inuse);
-       lif->dbid_inuse = NULL;
 
        mutex_destroy(&lif->config_lock);
        mutex_destroy(&lif->queue_lock);
@@ -3135,22 +3236,12 @@ int ionic_lif_init(struct ionic_lif *lif)
                return -EINVAL;
        }
 
-       lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
-       if (!lif->dbid_inuse) {
-               dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
-               return -ENOMEM;
-       }
-
-       /* first doorbell id reserved for kernel (dbid aka pid == zero) */
-       set_bit(0, lif->dbid_inuse);
        lif->kern_pid = 0;
-
        dbpage_num = ionic_db_page_num(lif, lif->kern_pid);
        lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num);
        if (!lif->kern_dbpage) {
                dev_err(dev, "Cannot map dbpage, aborting\n");
-               err = -ENOMEM;
-               goto err_out_free_dbid;
+               return -ENOMEM;
        }
 
        err = ionic_lif_adminq_init(lif);
@@ -3186,15 +3277,13 @@ int ionic_lif_init(struct ionic_lif *lif)
        return 0;
 
 err_out_notifyq_deinit:
+       napi_disable(&lif->adminqcq->napi);
        ionic_lif_qcq_deinit(lif, lif->notifyqcq);
 err_out_adminq_deinit:
        ionic_lif_qcq_deinit(lif, lif->adminqcq);
        ionic_lif_reset(lif);
        ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
        lif->kern_dbpage = NULL;
-err_out_free_dbid:
-       kfree(lif->dbid_inuse);
-       lif->dbid_inuse = NULL;
 
        return err;
 }
index 9f7ab2f..a53984b 100644 (file)
@@ -135,6 +135,7 @@ enum ionic_lif_state_flags {
        IONIC_LIF_F_LINK_CHECK_REQUESTED,
        IONIC_LIF_F_FILTER_SYNC_NEEDED,
        IONIC_LIF_F_FW_RESET,
+       IONIC_LIF_F_FW_STOPPING,
        IONIC_LIF_F_SPLIT_INTR,
        IONIC_LIF_F_BROKEN,
        IONIC_LIF_F_TX_DIM_INTR,
@@ -213,7 +214,6 @@ struct ionic_lif {
        u32 rx_coalesce_hw;             /* what the hw is using */
        u32 tx_coalesce_usecs;          /* what the user asked for */
        u32 tx_coalesce_hw;             /* what the hw is using */
-       unsigned long *dbid_inuse;
        unsigned int dbid_count;
 
        struct ionic_phc *phc;
index 875f4ec..4029b4e 100644 (file)
@@ -188,6 +188,28 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
        }
 }
 
+const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr)
+{
+       switch (attr) {
+       case IONIC_VF_ATTR_SPOOFCHK:
+               return "IONIC_VF_ATTR_SPOOFCHK";
+       case IONIC_VF_ATTR_TRUST:
+               return "IONIC_VF_ATTR_TRUST";
+       case IONIC_VF_ATTR_LINKSTATE:
+               return "IONIC_VF_ATTR_LINKSTATE";
+       case IONIC_VF_ATTR_MAC:
+               return "IONIC_VF_ATTR_MAC";
+       case IONIC_VF_ATTR_VLAN:
+               return "IONIC_VF_ATTR_VLAN";
+       case IONIC_VF_ATTR_RATE:
+               return "IONIC_VF_ATTR_RATE";
+       case IONIC_VF_ATTR_STATSADDR:
+               return "IONIC_VF_ATTR_STATSADDR";
+       default:
+               return "IONIC_VF_ATTR_UNKNOWN";
+       }
+}
+
 static void ionic_adminq_flush(struct ionic_lif *lif)
 {
        struct ionic_desc_info *desc_info;
@@ -215,9 +237,13 @@ static void ionic_adminq_flush(struct ionic_lif *lif)
 void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode,
                                   u8 status, int err)
 {
+       const char *stat_str;
+
+       stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+                                        ionic_error_to_str(status);
+
        netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n",
-                  ionic_opcode_to_str(opcode), opcode,
-                  ionic_error_to_str(status), err);
+                  ionic_opcode_to_str(opcode), opcode, stat_str, err);
 }
 
 static int ionic_adminq_check_err(struct ionic_lif *lif,
@@ -318,6 +344,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx,
                if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                        netdev_err(netdev, "Posting of %s (%d) failed: %d\n",
                                   name, ctx->cmd.cmd.opcode, err);
+               ctx->comp.comp.status = IONIC_RC_ERROR;
                return err;
        }
 
@@ -331,11 +358,15 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx,
                if (remaining)
                        break;
 
-               /* interrupt the wait if FW stopped */
-               if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
+               /* force a check of FW status and break out if FW reset */
+               (void)ionic_heartbeat_check(lif->ionic);
+               if ((test_bit(IONIC_LIF_F_FW_RESET, lif->state) &&
+                    !lif->ionic->idev.fw_status_ready) ||
+                   test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) {
                        if (do_msg)
-                               netdev_err(netdev, "%s (%d) interrupted, FW in reset\n",
-                                          name, ctx->cmd.cmd.opcode);
+                               netdev_warn(netdev, "%s (%d) interrupted, FW in reset\n",
+                                           name, ctx->cmd.cmd.opcode);
+                       ctx->comp.comp.status = IONIC_RC_ERROR;
                        return -ENXIO;
                }
 
@@ -370,21 +401,34 @@ int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *
 
 static void ionic_dev_cmd_clean(struct ionic *ionic)
 {
-       union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
+       struct ionic_dev *idev = &ionic->idev;
 
-       iowrite32(0, &regs->doorbell);
-       memset_io(&regs->cmd, 0, sizeof(regs->cmd));
+       iowrite32(0, &idev->dev_cmd_regs->doorbell);
+       memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd));
 }
 
-int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status,
+                                int err)
+{
+       const char *stat_str;
+
+       stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" :
+                                        ionic_error_to_str(status);
+
+       dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
+               ionic_opcode_to_str(opcode), opcode, stat_str, err);
+}
+
+static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds,
+                               const bool do_msg)
 {
        struct ionic_dev *idev = &ionic->idev;
        unsigned long start_time;
        unsigned long max_wait;
        unsigned long duration;
+       int done = 0;
+       bool fw_up;
        int opcode;
-       int hb = 0;
-       int done;
        int err;
 
        /* Wait for dev cmd to complete, retrying if we get EAGAIN,
@@ -394,31 +438,24 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
 try_again:
        opcode = readb(&idev->dev_cmd_regs->cmd.cmd.opcode);
        start_time = jiffies;
-       do {
+       for (fw_up = ionic_is_fw_running(idev);
+            !done && fw_up && time_before(jiffies, max_wait);
+            fw_up = ionic_is_fw_running(idev)) {
                done = ionic_dev_cmd_done(idev);
                if (done)
                        break;
                usleep_range(100, 200);
-
-               /* Don't check the heartbeat on FW_CONTROL commands as they are
-                * notorious for interrupting the firmware's heartbeat update.
-                */
-               if (opcode != IONIC_CMD_FW_CONTROL)
-                       hb = ionic_heartbeat_check(ionic);
-       } while (!done && !hb && time_before(jiffies, max_wait));
+       }
        duration = jiffies - start_time;
 
        dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n",
                ionic_opcode_to_str(opcode), opcode,
                done, duration / HZ, duration);
 
-       if (!done && hb) {
-               /* It is possible (but unlikely) that FW was busy and missed a
-                * heartbeat check but is still alive and will process this
-                * request, so don't clean the dev_cmd in this case.
-                */
-               dev_dbg(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
-                       ionic_opcode_to_str(opcode), opcode);
+       if (!done && !fw_up) {
+               ionic_dev_cmd_clean(ionic);
+               dev_warn(ionic->dev, "DEVCMD %s (%d) interrupted - FW is down\n",
+                        ionic_opcode_to_str(opcode), opcode);
                return -ENXIO;
        }
 
@@ -444,9 +481,9 @@ try_again:
                }
 
                if (!(opcode == IONIC_CMD_FW_CONTROL && err == IONIC_RC_EAGAIN))
-                       dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
-                               ionic_opcode_to_str(opcode), opcode,
-                               ionic_error_to_str(err), err);
+                       if (do_msg)
+                               ionic_dev_cmd_dev_err_print(ionic, opcode, err,
+                                                           ionic_error_to_errno(err));
 
                return ionic_error_to_errno(err);
        }
@@ -454,6 +491,16 @@ try_again:
        return 0;
 }
 
+int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+{
+       return __ionic_dev_cmd_wait(ionic, max_seconds, true);
+}
+
+int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_seconds)
+{
+       return __ionic_dev_cmd_wait(ionic, max_seconds, false);
+}
+
 int ionic_setup(struct ionic *ionic)
 {
        int err;
@@ -540,6 +587,9 @@ int ionic_reset(struct ionic *ionic)
        struct ionic_dev *idev = &ionic->idev;
        int err;
 
+       if (!ionic_is_fw_running(idev))
+               return 0;
+
        mutex_lock(&ionic->dev_cmd_lock);
        ionic_dev_cmd_reset(idev);
        err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
@@ -612,15 +662,17 @@ int ionic_port_init(struct ionic *ionic)
 int ionic_port_reset(struct ionic *ionic)
 {
        struct ionic_dev *idev = &ionic->idev;
-       int err;
+       int err = 0;
 
        if (!idev->port_info)
                return 0;
 
-       mutex_lock(&ionic->dev_cmd_lock);
-       ionic_dev_cmd_port_reset(idev);
-       err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
-       mutex_unlock(&ionic->dev_cmd_lock);
+       if (ionic_is_fw_running(idev)) {
+               mutex_lock(&ionic->dev_cmd_lock);
+               ionic_dev_cmd_port_reset(idev);
+               err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+               mutex_unlock(&ionic->dev_cmd_lock);
+       }
 
        dma_free_coherent(ionic->dev, idev->port_info_sz,
                          idev->port_info, idev->port_info_pa);
@@ -628,9 +680,6 @@ int ionic_port_reset(struct ionic *ionic)
        idev->port_info = NULL;
        idev->port_info_pa = 0;
 
-       if (err)
-               dev_err(ionic->dev, "Failed to reset port\n");
-
        return err;
 }
 
index f6e785f..b736337 100644 (file)
@@ -376,10 +376,24 @@ static int ionic_lif_filter_add(struct ionic_lif *lif,
 
                spin_unlock_bh(&lif->rx_filters.lock);
 
-               if (err == -ENOSPC) {
-                       if (le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
-                               lif->max_vlans = lif->nvlans;
+               /* store the max_vlans limit that we found */
+               if (err == -ENOSPC &&
+                   le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN)
+                       lif->max_vlans = lif->nvlans;
+
+               /* Prevent unnecessary error messages on recoverable
+                * errors as the filter will get retried on the next
+                * sync attempt.
+                */
+               switch (err) {
+               case -ENOSPC:
+               case -ENXIO:
+               case -ETIMEDOUT:
+               case -EAGAIN:
+               case -EBUSY:
                        return 0;
+               default:
+                       break;
                }
 
                ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
@@ -494,9 +508,22 @@ static int ionic_lif_filter_del(struct ionic_lif *lif,
        spin_unlock_bh(&lif->rx_filters.lock);
 
        if (state != IONIC_FILTER_STATE_NEW) {
-               err = ionic_adminq_post_wait(lif, &ctx);
-               if (err && err != -EEXIST)
+               err = ionic_adminq_post_wait_nomsg(lif, &ctx);
+
+               switch (err) {
+                       /* ignore these errors */
+               case -EEXIST:
+               case -ENXIO:
+               case -ETIMEDOUT:
+               case -EAGAIN:
+               case -EBUSY:
+               case 0:
+                       break;
+               default:
+                       ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode,
+                                                     ctx.comp.comp.status, err);
                        return err;
+               }
        }
 
        return 0;
index 94384f5..d197a70 100644 (file)
@@ -669,27 +669,37 @@ dma_fail:
        return -EIO;
 }
 
+static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
+                                    struct ionic_desc_info *desc_info)
+{
+       struct ionic_buf_info *buf_info = desc_info->bufs;
+       struct device *dev = q->dev;
+       unsigned int i;
+
+       if (!desc_info->nbufs)
+               return;
+
+       dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
+                        buf_info->len, DMA_TO_DEVICE);
+       buf_info++;
+       for (i = 1; i < desc_info->nbufs; i++, buf_info++)
+               dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
+                              buf_info->len, DMA_TO_DEVICE);
+
+       desc_info->nbufs = 0;
+}
+
 static void ionic_tx_clean(struct ionic_queue *q,
                           struct ionic_desc_info *desc_info,
                           struct ionic_cq_info *cq_info,
                           void *cb_arg)
 {
-       struct ionic_buf_info *buf_info = desc_info->bufs;
        struct ionic_tx_stats *stats = q_to_tx_stats(q);
        struct ionic_qcq *qcq = q_to_qcq(q);
        struct sk_buff *skb = cb_arg;
-       struct device *dev = q->dev;
-       unsigned int i;
        u16 qi;
 
-       if (desc_info->nbufs) {
-               dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
-                                buf_info->len, DMA_TO_DEVICE);
-               buf_info++;
-               for (i = 1; i < desc_info->nbufs; i++, buf_info++)
-                       dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
-                                      buf_info->len, DMA_TO_DEVICE);
-       }
+       ionic_tx_desc_unmap_bufs(q, desc_info);
 
        if (!skb)
                return;
@@ -931,8 +941,11 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
                err = ionic_tx_tcp_inner_pseudo_csum(skb);
        else
                err = ionic_tx_tcp_pseudo_csum(skb);
-       if (err)
+       if (err) {
+               /* clean up mapping from ionic_tx_map_skb */
+               ionic_tx_desc_unmap_bufs(q, desc_info);
                return err;
+       }
 
        if (encap)
                hdrlen = skb_inner_transport_header(skb) - skb->data +
@@ -1003,8 +1016,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
        return 0;
 }
 
-static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
-                             struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
+                              struct ionic_desc_info *desc_info)
 {
        struct ionic_txq_desc *desc = desc_info->txq_desc;
        struct ionic_buf_info *buf_info = desc_info->bufs;
@@ -1038,12 +1051,10 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
                stats->crc32_csum++;
        else
                stats->csum++;
-
-       return 0;
 }
 
-static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
-                                struct ionic_desc_info *desc_info)
+static void ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
+                                 struct ionic_desc_info *desc_info)
 {
        struct ionic_txq_desc *desc = desc_info->txq_desc;
        struct ionic_buf_info *buf_info = desc_info->bufs;
@@ -1074,12 +1085,10 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
        desc->csum_offset = 0;
 
        stats->csum_none++;
-
-       return 0;
 }
 
-static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
-                             struct ionic_desc_info *desc_info)
+static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
+                              struct ionic_desc_info *desc_info)
 {
        struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc;
        struct ionic_buf_info *buf_info = &desc_info->bufs[1];
@@ -1093,31 +1102,24 @@ static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
        }
 
        stats->frags += skb_shinfo(skb)->nr_frags;
-
-       return 0;
 }
 
 static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 {
        struct ionic_desc_info *desc_info = &q->info[q->head_idx];
        struct ionic_tx_stats *stats = q_to_tx_stats(q);
-       int err;
 
        if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
                return -EIO;
 
        /* set up the initial descriptor */
        if (skb->ip_summed == CHECKSUM_PARTIAL)
-               err = ionic_tx_calc_csum(q, skb, desc_info);
+               ionic_tx_calc_csum(q, skb, desc_info);
        else
-               err = ionic_tx_calc_no_csum(q, skb, desc_info);
-       if (err)
-               return err;
+               ionic_tx_calc_no_csum(q, skb, desc_info);
 
        /* add frags */
-       err = ionic_tx_skb_frags(q, skb, desc_info);
-       if (err)
-               return err;
+       ionic_tx_skb_frags(q, skb, desc_info);
 
        skb_tx_timestamp(skb);
        stats->pkts++;
index da1eada..b3811ad 100644 (file)
@@ -614,12 +614,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
                                      usecs);
 }
 
-int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
-               struct qed_ptt *p_ptt,
-               u32 cmd,
-               u32 param,
-               u32 *o_mcp_resp,
-               u32 *o_mcp_param)
+static int _qed_mcp_cmd(struct qed_hwfn *p_hwfn,
+                       struct qed_ptt *p_ptt,
+                       u32 cmd,
+                       u32 param,
+                       u32 *o_mcp_resp,
+                       u32 *o_mcp_param,
+                       bool can_sleep)
 {
        struct qed_mcp_mb_params mb_params;
        int rc;
@@ -627,6 +628,7 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
        memset(&mb_params, 0, sizeof(mb_params));
        mb_params.cmd = cmd;
        mb_params.param = param;
+       mb_params.flags = can_sleep ? QED_MB_FLAG_CAN_SLEEP : 0;
 
        rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
        if (rc)
@@ -638,6 +640,28 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
        return 0;
 }
 
+int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
+               struct qed_ptt *p_ptt,
+               u32 cmd,
+               u32 param,
+               u32 *o_mcp_resp,
+               u32 *o_mcp_param)
+{
+       return (_qed_mcp_cmd(p_hwfn, p_ptt, cmd, param,
+                            o_mcp_resp, o_mcp_param, true));
+}
+
+int qed_mcp_cmd_nosleep(struct qed_hwfn *p_hwfn,
+                       struct qed_ptt *p_ptt,
+                       u32 cmd,
+                       u32 param,
+                       u32 *o_mcp_resp,
+                       u32 *o_mcp_param)
+{
+       return (_qed_mcp_cmd(p_hwfn, p_ptt, cmd, param,
+                            o_mcp_resp, o_mcp_param, false));
+}
+
 static int
 qed_mcp_nvm_wr_cmd(struct qed_hwfn *p_hwfn,
                   struct qed_ptt *p_ptt,
@@ -1728,8 +1752,8 @@ static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        qed_configure_pf_max_bandwidth(p_hwfn->cdev, p_info->bandwidth_max);
 
        /* Acknowledge the MFW */
-       qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BW_UPDATE_ACK, 0, &resp,
-                   &param);
+       qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_BW_UPDATE_ACK, 0, &resp,
+                           &param);
 }
 
 static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
@@ -1766,8 +1790,8 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
                   p_hwfn->mcp_info->func_info.ovlan, p_hwfn->hw_info.hw_mode);
 
        /* Acknowledge the MFW */
-       qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0,
-                   &resp, &param);
+       qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0,
+                           &resp, &param);
 }
 
 static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn,
@@ -3675,8 +3699,8 @@ static int qed_mcp_resource_cmd(struct qed_hwfn *p_hwfn,
 {
        int rc;
 
-       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_RESOURCE_CMD, param,
-                        p_mcp_resp, p_mcp_param);
+       rc = qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_RESOURCE_CMD,
+                                param, p_mcp_resp, p_mcp_param);
        if (rc)
                return rc;
 
index 369e189..2f26bee 100644 (file)
@@ -393,11 +393,12 @@ int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
                             struct qed_ptt *p_ptt, u32 *p_board_config);
 
 /**
- * qed_mcp_cmd(): General function for sending commands to the MCP
+ * qed_mcp_cmd(): Sleepable function for sending commands to the MCP
  *                mailbox. It acquire mutex lock for the entire
  *                operation, from sending the request until the MCP
  *                response. Waiting for MCP response will be checked up
- *                to 5 seconds every 5ms.
+ *                to 5 seconds every 10ms. Should not be called from atomic
+ *                context.
  *
  * @p_hwfn: HW device data.
  * @p_ptt: PTT required for register access.
@@ -417,6 +418,31 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
                u32 *o_mcp_param);
 
 /**
+ * qed_mcp_cmd_nosleep(): Function for sending commands to the MCP
+ *                        mailbox. It acquires a mutex lock for the entire
+ *                        operation, from sending the request until the MCP
+ *                        response. Waiting for MCP response will be checked up
+ *                        to 5 seconds every 10us. Should be called when sleep
+ *                        is not allowed.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
+ * @cmd: command to be sent to the MCP.
+ * @param: Optional param
+ * @o_mcp_resp: The MCP response code (exclude sequence).
+ * @o_mcp_param: Optional parameter provided by the MCP
+ *                     response
+ *
+ * Return: Int - 0 - Operation was successful.
+ */
+int qed_mcp_cmd_nosleep(struct qed_hwfn *p_hwfn,
+                       struct qed_ptt *p_ptt,
+                       u32 cmd,
+                       u32 param,
+                       u32 *o_mcp_resp,
+                       u32 *o_mcp_param);
+
+/**
  * qed_mcp_drain(): drains the nig, allowing completion to pass in
  *                  case of pauses.
  *                  (Should be called only from sleepable context)
index 19e2621..67014eb 100644 (file)
@@ -2667,10 +2667,7 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38:
                rtl_eri_set_bits(tp, 0xd4, 0x0c00);
                break;
-       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53:
-               rtl_eri_set_bits(tp, 0xd4, 0x1f80);
-               break;
-       case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63:
                r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80);
                break;
        default:
@@ -2678,13 +2675,48 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp)
        }
 }
 
+static void rtl_disable_exit_l1(struct rtl8169_private *tp)
+{
+       switch (tp->mac_version) {
+       case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+               rtl_eri_clear_bits(tp, 0xd4, 0x1f00);
+               break;
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63:
+               r8168_mac_ocp_modify(tp, 0xc0ac, 0x1f80, 0);
+               break;
+       default:
+               break;
+       }
+}
+
 static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
 {
        /* Don't enable ASPM in the chip if OS can't control ASPM */
        if (enable && tp->aspm_manageable) {
                RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
                RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
+
+               switch (tp->mac_version) {
+               case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48:
+               case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+                       /* reset ephy tx/rx disable timer */
+                       r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0);
+                       /* chip can trigger L1.2 */
+                       r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, BIT(2));
+                       break;
+               default:
+                       break;
+               }
        } else {
+               switch (tp->mac_version) {
+               case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48:
+               case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+                       r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0);
+                       break;
+               default:
+                       break;
+               }
+
                RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
                RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        }
@@ -4683,7 +4715,7 @@ static void rtl8169_down(struct rtl8169_private *tp)
        rtl_pci_commit(tp);
 
        rtl8169_cleanup(tp, true);
-
+       rtl_disable_exit_l1(tp);
        rtl_prepare_power_down(tp);
 }
 
@@ -4843,8 +4875,6 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
                rtl8169_down(tp);
 }
 
-#ifdef CONFIG_PM
-
 static int rtl8169_runtime_resume(struct device *dev)
 {
        struct rtl8169_private *tp = dev_get_drvdata(dev);
@@ -4860,7 +4890,7 @@ static int rtl8169_runtime_resume(struct device *dev)
        return 0;
 }
 
-static int __maybe_unused rtl8169_suspend(struct device *device)
+static int rtl8169_suspend(struct device *device)
 {
        struct rtl8169_private *tp = dev_get_drvdata(device);
 
@@ -4873,7 +4903,7 @@ static int __maybe_unused rtl8169_suspend(struct device *device)
        return 0;
 }
 
-static int __maybe_unused rtl8169_resume(struct device *device)
+static int rtl8169_resume(struct device *device)
 {
        struct rtl8169_private *tp = dev_get_drvdata(device);
 
@@ -4915,13 +4945,11 @@ static int rtl8169_runtime_idle(struct device *device)
 }
 
 static const struct dev_pm_ops rtl8169_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
-       SET_RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
-                          rtl8169_runtime_idle)
+       SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
+       RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
+                      rtl8169_runtime_idle)
 };
 
-#endif /* CONFIG_PM */
-
 static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 {
        /* WoL fails with 8168b when the receiver is disabled. */
@@ -5255,6 +5283,16 @@ done:
        rtl_rar_set(tp, mac_addr);
 }
 
+/* register is set if system vendor successfully tested ASPM 1.2 */
+static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
+{
+       if (tp->mac_version >= RTL_GIGA_MAC_VER_60 &&
+           r8168_mac_ocp_read(tp, 0xc0b2) & 0xf)
+               return true;
+
+       return false;
+}
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        struct rtl8169_private *tp;
@@ -5333,7 +5371,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
         * Chips from RTL8168h partially have issues with L1.2, but seem
         * to work fine with L1 and L1.1.
         */
-       if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
+       if (rtl_aspm_is_safe(tp))
+               rc = 0;
+       else if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
                rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
        else
                rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
@@ -5460,9 +5500,7 @@ static struct pci_driver rtl8169_pci_driver = {
        .probe          = rtl_init_one,
        .remove         = rtl_remove_one,
        .shutdown       = rtl_shutdown,
-#ifdef CONFIG_PM
-       .driver.pm      = &rtl8169_pm_ops,
-#endif
+       .driver.pm      = pm_ptr(&rtl8169_pm_ops),
 };
 
 module_pci_driver(rtl8169_pci_driver);
index b215cde..24e2635 100644 (file)
@@ -1432,11 +1432,7 @@ static int ravb_phy_init(struct net_device *ndev)
         * at this time.
         */
        if (soc_device_match(r8a7795es10)) {
-               err = phy_set_max_speed(phydev, SPEED_100);
-               if (err) {
-                       netdev_err(ndev, "failed to limit PHY to 100Mbit/s\n");
-                       goto err_phy_disconnect;
-               }
+               phy_set_max_speed(phydev, SPEED_100);
 
                netdev_info(ndev, "limited PHY to 100Mbit/s\n");
        }
@@ -1457,8 +1453,6 @@ static int ravb_phy_init(struct net_device *ndev)
 
        return 0;
 
-err_phy_disconnect:
-       phy_disconnect(phydev);
 err_deregister_fixed_link:
        if (of_phy_is_fixed_link(np))
                of_phy_deregister_fixed_link(np);
@@ -2854,7 +2848,6 @@ static int ravb_wol_restore(struct net_device *ndev)
 {
        struct ravb_private *priv = netdev_priv(ndev);
        const struct ravb_hw_info *info = priv->info;
-       int ret;
 
        if (info->nc_queues)
                napi_enable(&priv->napi[RAVB_NC]);
@@ -2863,9 +2856,7 @@ static int ravb_wol_restore(struct net_device *ndev)
        /* Disable MagicPacket */
        ravb_modify(ndev, ECMR, ECMR_MPDE, 0);
 
-       ret = ravb_close(ndev);
-       if (ret < 0)
-               return ret;
+       ravb_close(ndev);
 
        return disable_irq_wake(priv->emac_irq);
 }
index d947a62..67ade78 100644 (file)
@@ -2026,14 +2026,8 @@ static int sh_eth_phy_init(struct net_device *ndev)
        }
 
        /* mask with MAC supported features */
-       if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) {
-               int err = phy_set_max_speed(phydev, SPEED_100);
-               if (err) {
-                       netdev_err(ndev, "failed to limit PHY to 100 Mbit/s\n");
-                       phy_disconnect(phydev);
-                       return err;
-               }
-       }
+       if (mdp->cd->register_type != SH_ETH_REG_GIGABIT)
+               phy_set_max_speed(phydev, SPEED_100);
 
        phy_attached_info(phydev);
 
@@ -3450,9 +3444,7 @@ static int sh_eth_wol_restore(struct net_device *ndev)
         * both be reset and all registers restored. This is what
         * happens during suspend and resume without WoL enabled.
         */
-       ret = sh_eth_close(ndev);
-       if (ret < 0)
-               return ret;
+       sh_eth_close(ndev);
        ret = sh_eth_open(ndev);
        if (ret < 0)
                return ret;
@@ -3464,7 +3456,7 @@ static int sh_eth_suspend(struct device *dev)
 {
        struct net_device *ndev = dev_get_drvdata(dev);
        struct sh_eth_private *mdp = netdev_priv(ndev);
-       int ret = 0;
+       int ret;
 
        if (!netif_running(ndev))
                return 0;
@@ -3483,7 +3475,7 @@ static int sh_eth_resume(struct device *dev)
 {
        struct net_device *ndev = dev_get_drvdata(dev);
        struct sh_eth_private *mdp = netdev_priv(ndev);
-       int ret = 0;
+       int ret;
 
        if (!netif_running(ndev))
                return 0;
index 32161a5..77a0d9d 100644 (file)
@@ -127,7 +127,7 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv)
        /* MAC core supports the EEE feature. */
        if (priv->hw_cap.eee) {
                /* Check if the PHY supports EEE */
-               if (phy_init_eee(ndev->phydev, 1))
+               if (phy_init_eee(ndev->phydev, true))
                        return false;
 
                priv->eee_active = 1;
index cf366ed..50d5359 100644 (file)
@@ -3990,6 +3990,30 @@ static unsigned int ef10_check_caps(const struct efx_nic *efx,
        }
 }
 
+static unsigned int efx_ef10_recycle_ring_size(const struct efx_nic *efx)
+{
+       unsigned int ret = EFX_RECYCLE_RING_SIZE_10G;
+
+       /* There is no difference between PFs and VFs. The size is based on
+        * the maximum link speed of a given NIC.
+        */
+       switch (efx->pci_dev->device & 0xfff) {
+       case 0x0903:    /* Farmingdale can do up to 10G */
+               break;
+       case 0x0923:    /* Greenport can do up to 40G */
+       case 0x0a03:    /* Medford can do up to 40G */
+               ret *= 4;
+               break;
+       default:        /* Medford2 can do up to 100G */
+               ret *= 10;
+       }
+
+       if (IS_ENABLED(CONFIG_PPC64))
+               ret *= 4;
+
+       return ret;
+}
+
 #define EF10_OFFLOAD_FEATURES          \
        (NETIF_F_IP_CSUM |              \
         NETIF_F_HW_VLAN_CTAG_FILTER |  \
@@ -4106,6 +4130,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
        .check_caps = ef10_check_caps,
        .print_additional_fwver = efx_ef10_print_additional_fwver,
        .sensor_event = efx_mcdi_sensor_event,
+       .rx_recycle_ring_size = efx_ef10_recycle_ring_size,
 };
 
 const struct efx_nic_type efx_hunt_a0_nic_type = {
@@ -4243,4 +4268,5 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
        .check_caps = ef10_check_caps,
        .print_additional_fwver = efx_ef10_print_additional_fwver,
        .sensor_event = efx_mcdi_sensor_event,
+       .rx_recycle_ring_size = efx_ef10_recycle_ring_size,
 };
index f79b14a..a07cbf4 100644 (file)
@@ -23,6 +23,7 @@
 #include "ef100_rx.h"
 #include "ef100_tx.h"
 #include "ef100_netdev.h"
+#include "rx_common.h"
 
 #define EF100_MAX_VIS 4096
 #define EF100_NUM_MCDI_BUFFERS 1
@@ -696,6 +697,12 @@ static unsigned int ef100_check_caps(const struct efx_nic *efx,
        }
 }
 
+static unsigned int efx_ef100_recycle_ring_size(const struct efx_nic *efx)
+{
+       /* Maximum link speed for Riverhead is 100G */
+       return 10 * EFX_RECYCLE_RING_SIZE_10G;
+}
+
 /*     NIC level access functions
  */
 #define EF100_OFFLOAD_FEATURES (NETIF_F_HW_CSUM | NETIF_F_RXCSUM |     \
@@ -770,6 +777,7 @@ const struct efx_nic_type ef100_pf_nic_type = {
        .rx_push_rss_context_config = efx_mcdi_rx_push_rss_context_config,
        .rx_pull_rss_context_config = efx_mcdi_rx_pull_rss_context_config,
        .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts,
+       .rx_recycle_ring_size = efx_ef100_recycle_ring_size,
 
        .reconfigure_mac = ef100_reconfigure_mac,
        .reconfigure_port = efx_mcdi_port_reconfigure,
@@ -849,6 +857,7 @@ const struct efx_nic_type ef100_vf_nic_type = {
        .rx_pull_rss_config = efx_mcdi_rx_pull_rss_config,
        .rx_push_rss_config = efx_mcdi_pf_rx_push_rss_config,
        .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts,
+       .rx_recycle_ring_size = efx_ef100_recycle_ring_size,
 
        .reconfigure_mac = ef100_reconfigure_mac,
        .test_nvram = efx_new_mcdi_nvram_test_all,
index cc15ee8..c75dc75 100644 (file)
@@ -1282,6 +1282,7 @@ struct efx_udp_tunnel {
  * @udp_tnl_has_port: Check if a port has been added as UDP tunnel
  * @print_additional_fwver: Dump NIC-specific additional FW version info
  * @sensor_event: Handle a sensor event from MCDI
+ * @rx_recycle_ring_size: Size of the RX recycle ring
  * @revision: Hardware architecture revision
  * @txd_ptr_tbl_base: TX descriptor ring base address
  * @rxd_ptr_tbl_base: RX descriptor ring base address
@@ -1460,6 +1461,7 @@ struct efx_nic_type {
        size_t (*print_additional_fwver)(struct efx_nic *efx, char *buf,
                                         size_t len);
        void (*sensor_event)(struct efx_nic *efx, efx_qword_t *ev);
+       unsigned int (*rx_recycle_ring_size)(const struct efx_nic *efx);
 
        int revision;
        unsigned int txd_ptr_tbl_base;
index b9cafe9..0cef35c 100644 (file)
@@ -195,6 +195,11 @@ static inline void efx_sensor_event(struct efx_nic *efx, efx_qword_t *ev)
                efx->type->sensor_event(efx, ev);
 }
 
+static inline unsigned int efx_rx_recycle_ring_size(const struct efx_nic *efx)
+{
+       return efx->type->rx_recycle_ring_size(efx);
+}
+
 /* Some statistics are computed as A - B where A and B each increase
  * linearly with some hardware counter(s) and the counters are read
  * asynchronously.  If the counters contributing to B are always read
index 633ca77..1b22c7b 100644 (file)
@@ -23,13 +23,6 @@ module_param(rx_refill_threshold, uint, 0444);
 MODULE_PARM_DESC(rx_refill_threshold,
                 "RX descriptor ring refill threshold (%)");
 
-/* Number of RX buffers to recycle pages for.  When creating the RX page recycle
- * ring, this number is divided by the number of buffers per page to calculate
- * the number of pages to store in the RX page recycle ring.
- */
-#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
-#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
-
 /* RX maximum head room required.
  *
  * This must be at least 1 to prevent overflow, plus one packet-worth
@@ -141,16 +134,7 @@ static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
        unsigned int bufs_in_recycle_ring, page_ring_size;
        struct efx_nic *efx = rx_queue->efx;
 
-       /* Set the RX recycle ring size */
-#ifdef CONFIG_PPC64
-       bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
-#else
-       if (iommu_present(&pci_bus_type))
-               bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
-       else
-               bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
-#endif /* CONFIG_PPC64 */
-
+       bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx);
        page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
                                            efx->rx_bufs_per_page);
        rx_queue->page_ring = kcalloc(page_ring_size,
index 207ccd8..fbd2769 100644 (file)
 #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
                                      EFX_RX_USR_BUF_SIZE)
 
+/* Number of RX buffers to recycle pages for.  When creating the RX page recycle
+ * ring, this number is divided by the number of buffers per page to calculate
+ * the number of pages to store in the RX page recycle ring.
+ */
+#define EFX_RECYCLE_RING_SIZE_10G      256
+
 static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
 {
        return page_address(buf->page) + buf->page_offset;
index 16347a6..ce3060e 100644 (file)
@@ -25,6 +25,7 @@
 #include "mcdi_port_common.h"
 #include "selftest.h"
 #include "siena_sriov.h"
+#include "rx_common.h"
 
 /* Hardware control for SFC9000 family including SFL9021 (aka Siena). */
 
@@ -958,6 +959,12 @@ static unsigned int siena_check_caps(const struct efx_nic *efx,
        return 0;
 }
 
+static unsigned int efx_siena_recycle_ring_size(const struct efx_nic *efx)
+{
+       /* Maximum link speed is 10G */
+       return EFX_RECYCLE_RING_SIZE_10G;
+}
+
 /**************************************************************************
  *
  * Revision-dependent attributes used by efx.c and nic.c
@@ -1098,4 +1105,5 @@ const struct efx_nic_type siena_a0_nic_type = {
        .rx_hash_key_size = 16,
        .check_caps = siena_check_caps,
        .sensor_event = efx_mcdi_sensor_event,
+       .rx_recycle_ring_size = efx_siena_recycle_ring_size,
 };
index 09644ab..f86cc83 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/of_net.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/regmap.h>
 #include <linux/stmmac.h>
@@ -57,7 +58,6 @@ struct emac_variant {
 };
 
 /* struct sunxi_priv_data - hold all sunxi private data
- * @tx_clk:    reference to MAC TX clock
  * @ephy_clk:  reference to the optional EPHY clock for the internal PHY
  * @regulator: reference to the optional regulator
  * @rst_ephy:  reference to the optional EPHY reset for the internal PHY
@@ -68,7 +68,6 @@ struct emac_variant {
  * @mux_handle:        Internal pointer used by mdio-mux lib
  */
 struct sunxi_priv_data {
-       struct clk *tx_clk;
        struct clk *ephy_clk;
        struct regulator *regulator;
        struct reset_control *rst_ephy;
@@ -579,22 +578,14 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv)
                }
        }
 
-       ret = clk_prepare_enable(gmac->tx_clk);
-       if (ret) {
-               dev_err(&pdev->dev, "Could not enable AHB clock\n");
-               goto err_disable_regulator;
-       }
-
        if (gmac->use_internal_phy) {
                ret = sun8i_dwmac_power_internal_phy(netdev_priv(ndev));
                if (ret)
-                       goto err_disable_clk;
+                       goto err_disable_regulator;
        }
 
        return 0;
 
-err_disable_clk:
-       clk_disable_unprepare(gmac->tx_clk);
 err_disable_regulator:
        if (gmac->regulator)
                regulator_disable(gmac->regulator);
@@ -1043,8 +1034,6 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
        if (gmac->variant->soc_has_internal_phy)
                sun8i_dwmac_unpower_internal_phy(gmac);
 
-       clk_disable_unprepare(gmac->tx_clk);
-
        if (gmac->regulator)
                regulator_disable(gmac->regulator);
 }
@@ -1167,12 +1156,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       gmac->tx_clk = devm_clk_get(dev, "stmmaceth");
-       if (IS_ERR(gmac->tx_clk)) {
-               dev_err(dev, "Could not get TX clock\n");
-               return PTR_ERR(gmac->tx_clk);
-       }
-
        /* Optional regulator for PHY */
        gmac->regulator = devm_regulator_get_optional(dev, "phy");
        if (IS_ERR(gmac->regulator)) {
@@ -1254,6 +1237,12 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
        ndev = dev_get_drvdata(&pdev->dev);
        priv = netdev_priv(ndev);
 
+       /* the MAC is runtime suspended after stmmac_dvr_probe(), so we
+        * need to ensure the MAC resume back before other operations such
+        * as reset.
+        */
+       pm_runtime_get_sync(&pdev->dev);
+
        /* The mux must be registered after parent MDIO
         * so after stmmac_dvr_probe()
         */
@@ -1272,12 +1261,15 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
                        goto dwmac_remove;
        }
 
+       pm_runtime_put(&pdev->dev);
+
        return 0;
 
 dwmac_mux:
        reset_control_put(gmac->rst_ephy);
        clk_put(gmac->ephy_clk);
 dwmac_remove:
+       pm_runtime_put_noidle(&pdev->dev);
        stmmac_dvr_remove(&pdev->dev);
 dwmac_exit:
        sun8i_dwmac_exit(pdev, gmac);
index bde76ea..b745d62 100644 (file)
@@ -938,105 +938,15 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
                        priv->pause, tx_cnt);
 }
 
-static void stmmac_validate(struct phylink_config *config,
-                           unsigned long *supported,
-                           struct phylink_link_state *state)
+static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config,
+                                                phy_interface_t interface)
 {
        struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, };
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
-       int tx_cnt = priv->plat->tx_queues_to_use;
-       int max_speed = priv->plat->max_speed;
-
-       phylink_set(mac_supported, 10baseT_Half);
-       phylink_set(mac_supported, 10baseT_Full);
-       phylink_set(mac_supported, 100baseT_Half);
-       phylink_set(mac_supported, 100baseT_Full);
-       phylink_set(mac_supported, 1000baseT_Half);
-       phylink_set(mac_supported, 1000baseT_Full);
-       phylink_set(mac_supported, 1000baseKX_Full);
-
-       phylink_set(mac_supported, Autoneg);
-       phylink_set(mac_supported, Pause);
-       phylink_set(mac_supported, Asym_Pause);
-       phylink_set_port_modes(mac_supported);
-
-       /* Cut down 1G if asked to */
-       if ((max_speed > 0) && (max_speed < 1000)) {
-               phylink_set(mask, 1000baseT_Full);
-               phylink_set(mask, 1000baseX_Full);
-       } else if (priv->plat->has_gmac4) {
-               if (!max_speed || max_speed >= 2500) {
-                       phylink_set(mac_supported, 2500baseT_Full);
-                       phylink_set(mac_supported, 2500baseX_Full);
-               }
-       } else if (priv->plat->has_xgmac) {
-               if (!max_speed || (max_speed >= 2500)) {
-                       phylink_set(mac_supported, 2500baseT_Full);
-                       phylink_set(mac_supported, 2500baseX_Full);
-               }
-               if (!max_speed || (max_speed >= 5000)) {
-                       phylink_set(mac_supported, 5000baseT_Full);
-               }
-               if (!max_speed || (max_speed >= 10000)) {
-                       phylink_set(mac_supported, 10000baseSR_Full);
-                       phylink_set(mac_supported, 10000baseLR_Full);
-                       phylink_set(mac_supported, 10000baseER_Full);
-                       phylink_set(mac_supported, 10000baseLRM_Full);
-                       phylink_set(mac_supported, 10000baseT_Full);
-                       phylink_set(mac_supported, 10000baseKX4_Full);
-                       phylink_set(mac_supported, 10000baseKR_Full);
-               }
-               if (!max_speed || (max_speed >= 25000)) {
-                       phylink_set(mac_supported, 25000baseCR_Full);
-                       phylink_set(mac_supported, 25000baseKR_Full);
-                       phylink_set(mac_supported, 25000baseSR_Full);
-               }
-               if (!max_speed || (max_speed >= 40000)) {
-                       phylink_set(mac_supported, 40000baseKR4_Full);
-                       phylink_set(mac_supported, 40000baseCR4_Full);
-                       phylink_set(mac_supported, 40000baseSR4_Full);
-                       phylink_set(mac_supported, 40000baseLR4_Full);
-               }
-               if (!max_speed || (max_speed >= 50000)) {
-                       phylink_set(mac_supported, 50000baseCR2_Full);
-                       phylink_set(mac_supported, 50000baseKR2_Full);
-                       phylink_set(mac_supported, 50000baseSR2_Full);
-                       phylink_set(mac_supported, 50000baseKR_Full);
-                       phylink_set(mac_supported, 50000baseSR_Full);
-                       phylink_set(mac_supported, 50000baseCR_Full);
-                       phylink_set(mac_supported, 50000baseLR_ER_FR_Full);
-                       phylink_set(mac_supported, 50000baseDR_Full);
-               }
-               if (!max_speed || (max_speed >= 100000)) {
-                       phylink_set(mac_supported, 100000baseKR4_Full);
-                       phylink_set(mac_supported, 100000baseSR4_Full);
-                       phylink_set(mac_supported, 100000baseCR4_Full);
-                       phylink_set(mac_supported, 100000baseLR4_ER4_Full);
-                       phylink_set(mac_supported, 100000baseKR2_Full);
-                       phylink_set(mac_supported, 100000baseSR2_Full);
-                       phylink_set(mac_supported, 100000baseCR2_Full);
-                       phylink_set(mac_supported, 100000baseLR2_ER2_FR2_Full);
-                       phylink_set(mac_supported, 100000baseDR2_Full);
-               }
-       }
-
-       /* Half-Duplex can only work with single queue */
-       if (tx_cnt > 1) {
-               phylink_set(mask, 10baseT_Half);
-               phylink_set(mask, 100baseT_Half);
-               phylink_set(mask, 1000baseT_Half);
-       }
-
-       linkmode_and(supported, supported, mac_supported);
-       linkmode_andnot(supported, supported, mask);
 
-       linkmode_and(state->advertising, state->advertising, mac_supported);
-       linkmode_andnot(state->advertising, state->advertising, mask);
+       if (!priv->hw->xpcs)
+               return NULL;
 
-       /* If PCS is supported, check which modes it supports. */
-       if (priv->hw->xpcs)
-               xpcs_validate(priv->hw->xpcs, supported, state);
+       return &priv->hw->xpcs->pcs;
 }
 
 static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
@@ -1175,7 +1085,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 }
 
 static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
-       .validate = stmmac_validate,
+       .validate = phylink_generic_validate,
+       .mac_select_pcs = stmmac_mac_select_pcs,
        .mac_config = stmmac_mac_config,
        .mac_link_down = stmmac_mac_link_down,
        .mac_link_up = stmmac_mac_link_up,
@@ -1255,12 +1166,12 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 {
        struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
        struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node);
+       int max_speed = priv->plat->max_speed;
        int mode = priv->plat->phy_interface;
        struct phylink *phylink;
 
        priv->phylink_config.dev = &priv->dev->dev;
        priv->phylink_config.type = PHYLINK_NETDEV;
-       priv->phylink_config.pcs_poll = true;
        if (priv->plat->mdio_bus_data)
                priv->phylink_config.ovr_an_inband =
                        mdio_bus_data->xpcs_an_inband;
@@ -1268,14 +1179,50 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
        if (!fwnode)
                fwnode = dev_fwnode(priv->device);
 
+       /* Set the platform/firmware specified interface mode */
+       __set_bit(mode, priv->phylink_config.supported_interfaces);
+
+       /* If we have an xpcs, it defines which PHY interfaces are supported. */
+       if (priv->hw->xpcs)
+               xpcs_get_interfaces(priv->hw->xpcs,
+                                   priv->phylink_config.supported_interfaces);
+
+       priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+               MAC_10 | MAC_100;
+
+       if (!max_speed || max_speed >= 1000)
+               priv->phylink_config.mac_capabilities |= MAC_1000;
+
+       if (priv->plat->has_gmac4) {
+               if (!max_speed || max_speed >= 2500)
+                       priv->phylink_config.mac_capabilities |= MAC_2500FD;
+       } else if (priv->plat->has_xgmac) {
+               if (!max_speed || max_speed >= 2500)
+                       priv->phylink_config.mac_capabilities |= MAC_2500FD;
+               if (!max_speed || max_speed >= 5000)
+                       priv->phylink_config.mac_capabilities |= MAC_5000FD;
+               if (!max_speed || max_speed >= 10000)
+                       priv->phylink_config.mac_capabilities |= MAC_10000FD;
+               if (!max_speed || max_speed >= 25000)
+                       priv->phylink_config.mac_capabilities |= MAC_25000FD;
+               if (!max_speed || max_speed >= 40000)
+                       priv->phylink_config.mac_capabilities |= MAC_40000FD;
+               if (!max_speed || max_speed >= 50000)
+                       priv->phylink_config.mac_capabilities |= MAC_50000FD;
+               if (!max_speed || max_speed >= 100000)
+                       priv->phylink_config.mac_capabilities |= MAC_100000FD;
+       }
+
+       /* Half-Duplex can only work with single queue */
+       if (priv->plat->tx_queues_to_use > 1)
+               priv->phylink_config.mac_capabilities &=
+                       ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+
        phylink = phylink_create(&priv->phylink_config, fwnode,
                                 mode, &stmmac_phylink_mac_ops);
        if (IS_ERR(phylink))
                return PTR_ERR(phylink);
 
-       if (priv->hw->xpcs)
-               phylink_set_pcs(phylink, &priv->hw->xpcs->pcs);
-
        priv->phylink = phylink;
        return 0;
 }
index 5b4d153..4010896 100644 (file)
@@ -386,6 +386,7 @@ struct axidma_bd {
  * @phylink:   Pointer to phylink instance
  * @phylink_config: phylink configuration settings
  * @pcs_phy:   Reference to PCS/PMA PHY if used
+ * @pcs:       phylink pcs structure for PCS PHY
  * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core
  * @axi_clk:   AXI4-Lite bus clock
  * @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks)
@@ -434,6 +435,7 @@ struct axienet_local {
        struct phylink_config phylink_config;
 
        struct mdio_device *pcs_phy;
+       struct phylink_pcs pcs;
 
        bool switch_x_sgmii;
 
index 377c94e..de0a637 100644 (file)
@@ -1537,78 +1537,78 @@ static const struct ethtool_ops axienet_ethtool_ops = {
        .nway_reset     = axienet_ethtools_nway_reset,
 };
 
-static void axienet_mac_pcs_get_state(struct phylink_config *config,
-                                     struct phylink_link_state *state)
+static struct axienet_local *pcs_to_axienet_local(struct phylink_pcs *pcs)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
-       struct axienet_local *lp = netdev_priv(ndev);
+       return container_of(pcs, struct axienet_local, pcs);
+}
 
-       switch (state->interface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               phylink_mii_c22_pcs_get_state(lp->pcs_phy, state);
-               break;
-       default:
-               break;
-       }
+static void axienet_pcs_get_state(struct phylink_pcs *pcs,
+                                 struct phylink_link_state *state)
+{
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+
+       phylink_mii_c22_pcs_get_state(pcs_phy, state);
 }
 
-static void axienet_mac_an_restart(struct phylink_config *config)
+static void axienet_pcs_an_restart(struct phylink_pcs *pcs)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
-       struct axienet_local *lp = netdev_priv(ndev);
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
 
-       phylink_mii_c22_pcs_an_restart(lp->pcs_phy);
+       phylink_mii_c22_pcs_an_restart(pcs_phy);
 }
 
-static int axienet_mac_prepare(struct phylink_config *config, unsigned int mode,
-                              phy_interface_t iface)
+static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+                             phy_interface_t interface,
+                             const unsigned long *advertising,
+                             bool permit_pause_to_mac)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+       struct net_device *ndev = pcs_to_axienet_local(pcs)->ndev;
        struct axienet_local *lp = netdev_priv(ndev);
        int ret;
 
-       switch (iface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               if (!lp->switch_x_sgmii)
-                       return 0;
-
-               ret = mdiobus_write(lp->pcs_phy->bus,
-                                   lp->pcs_phy->addr,
-                                   XLNX_MII_STD_SELECT_REG,
-                                   iface == PHY_INTERFACE_MODE_SGMII ?
+       if (lp->switch_x_sgmii) {
+               ret = mdiodev_write(pcs_phy, XLNX_MII_STD_SELECT_REG,
+                                   interface == PHY_INTERFACE_MODE_SGMII ?
                                        XLNX_MII_STD_SELECT_SGMII : 0);
-               if (ret < 0)
-                       netdev_warn(ndev, "Failed to switch PHY interface: %d\n",
+               if (ret < 0) {
+                       netdev_warn(ndev,
+                                   "Failed to switch PHY interface: %d\n",
                                    ret);
-               return ret;
-       default:
-               return 0;
+                       return ret;
+               }
        }
+
+       ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising);
+       if (ret < 0)
+               netdev_warn(ndev, "Failed to configure PCS: %d\n", ret);
+
+       return ret;
 }
 
-static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
-                              const struct phylink_link_state *state)
+static const struct phylink_pcs_ops axienet_pcs_ops = {
+       .pcs_get_state = axienet_pcs_get_state,
+       .pcs_config = axienet_pcs_config,
+       .pcs_an_restart = axienet_pcs_an_restart,
+};
+
+static struct phylink_pcs *axienet_mac_select_pcs(struct phylink_config *config,
+                                                 phy_interface_t interface)
 {
        struct net_device *ndev = to_net_dev(config->dev);
        struct axienet_local *lp = netdev_priv(ndev);
-       int ret;
 
-       switch (state->interface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               ret = phylink_mii_c22_pcs_config(lp->pcs_phy, mode,
-                                                state->interface,
-                                                state->advertising);
-               if (ret < 0)
-                       netdev_warn(ndev, "Failed to configure PCS: %d\n",
-                                   ret);
-               break;
+       if (interface == PHY_INTERFACE_MODE_1000BASEX ||
+           interface ==  PHY_INTERFACE_MODE_SGMII)
+               return &lp->pcs;
 
-       default:
-               break;
-       }
+       return NULL;
+}
+
+static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
+                              const struct phylink_link_state *state)
+{
+       /* nothing meaningful to do */
 }
 
 static void axienet_mac_link_down(struct phylink_config *config,
@@ -1663,9 +1663,7 @@ static void axienet_mac_link_up(struct phylink_config *config,
 
 static const struct phylink_mac_ops axienet_phylink_ops = {
        .validate = phylink_generic_validate,
-       .mac_pcs_get_state = axienet_mac_pcs_get_state,
-       .mac_an_restart = axienet_mac_an_restart,
-       .mac_prepare = axienet_mac_prepare,
+       .mac_select_pcs = axienet_mac_select_pcs,
        .mac_config = axienet_mac_config,
        .mac_link_down = axienet_mac_link_down,
        .mac_link_up = axienet_mac_link_up,
@@ -2079,12 +2077,12 @@ static int axienet_probe(struct platform_device *pdev)
                        ret = -EPROBE_DEFER;
                        goto cleanup_mdio;
                }
-               lp->phylink_config.pcs_poll = true;
+               lp->pcs.ops = &axienet_pcs_ops;
+               lp->pcs.poll = true;
        }
 
        lp->phylink_config.dev = &ndev->dev;
        lp->phylink_config.type = PHYLINK_NETDEV;
-       lp->phylink_config.legacy_pre_march2020 = true;
        lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
                MAC_10FD | MAC_100FD | MAC_1000FD;
 
index ebd2870..5805e4a 100644 (file)
@@ -1514,10 +1514,9 @@ acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level,
 {
        struct acpi_device *device;
        bool *found = context;
-       int result;
 
-       result = acpi_bus_get_device(obj_handle, &device);
-       if (result)
+       device = acpi_fetch_acpi_dev(obj_handle);
+       if (!device)
                return AE_OK;
 
        if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID))
index 06ddb85..8ff351a 100644 (file)
@@ -101,6 +101,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .aggregation    = true,
                                .status_enable  = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .pad_align      = ilog2(sizeof(u32)),
                                },
                        },
@@ -148,6 +149,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .qmap           = true,
                                .aggregation    = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .aggr_close_eof = true,
                                },
                        },
index 760c22b..d1c466a 100644 (file)
@@ -92,6 +92,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .aggregation    = true,
                                .status_enable  = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .pad_align      = ilog2(sizeof(u32)),
                                },
                        },
@@ -140,6 +141,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .qmap           = true,
                                .aggregation    = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .aggr_close_eof = true,
                                },
                        },
index fea9145..b1991cc 100644 (file)
@@ -86,6 +86,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .aggregation    = true,
                                .status_enable  = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .pad_align      = ilog2(sizeof(u32)),
                                },
                        },
@@ -133,6 +134,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .qmap           = true,
                                .aggregation    = true,
                                .rx = {
+                                       .buffer_size    = 32768,
                                        .aggr_close_eof = true,
                                },
                        },
index 2a231e7..1190a43 100644 (file)
@@ -82,6 +82,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .aggregation    = true,
                                .status_enable  = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .pad_align      = ilog2(sizeof(u32)),
                                },
                        },
@@ -130,6 +131,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .qmap           = true,
                                .aggregation    = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .aggr_close_eof = true,
                                },
                        },
index 2da2c41..944f72b 100644 (file)
@@ -95,6 +95,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .aggregation    = true,
                                .status_enable  = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .pad_align      = ilog2(sizeof(u32)),
                                },
                        },
@@ -142,6 +143,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .qmap           = true,
                                .aggregation    = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .aggr_close_eof = true,
                                },
                        },
index 2421b5a..16786bf 100644 (file)
@@ -87,6 +87,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .aggregation    = true,
                                .status_enable  = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .pad_align      = ilog2(sizeof(u32)),
                                },
                        },
@@ -134,6 +135,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                                .qmap           = true,
                                .aggregation    = true,
                                .rx = {
+                                       .buffer_size    = 8192,
                                        .aggr_close_eof = true,
                                },
                        },
index 6d329e9..dbbeecf 100644 (file)
@@ -112,6 +112,7 @@ struct ipa_endpoint_tx_data {
 
 /**
  * struct ipa_endpoint_rx_data - configuration data for RX endpoints
+ * @buffer_size: requested receive buffer size (bytes)
  * @pad_align: power-of-2 boundary to which packet payload is aligned
  * @aggr_close_eof: whether aggregation closes on end-of-frame
  *
@@ -125,6 +126,7 @@ struct ipa_endpoint_tx_data {
  * a "frame" consisting of several transfers has ended.
  */
 struct ipa_endpoint_rx_data {
+       u32 buffer_size;
        u32 pad_align;
        bool aggr_close_eof;
 };
index 68291a3..fffd0a7 100644 (file)
@@ -27,9 +27,6 @@
 
 #define IPA_REPLENISH_BATCH    16
 
-/* RX buffer is 1 page (or a power-of-2 contiguous pages) */
-#define IPA_RX_BUFFER_SIZE     8192    /* PAGE_SIZE > 4096 wastes a LOT */
-
 /* The amount of RX buffer space consumed by standard skb overhead */
 #define IPA_RX_BUFFER_OVERHEAD (PAGE_SIZE - SKB_MAX_ORDER(NET_SKB_PAD, 0))
 
@@ -75,6 +72,14 @@ struct ipa_status {
 #define IPA_STATUS_FLAGS1_RT_RULE_ID_FMASK     GENMASK(31, 22)
 #define IPA_STATUS_FLAGS2_TAG_FMASK            GENMASK_ULL(63, 16)
 
+static u32 aggr_byte_limit_max(enum ipa_version version)
+{
+       if (version < IPA_VERSION_4_5)
+               return field_max(aggr_byte_limit_fmask(true));
+
+       return field_max(aggr_byte_limit_fmask(false));
+}
+
 static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
                            const struct ipa_gsi_endpoint_data *all_data,
                            const struct ipa_gsi_endpoint_data *data)
@@ -87,6 +92,9 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
                return true;
 
        if (!data->toward_ipa) {
+               u32 buffer_size;
+               u32 limit;
+
                if (data->endpoint.filter_support) {
                        dev_err(dev, "filtering not supported for "
                                        "RX endpoint %u\n",
@@ -94,6 +102,41 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
                        return false;
                }
 
+               /* Nothing more to check for non-AP RX */
+               if (data->ee_id != GSI_EE_AP)
+                       return true;
+
+               buffer_size = data->endpoint.config.rx.buffer_size;
+               /* The buffer size must hold an MTU plus overhead */
+               limit = IPA_MTU + IPA_RX_BUFFER_OVERHEAD;
+               if (buffer_size < limit) {
+                       dev_err(dev, "RX buffer size too small for RX endpoint %u (%u < %u)\n",
+                               data->endpoint_id, buffer_size, limit);
+                       return false;
+               }
+
+               /* For an endpoint supporting receive aggregation, the
+                * aggregation byte limit defines the point at which an
+                * aggregation window will close.  It is programmed into the
+                * IPA hardware as a number of KB.  We don't use "hard byte
+                * limit" aggregation, so we need to supply enough space in
+                * a receive buffer to hold a complete MTU plus normal skb
+                * overhead *after* that aggregation byte limit has been
+                * crossed.
+                *
+                * This check just ensures the receive buffer size doesn't
+                * exceed what's representable in the aggregation limit field.
+                */
+               if (data->endpoint.config.aggregation) {
+                       limit += SZ_1K * aggr_byte_limit_max(ipa->version);
+                       if (buffer_size > limit) {
+                               dev_err(dev, "RX buffer size too large for aggregated RX endpoint %u (%u > %u)\n",
+                                       data->endpoint_id, buffer_size, limit);
+
+                               return false;
+                       }
+               }
+
                return true;    /* Nothing more to check for RX */
        }
 
@@ -156,21 +199,12 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
        return true;
 }
 
-static u32 aggr_byte_limit_max(enum ipa_version version)
-{
-       if (version < IPA_VERSION_4_5)
-               return field_max(aggr_byte_limit_fmask(true));
-
-       return field_max(aggr_byte_limit_fmask(false));
-}
-
 static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
                                    const struct ipa_gsi_endpoint_data *data)
 {
        const struct ipa_gsi_endpoint_data *dp = data;
        struct device *dev = &ipa->pdev->dev;
        enum ipa_endpoint_name name;
-       u32 limit;
 
        if (count > IPA_ENDPOINT_COUNT) {
                dev_err(dev, "too many endpoints specified (%u > %u)\n",
@@ -178,26 +212,6 @@ static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
                return false;
        }
 
-       /* The aggregation byte limit defines the point at which an
-        * aggregation window will close.  It is programmed into the
-        * IPA hardware as a number of KB.  We don't use "hard byte
-        * limit" aggregation, which means that we need to supply
-        * enough space in a receive buffer to hold a complete MTU
-        * plus normal skb overhead *after* that aggregation byte
-        * limit has been crossed.
-        *
-        * This check ensures we don't define a receive buffer size
-        * that would exceed what we can represent in the field that
-        * is used to program its size.
-        */
-       limit = aggr_byte_limit_max(ipa->version) * SZ_1K;
-       limit += IPA_MTU + IPA_RX_BUFFER_OVERHEAD;
-       if (limit < IPA_RX_BUFFER_SIZE) {
-               dev_err(dev, "buffer size too big for aggregation (%u > %u)\n",
-                       IPA_RX_BUFFER_SIZE, limit);
-               return false;
-       }
-
        /* Make sure needed endpoints have defined data */
        if (ipa_gsi_endpoint_data_empty(&data[IPA_ENDPOINT_AP_COMMAND_TX])) {
                dev_err(dev, "command TX endpoint not defined\n");
@@ -723,13 +737,15 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint)
 
        if (endpoint->data->aggregation) {
                if (!endpoint->toward_ipa) {
+                       const struct ipa_endpoint_rx_data *rx_data;
                        bool close_eof;
                        u32 limit;
 
+                       rx_data = &endpoint->data->rx;
                        val |= u32_encode_bits(IPA_ENABLE_AGGR, AGGR_EN_FMASK);
                        val |= u32_encode_bits(IPA_GENERIC, AGGR_TYPE_FMASK);
 
-                       limit = ipa_aggr_size_kb(IPA_RX_BUFFER_SIZE);
+                       limit = ipa_aggr_size_kb(rx_data->buffer_size);
                        val |= aggr_byte_limit_encoded(version, limit);
 
                        limit = IPA_AGGR_TIME_LIMIT;
@@ -737,7 +753,7 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint)
 
                        /* AGGR_PKT_LIMIT is 0 (unlimited) */
 
-                       close_eof = endpoint->data->rx.aggr_close_eof;
+                       close_eof = rx_data->aggr_close_eof;
                        val |= aggr_sw_eof_active_encoded(version, close_eof);
 
                        /* AGGR_HARD_BYTE_LIMIT_ENABLE is 0 */
@@ -1025,11 +1041,13 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint)
        struct gsi_trans *trans;
        bool doorbell = false;
        struct page *page;
+       u32 buffer_size;
        u32 offset;
        u32 len;
        int ret;
 
-       page = dev_alloc_pages(get_order(IPA_RX_BUFFER_SIZE));
+       buffer_size = endpoint->data->rx.buffer_size;
+       page = dev_alloc_pages(get_order(buffer_size));
        if (!page)
                return -ENOMEM;
 
@@ -1039,7 +1057,7 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint)
 
        /* Offset the buffer to make space for skb headroom */
        offset = NET_SKB_PAD;
-       len = IPA_RX_BUFFER_SIZE - offset;
+       len = buffer_size - offset;
 
        ret = gsi_trans_page_add(trans, page, len, offset);
        if (ret)
@@ -1058,7 +1076,7 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint)
 err_trans_free:
        gsi_trans_free(trans);
 err_free_pages:
-       __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
+       __free_pages(page, get_order(buffer_size));
 
        return -ENOMEM;
 }
@@ -1183,15 +1201,16 @@ static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint,
 static bool ipa_endpoint_skb_build(struct ipa_endpoint *endpoint,
                                   struct page *page, u32 len)
 {
+       u32 buffer_size = endpoint->data->rx.buffer_size;
        struct sk_buff *skb;
 
        /* Nothing to do if there's no netdev */
        if (!endpoint->netdev)
                return false;
 
-       WARN_ON(len > SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE - NET_SKB_PAD));
+       WARN_ON(len > SKB_WITH_OVERHEAD(buffer_size - NET_SKB_PAD));
 
-       skb = build_skb(page_address(page), IPA_RX_BUFFER_SIZE);
+       skb = build_skb(page_address(page), buffer_size);
        if (skb) {
                /* Reserve the headroom and account for the data */
                skb_reserve(skb, NET_SKB_PAD);
@@ -1289,8 +1308,9 @@ static bool ipa_endpoint_status_drop(struct ipa_endpoint *endpoint,
 static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint,
                                      struct page *page, u32 total_len)
 {
+       u32 buffer_size = endpoint->data->rx.buffer_size;
        void *data = page_address(page) + NET_SKB_PAD;
-       u32 unused = IPA_RX_BUFFER_SIZE - total_len;
+       u32 unused = buffer_size - total_len;
        u32 resid = total_len;
 
        while (resid) {
@@ -1398,8 +1418,11 @@ void ipa_endpoint_trans_release(struct ipa_endpoint *endpoint,
        } else {
                struct page *page = trans->data;
 
-               if (page)
-                       __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
+               if (page) {
+                       u32 buffer_size = endpoint->data->rx.buffer_size;
+
+                       __free_pages(page, get_order(buffer_size));
+               }
        }
 }
 
index 7ab4e26..7aafc22 100644 (file)
@@ -285,7 +285,8 @@ static acpi_status acpi_register_phy(acpi_handle handle, u32 lvl,
        const union acpi_object *obj;
        u32 phy_addr;
 
-       if (acpi_bus_get_device(handle, &adev))
+       adev = acpi_fetch_acpi_dev(handle);
+       if (!adev)
                return AE_OK;
 
        if (acpi_dev_get_property(adev, "phy-channel", ACPI_TYPE_INTEGER, &obj))
index cd6742e..61418d4 100644 (file)
@@ -632,35 +632,43 @@ static void xpcs_resolve_pma(struct dw_xpcs *xpcs,
        }
 }
 
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
-                  struct phylink_link_state *state)
+static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
+                        const struct phylink_link_state *state)
 {
-       __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported) = { 0, };
        const struct xpcs_compat *compat;
+       struct dw_xpcs *xpcs;
        int i;
 
-       /* phylink expects us to report all supported modes with
-        * PHY_INTERFACE_MODE_NA, just don't limit the supported and
-        * advertising masks and exit.
-        */
-       if (state->interface == PHY_INTERFACE_MODE_NA)
-               return;
-
-       linkmode_zero(xpcs_supported);
-
+       xpcs = phylink_pcs_to_xpcs(pcs);
        compat = xpcs_find_compat(xpcs->id, state->interface);
 
-       /* Populate the supported link modes for this
-        * PHY interface type
+       /* Populate the supported link modes for this PHY interface type.
+        * FIXME: what about the port modes and autoneg bit? This masks
+        * all those away.
         */
        if (compat)
                for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
                        set_bit(compat->supported[i], xpcs_supported);
 
        linkmode_and(supported, supported, xpcs_supported);
-       linkmode_and(state->advertising, state->advertising, xpcs_supported);
+
+       return 0;
+}
+
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
+{
+       int i, j;
+
+       for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
+               const struct xpcs_compat *compat = &xpcs->id->compat[i];
+
+               for (j = 0; j < compat->num_interfaces; j++)
+                       if (compat->interface[j] < PHY_INTERFACE_MODE_MAX)
+                               __set_bit(compat->interface[j], interfaces);
+       }
 }
-EXPORT_SYMBOL_GPL(xpcs_validate);
+EXPORT_SYMBOL_GPL(xpcs_get_interfaces);
 
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable)
 {
@@ -1106,6 +1114,7 @@ static const struct xpcs_id xpcs_id_list[] = {
 };
 
 static const struct phylink_pcs_ops xpcs_phylink_ops = {
+       .pcs_validate = xpcs_validate,
        .pcs_config = xpcs_config,
        .pcs_get_state = xpcs_get_state,
        .pcs_link_up = xpcs_link_up,
index 968dd43..a8db1a1 100644 (file)
@@ -533,9 +533,7 @@ static int aqcs109_config_init(struct phy_device *phydev)
         * PMA speed ability bits are the same for all members of the family,
         * AQCS109 however supports speeds up to 2.5G only.
         */
-       ret = phy_set_max_speed(phydev, SPEED_2500);
-       if (ret)
-               return ret;
+       phy_set_max_speed(phydev, SPEED_2500);
 
        return aqr107_set_downshift(phydev, MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT);
 }
index 29aa811..7392600 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/consumer.h>
+#include <linux/phylink.h>
+#include <linux/sfp.h>
 #include <dt-bindings/net/qca-ar803x.h>
 
 #define AT803X_SPECIFIC_FUNCTION_CONTROL       0x10
@@ -51,6 +53,8 @@
 #define AT803X_INTR_ENABLE_PAGE_RECEIVED       BIT(12)
 #define AT803X_INTR_ENABLE_LINK_FAIL           BIT(11)
 #define AT803X_INTR_ENABLE_LINK_SUCCESS                BIT(10)
+#define AT803X_INTR_ENABLE_LINK_FAIL_BX                BIT(8)
+#define AT803X_INTR_ENABLE_LINK_SUCCESS_BX     BIT(7)
 #define AT803X_INTR_ENABLE_WIRESPEED_DOWNGRADE BIT(5)
 #define AT803X_INTR_ENABLE_POLARITY_CHANGED    BIT(1)
 #define AT803X_INTR_ENABLE_WOL                 BIT(0)
 #define AT803X_DEBUG_DATA                      0x1E
 
 #define AT803X_MODE_CFG_MASK                   0x0F
-#define AT803X_MODE_CFG_SGMII                  0x01
+#define AT803X_MODE_CFG_BASET_RGMII            0x00
+#define AT803X_MODE_CFG_BASET_SGMII            0x01
+#define AT803X_MODE_CFG_BX1000_RGMII_50OHM     0x02
+#define AT803X_MODE_CFG_BX1000_RGMII_75OHM     0x03
+#define AT803X_MODE_CFG_BX1000_CONV_50OHM      0x04
+#define AT803X_MODE_CFG_BX1000_CONV_75OHM      0x05
+#define AT803X_MODE_CFG_FX100_RGMII_50OHM      0x06
+#define AT803X_MODE_CFG_FX100_CONV_50OHM       0x07
+#define AT803X_MODE_CFG_RGMII_AUTO_MDET                0x0B
+#define AT803X_MODE_CFG_FX100_RGMII_75OHM      0x0E
+#define AT803X_MODE_CFG_FX100_CONV_75OHM       0x0F
 
 #define AT803X_PSSR                            0x11    /*PHY-Specific Status Register*/
 #define AT803X_PSSR_MR_AN_COMPLETE             0x0200
@@ -283,6 +297,8 @@ struct at803x_priv {
        u16 clk_25m_mask;
        u8 smarteee_lpi_tw_1g;
        u8 smarteee_lpi_tw_100m;
+       bool is_fiber;
+       bool is_1000basex;
        struct regulator_dev *vddio_rdev;
        struct regulator_dev *vddh_rdev;
        struct regulator *vddio;
@@ -650,6 +666,55 @@ static int at8031_register_regulators(struct phy_device *phydev)
        return 0;
 }
 
+static int at803x_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+       struct phy_device *phydev = upstream;
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_support);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support);
+       phy_interface_t iface;
+
+       linkmode_zero(phy_support);
+       phylink_set(phy_support, 1000baseX_Full);
+       phylink_set(phy_support, 1000baseT_Full);
+       phylink_set(phy_support, Autoneg);
+       phylink_set(phy_support, Pause);
+       phylink_set(phy_support, Asym_Pause);
+
+       linkmode_zero(sfp_support);
+       sfp_parse_support(phydev->sfp_bus, id, sfp_support);
+       /* Some modules support 10G modes as well as others we support.
+        * Mask out non-supported modes so the correct interface is picked.
+        */
+       linkmode_and(sfp_support, phy_support, sfp_support);
+
+       if (linkmode_empty(sfp_support)) {
+               dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n");
+               return -EINVAL;
+       }
+
+       iface = sfp_select_interface(phydev->sfp_bus, sfp_support);
+
+       /* Only 1000Base-X is supported by AR8031/8033 as the downstream SerDes
+        * interface for use with SFP modules.
+        * However, some copper modules detected as having a preferred SGMII
+        * interface do default to and function in 1000Base-X mode, so just
+        * print a warning and allow such modules, as they may have some chance
+        * of working.
+        */
+       if (iface == PHY_INTERFACE_MODE_SGMII)
+               dev_warn(&phydev->mdio.dev, "module may not function if 1000Base-X not supported\n");
+       else if (iface != PHY_INTERFACE_MODE_1000BASEX)
+               return -EINVAL;
+
+       return 0;
+}
+
+static const struct sfp_upstream_ops at803x_sfp_ops = {
+       .attach = phy_sfp_attach,
+       .detach = phy_sfp_detach,
+       .module_insert = at803x_sfp_insert,
+};
+
 static int at803x_parse_dt(struct phy_device *phydev)
 {
        struct device_node *node = phydev->mdio.dev.of_node;
@@ -757,6 +822,11 @@ static int at803x_parse_dt(struct phy_device *phydev)
                        phydev_err(phydev, "failed to get VDDIO regulator\n");
                        return PTR_ERR(priv->vddio);
                }
+
+               /* Only AR8031/8033 support 1000Base-X for SFP modules */
+               ret = phy_sfp_probe(phydev, &at803x_sfp_ops);
+               if (ret < 0)
+                       return ret;
        }
 
        return 0;
@@ -784,16 +854,24 @@ static int at803x_probe(struct phy_device *phydev)
                        return ret;
        }
 
-       /* Some bootloaders leave the fiber page selected.
-        * Switch to the copper page, as otherwise we read
-        * the PHY capabilities from the fiber side.
-        */
        if (phydev->drv->phy_id == ATH8031_PHY_ID) {
-               phy_lock_mdio_bus(phydev);
-               ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
-               phy_unlock_mdio_bus(phydev);
-               if (ret)
+               int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
+               int mode_cfg;
+
+               if (ccr < 0)
                        goto err;
+               mode_cfg = ccr & AT803X_MODE_CFG_MASK;
+
+               switch (mode_cfg) {
+               case AT803X_MODE_CFG_BX1000_RGMII_50OHM:
+               case AT803X_MODE_CFG_BX1000_RGMII_75OHM:
+                       priv->is_1000basex = true;
+                       fallthrough;
+               case AT803X_MODE_CFG_FX100_RGMII_50OHM:
+               case AT803X_MODE_CFG_FX100_RGMII_75OHM:
+                       priv->is_fiber = true;
+                       break;
+               }
        }
 
        return 0;
@@ -815,6 +893,7 @@ static void at803x_remove(struct phy_device *phydev)
 
 static int at803x_get_features(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int err;
 
        err = genphy_read_abilities(phydev);
@@ -841,12 +920,13 @@ static int at803x_get_features(struct phy_device *phydev)
         * As a result of that, ESTATUS_1000_XFULL is set
         * to 1 even when operating in copper TP mode.
         *
-        * Remove this mode from the supported link modes,
-        * as this driver currently only supports copper
-        * operation.
+        * Remove this mode from the supported link modes
+        * when not operating in 1000BaseX mode.
         */
-       linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
-                          phydev->supported);
+       if (!priv->is_1000basex)
+               linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+                                  phydev->supported);
+
        return 0;
 }
 
@@ -910,8 +990,27 @@ static int at8031_pll_config(struct phy_device *phydev)
 
 static int at803x_config_init(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int ret;
 
+       if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+               /* Some bootloaders leave the fiber page selected.
+                * Switch to the appropriate page (fiber or copper), as otherwise we
+                * read the PHY capabilities from the wrong page.
+                */
+               phy_lock_mdio_bus(phydev);
+               ret = at803x_write_page(phydev,
+                                       priv->is_fiber ? AT803X_PAGE_FIBER :
+                                                        AT803X_PAGE_COPPER);
+               phy_unlock_mdio_bus(phydev);
+               if (ret)
+                       return ret;
+
+               ret = at8031_pll_config(phydev);
+               if (ret < 0)
+                       return ret;
+       }
+
        /* The RX and TX delay default is:
         *   after HW reset: RX delay enabled and TX delay disabled
         *   after SW reset: RX delay enabled, while TX delay retains the
@@ -941,12 +1040,6 @@ static int at803x_config_init(struct phy_device *phydev)
        if (ret < 0)
                return ret;
 
-       if (phydev->drv->phy_id == ATH8031_PHY_ID) {
-               ret = at8031_pll_config(phydev);
-               if (ret < 0)
-                       return ret;
-       }
-
        /* Ar803x extended next page bit is enabled by default. Cisco
         * multigig switches read this bit and attempt to negotiate 10Gbps
         * rates even if the next page bit is disabled. This is incorrect
@@ -967,6 +1060,7 @@ static int at803x_ack_interrupt(struct phy_device *phydev)
 
 static int at803x_config_intr(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int err;
        int value;
 
@@ -983,6 +1077,10 @@ static int at803x_config_intr(struct phy_device *phydev)
                value |= AT803X_INTR_ENABLE_DUPLEX_CHANGED;
                value |= AT803X_INTR_ENABLE_LINK_FAIL;
                value |= AT803X_INTR_ENABLE_LINK_SUCCESS;
+               if (priv->is_fiber) {
+                       value |= AT803X_INTR_ENABLE_LINK_FAIL_BX;
+                       value |= AT803X_INTR_ENABLE_LINK_SUCCESS_BX;
+               }
 
                err = phy_write(phydev, AT803X_INTR_ENABLE, value);
        } else {
@@ -1115,8 +1213,12 @@ static int at803x_read_specific_status(struct phy_device *phydev)
 
 static int at803x_read_status(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int err, old_link = phydev->link;
 
+       if (priv->is_1000basex)
+               return genphy_c37_read_status(phydev);
+
        /* Update the link, but return if there was an error */
        err = genphy_update_link(phydev);
        if (err)
@@ -1170,6 +1272,7 @@ static int at803x_config_mdix(struct phy_device *phydev, u8 ctrl)
 
 static int at803x_config_aneg(struct phy_device *phydev)
 {
+       struct at803x_priv *priv = phydev->priv;
        int ret;
 
        ret = at803x_config_mdix(phydev, phydev->mdix_ctrl);
@@ -1186,6 +1289,9 @@ static int at803x_config_aneg(struct phy_device *phydev)
                        return ret;
        }
 
+       if (priv->is_1000basex)
+               return genphy_c37_config_aneg(phydev);
+
        /* Do not restart auto-negotiation by setting ret to 0 defautly,
         * when calling __genphy_config_aneg later.
         */
index 271fc01..2001f33 100644 (file)
@@ -243,7 +243,7 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
        return count;
 }
 
-static int __set_linkmode_max_speed(u32 max_speed, unsigned long *addr)
+static void __set_linkmode_max_speed(u32 max_speed, unsigned long *addr)
 {
        const struct phy_setting *p;
        int i;
@@ -254,13 +254,11 @@ static int __set_linkmode_max_speed(u32 max_speed, unsigned long *addr)
                else
                        break;
        }
-
-       return 0;
 }
 
-static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
+static void __set_phy_supported(struct phy_device *phydev, u32 max_speed)
 {
-       return __set_linkmode_max_speed(max_speed, phydev->supported);
+       __set_linkmode_max_speed(max_speed, phydev->supported);
 }
 
 /**
@@ -273,17 +271,11 @@ static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
  * is connected to a 1G PHY. This function allows the MAC to indicate its
  * maximum speed, and so limit what the PHY will advertise.
  */
-int phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
+void phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
 {
-       int err;
-
-       err = __set_phy_supported(phydev, max_speed);
-       if (err)
-               return err;
+       __set_phy_supported(phydev, max_speed);
 
        phy_advertise_supported(phydev);
-
-       return 0;
 }
 EXPORT_SYMBOL(phy_set_max_speed);
 
@@ -440,7 +432,9 @@ int phy_speed_down_core(struct phy_device *phydev)
        if (min_common_speed == SPEED_UNKNOWN)
                return -EINVAL;
 
-       return __set_linkmode_max_speed(min_common_speed, phydev->advertising);
+       __set_linkmode_max_speed(min_common_speed, phydev->advertising);
+
+       return 0;
 }
 
 static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad,
index 4514d35..9b72334 100644 (file)
@@ -858,7 +858,6 @@ static int marvell_phy_init(struct usbnet *dev)
                reg = asix_mdio_read(dev->net, dev->mii.phy_id,
                        MII_MARVELL_LED_CTRL);
                netdev_dbg(dev->net, "MII_MARVELL_LED_CTRL (2) = 0x%04x\n", reg);
-               reg &= 0xfc0f;
        }
 
        return 0;
index 30d2912..6335d7a 100644 (file)
@@ -456,7 +456,7 @@ static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = {
 
 int st_nci_vendor_cmds_init(struct nci_dev *ndev)
 {
-       return nfc_set_vendor_cmds(ndev->nfc_dev, st_nci_vendor_cmds,
+       return nci_set_vendor_cmds(ndev, st_nci_vendor_cmds,
                                   sizeof(st_nci_vendor_cmds));
 }
 EXPORT_SYMBOL(st_nci_vendor_cmds_init);
index 7488286..bfa418d 100644 (file)
@@ -358,7 +358,7 @@ int st21nfca_vendor_cmds_init(struct nfc_hci_dev *hdev)
        struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
 
        init_completion(&info->vendor_info.req_completion);
-       return nfc_set_vendor_cmds(hdev->ndev, st21nfca_vendor_cmds,
-                                  sizeof(st21nfca_vendor_cmds));
+       return nfc_hci_set_vendor_cmds(hdev, st21nfca_vendor_cmds,
+                                      sizeof(st21nfca_vendor_cmds));
 }
 EXPORT_SYMBOL(st21nfca_vendor_cmds_init);
index 0e4bc8b..b6f2cfd 100644 (file)
@@ -317,11 +317,18 @@ no_memory:
 }
 EXPORT_SYMBOL(ptp_clock_register);
 
+static int unregister_vclock(struct device *dev, void *data)
+{
+       struct ptp_clock *ptp = dev_get_drvdata(dev);
+
+       ptp_vclock_unregister(info_to_vclock(ptp->info));
+       return 0;
+}
+
 int ptp_clock_unregister(struct ptp_clock *ptp)
 {
        if (ptp_vclock_in_use(ptp)) {
-               pr_err("ptp: virtual clock in use\n");
-               return -EBUSY;
+               device_for_each_child(&ptp->dev, NULL, unregister_vclock);
        }
 
        ptp->defunct = 1;
index 41b92dc..9233bfe 100644 (file)
@@ -14,7 +14,7 @@ static ssize_t clock_name_show(struct device *dev,
                               struct device_attribute *attr, char *page)
 {
        struct ptp_clock *ptp = dev_get_drvdata(dev);
-       return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
+       return sysfs_emit(page, "%s\n", ptp->info->name);
 }
 static DEVICE_ATTR_RO(clock_name);
 
@@ -387,7 +387,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr,
 
        mutex_unlock(&ptp->pincfg_mux);
 
-       return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan);
+       return sysfs_emit(page, "%u %u\n", func, chan);
 }
 
 static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr,
index ab1d233..cb179a3 100644 (file)
@@ -57,6 +57,30 @@ static int ptp_vclock_gettime(struct ptp_clock_info *ptp,
        return 0;
 }
 
+static int ptp_vclock_gettimex(struct ptp_clock_info *ptp,
+                              struct timespec64 *ts,
+                              struct ptp_system_timestamp *sts)
+{
+       struct ptp_vclock *vclock = info_to_vclock(ptp);
+       struct ptp_clock *pptp = vclock->pclock;
+       struct timespec64 pts;
+       unsigned long flags;
+       int err;
+       u64 ns;
+
+       err = pptp->info->gettimex64(pptp->info, &pts, sts);
+       if (err)
+               return err;
+
+       spin_lock_irqsave(&vclock->lock, flags);
+       ns = timecounter_cyc2time(&vclock->tc, timespec64_to_ns(&pts));
+       spin_unlock_irqrestore(&vclock->lock, flags);
+
+       *ts = ns_to_timespec64(ns);
+
+       return 0;
+}
+
 static int ptp_vclock_settime(struct ptp_clock_info *ptp,
                              const struct timespec64 *ts)
 {
@@ -71,6 +95,28 @@ static int ptp_vclock_settime(struct ptp_clock_info *ptp,
        return 0;
 }
 
+static int ptp_vclock_getcrosststamp(struct ptp_clock_info *ptp,
+                                    struct system_device_crosststamp *xtstamp)
+{
+       struct ptp_vclock *vclock = info_to_vclock(ptp);
+       struct ptp_clock *pptp = vclock->pclock;
+       unsigned long flags;
+       int err;
+       u64 ns;
+
+       err = pptp->info->getcrosststamp(pptp->info, xtstamp);
+       if (err)
+               return err;
+
+       spin_lock_irqsave(&vclock->lock, flags);
+       ns = timecounter_cyc2time(&vclock->tc, ktime_to_ns(xtstamp->device));
+       spin_unlock_irqrestore(&vclock->lock, flags);
+
+       xtstamp->device = ns_to_ktime(ns);
+
+       return 0;
+}
+
 static long ptp_vclock_refresh(struct ptp_clock_info *ptp)
 {
        struct ptp_vclock *vclock = info_to_vclock(ptp);
@@ -84,11 +130,9 @@ static long ptp_vclock_refresh(struct ptp_clock_info *ptp)
 static const struct ptp_clock_info ptp_vclock_info = {
        .owner          = THIS_MODULE,
        .name           = "ptp virtual clock",
-       /* The maximum ppb value that long scaled_ppm can support */
-       .max_adj        = 32767999,
+       .max_adj        = 500000000,
        .adjfine        = ptp_vclock_adjfine,
        .adjtime        = ptp_vclock_adjtime,
-       .gettime64      = ptp_vclock_gettime,
        .settime64      = ptp_vclock_settime,
        .do_aux_work    = ptp_vclock_refresh,
 };
@@ -124,6 +168,12 @@ struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock)
 
        vclock->pclock = pclock;
        vclock->info = ptp_vclock_info;
+       if (pclock->info->gettimex64)
+               vclock->info.gettimex64 = ptp_vclock_gettimex;
+       else
+               vclock->info.gettime64 = ptp_vclock_gettime;
+       if (pclock->info->getcrosststamp)
+               vclock->info.getcrosststamp = ptp_vclock_getcrosststamp;
        vclock->cc = ptp_vclock_cc;
 
        snprintf(vclock->info.name, PTP_CLOCK_NAME_LEN, "ptp%d_virt",
index fa517ae..8c92c97 100644 (file)
@@ -194,6 +194,17 @@ struct bpf_map {
        struct work_struct work;
        struct mutex freeze_mutex;
        atomic64_t writecnt;
+       /* 'Ownership' of program-containing map is claimed by the first program
+        * that is going to use this map or by the first program which FD is
+        * stored in the map to make sure that all callers and callees have the
+        * same prog type, JITed flag and xdp_has_frags flag.
+        */
+       struct {
+               spinlock_t lock;
+               enum bpf_prog_type type;
+               bool jited;
+               bool xdp_has_frags;
+       } owner;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -578,7 +589,6 @@ struct bpf_verifier_ops {
                                 const struct btf_type *t, int off, int size,
                                 enum bpf_access_type atype,
                                 u32 *next_btf_id);
-       bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
 };
 
 struct bpf_prog_offload_ops {
@@ -939,6 +949,7 @@ struct bpf_prog_aux {
        bool func_proto_unreliable;
        bool sleepable;
        bool tail_call_reachable;
+       bool xdp_has_frags;
        struct hlist_node tramp_hlist;
        /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
        const struct btf_type *attach_func_proto;
@@ -999,16 +1010,6 @@ struct bpf_prog_aux {
 };
 
 struct bpf_array_aux {
-       /* 'Ownership' of prog array is claimed by the first program that
-        * is going to use this map or by the first program which FD is
-        * stored in the map to make sure that all callers and callees have
-        * the same prog type and JITed flag.
-        */
-       struct {
-               spinlock_t lock;
-               enum bpf_prog_type type;
-               bool jited;
-       } owner;
        /* Programs with direct jumps into programs part of this array. */
        struct list_head poke_progs;
        struct bpf_map *map;
@@ -1183,7 +1184,14 @@ struct bpf_event_entry {
        struct rcu_head rcu;
 };
 
-bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+static inline bool map_type_contains_progs(struct bpf_map *map)
+{
+       return map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+              map->map_type == BPF_MAP_TYPE_DEVMAP ||
+              map->map_type == BPF_MAP_TYPE_CPUMAP;
+}
+
+bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp);
 int bpf_prog_calc_tag(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
@@ -1251,6 +1259,7 @@ struct bpf_run_ctx {};
 struct bpf_cg_run_ctx {
        struct bpf_run_ctx run_ctx;
        const struct bpf_prog_array_item *prog_item;
+       int retval;
 };
 
 struct bpf_trace_run_ctx {
@@ -1283,19 +1292,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
 
 typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
 
-static __always_inline u32
+static __always_inline int
 BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
                            const void *ctx, bpf_prog_run_fn run_prog,
-                           u32 *ret_flags)
+                           int retval, u32 *ret_flags)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       u32 ret = 1;
        u32 func_ret;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1304,27 +1313,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
        while ((prog = READ_ONCE(item->prog))) {
                run_ctx.prog_item = item;
                func_ret = run_prog(prog, ctx);
-               ret &= (func_ret & 1);
+               if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
                *(ret_flags) |= (func_ret >> 1);
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
-static __always_inline u32
+static __always_inline int
 BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
-                     const void *ctx, bpf_prog_run_fn run_prog)
+                     const void *ctx, bpf_prog_run_fn run_prog,
+                     int retval)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       u32 ret = 1;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1332,13 +1343,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
        old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
        while ((prog = READ_ONCE(item->prog))) {
                run_ctx.prog_item = item;
-               ret &= run_prog(prog, ctx);
+               if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
 static __always_inline u32
@@ -1391,19 +1403,21 @@ out:
  *   0: NET_XMIT_SUCCESS  skb should be transmitted
  *   1: NET_XMIT_DROP     skb should be dropped and cn
  *   2: NET_XMIT_CN       skb should be transmitted and cn
- *   3: -EPERM            skb should be dropped
+ *   3: -err              skb should be dropped
  */
 #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)                \
        ({                                              \
                u32 _flags = 0;                         \
                bool _cn;                               \
                u32 _ret;                               \
-               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
+               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
                _cn = _flags & BPF_RET_SET_CN;          \
-               if (_ret)                               \
+               if (_ret && !IS_ERR_VALUE((long)_ret))  \
+                       _ret = -EFAULT;                 \
+               if (!_ret)                              \
                        _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
                else                                    \
-                       _ret = (_cn ? NET_XMIT_DROP : -EPERM);          \
+                       _ret = (_cn ? NET_XMIT_DROP : _ret);            \
                _ret;                                   \
        })
 
@@ -1724,7 +1738,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
                                const union bpf_attr *kattr,
                                union bpf_attr __user *uattr);
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                    const struct bpf_prog *prog,
                    struct bpf_insn_access_aux *info);
@@ -1976,12 +1989,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
        return -ENOTSUPP;
 }
 
-static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
-                                                 struct module *owner)
-{
-       return false;
-}
-
 static inline void bpf_map_put(struct bpf_map *map)
 {
 }
@@ -2076,6 +2083,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr);
+
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
@@ -2129,6 +2139,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
        return -EOPNOTSUPP;
 }
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                                         union bpf_attr __user *uattr)
+{
+       return -EINVAL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
index e999317..7a7be8c 100644 (file)
@@ -521,6 +521,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 
 int check_ptr_off_reg(struct bpf_verifier_env *env,
                      const struct bpf_reg_state *reg, int regno);
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                            u32 regno);
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                   u32 regno, u32 mem_size);
 
@@ -564,4 +566,9 @@ static inline u32 type_flag(u32 type)
        return type & ~BPF_BASE_TYPE_MASK;
 }
 
+static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+{
+       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
+}
+
 #endif /* _LINUX_BPF_VERIFIER_H */
index 0c74348..b12cfe3 100644 (file)
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
 #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
 
+enum btf_kfunc_type {
+       BTF_KFUNC_TYPE_CHECK,
+       BTF_KFUNC_TYPE_ACQUIRE,
+       BTF_KFUNC_TYPE_RELEASE,
+       BTF_KFUNC_TYPE_RET_NULL,
+       BTF_KFUNC_TYPE_MAX,
+};
+
 struct btf;
 struct btf_member;
 struct btf_type;
 union bpf_attr;
 struct btf_show;
+struct btf_id_set;
+
+struct btf_kfunc_id_set {
+       struct module *owner;
+       union {
+               struct {
+                       struct btf_id_set *check_set;
+                       struct btf_id_set *acquire_set;
+                       struct btf_id_set *release_set;
+                       struct btf_id_set *ret_null_set;
+               };
+               struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
+       };
+};
 
 extern const struct file_operations btf_fops;
 
@@ -307,6 +329,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
 struct btf *btf_parse_vmlinux(void);
 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+                              enum bpf_prog_type prog_type,
+                              enum btf_kfunc_type type, u32 kfunc_btf_id);
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                             const struct btf_kfunc_id_set *s);
 #else
 static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
                                                    u32 type_id)
@@ -318,50 +345,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
 {
        return NULL;
 }
-#endif
-
-struct kfunc_btf_id_set {
-       struct list_head list;
-       struct btf_id_set *set;
-       struct module *owner;
-};
-
-struct kfunc_btf_id_list {
-       struct list_head list;
-       struct mutex mutex;
-};
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                              struct kfunc_btf_id_set *s);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                struct kfunc_btf_id_set *s);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-                             struct module *owner);
-
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-#else
-static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                            struct kfunc_btf_id_set *s)
-{
-}
-static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                              struct kfunc_btf_id_set *s)
+static inline bool btf_kfunc_id_set_contains(const struct btf *btf,
+                                            enum bpf_prog_type prog_type,
+                                            enum btf_kfunc_type type,
+                                            u32 kfunc_btf_id)
 {
+       return false;
 }
-static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
-                                           u32 kfunc_id, struct module *owner)
+static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                                           const struct btf_kfunc_id_set *s)
 {
-       return false;
+       return 0;
 }
-
-static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
-static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
 #endif
 
-#define DEFINE_KFUNC_BTF_ID_SET(set, name)                                     \
-       struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
-                                        THIS_MODULE }
-
 #endif
index 919c0fd..bc5d9cc 100644 (file)
@@ -11,6 +11,7 @@ struct btf_id_set {
 #ifdef CONFIG_DEBUG_INFO_BTF
 
 #include <linux/compiler.h> /* for __PASTE */
+#include <linux/compiler_attributes.h> /* for __maybe_unused */
 
 /*
  * Following macros help to define lists of BTF IDs placed
@@ -146,14 +147,14 @@ extern struct btf_id_set name;
 
 #else
 
-#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
-#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
-#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
-#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
+#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
 #define BTF_SET_END(name)
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h
new file mode 100644 (file)
index 0000000..4359fb0
--- /dev/null
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __TAG_QCA_H
+#define __TAG_QCA_H
+
+#define QCA_HDR_LEN    2
+#define QCA_HDR_VERSION        0x2
+
+#define QCA_HDR_RECV_VERSION           GENMASK(15, 14)
+#define QCA_HDR_RECV_PRIORITY          GENMASK(13, 11)
+#define QCA_HDR_RECV_TYPE              GENMASK(10, 6)
+#define QCA_HDR_RECV_FRAME_IS_TAGGED   BIT(3)
+#define QCA_HDR_RECV_SOURCE_PORT       GENMASK(2, 0)
+
+/* Packet type for recv */
+#define QCA_HDR_RECV_TYPE_NORMAL       0x0
+#define QCA_HDR_RECV_TYPE_MIB          0x1
+#define QCA_HDR_RECV_TYPE_RW_REG_ACK   0x2
+
+#define QCA_HDR_XMIT_VERSION           GENMASK(15, 14)
+#define QCA_HDR_XMIT_PRIORITY          GENMASK(13, 11)
+#define QCA_HDR_XMIT_CONTROL           GENMASK(10, 8)
+#define QCA_HDR_XMIT_FROM_CPU          BIT(7)
+#define QCA_HDR_XMIT_DP_BIT            GENMASK(6, 0)
+
+/* Packet type for xmit */
+#define QCA_HDR_XMIT_TYPE_NORMAL       0x0
+#define QCA_HDR_XMIT_TYPE_RW_REG       0x1
+
+/* Check code for a valid mgmt packet. Switch will ignore the packet
+ * with this wrong.
+ */
+#define QCA_HDR_MGMT_CHECK_CODE_VAL    0x5
+
+/* Specific define for in-band MDIO read/write with Ethernet packet */
+#define QCA_HDR_MGMT_SEQ_LEN           4 /* 4 byte for the seq */
+#define QCA_HDR_MGMT_COMMAND_LEN       4 /* 4 byte for the command */
+#define QCA_HDR_MGMT_DATA1_LEN         4 /* First 4 byte for the mdio data */
+#define QCA_HDR_MGMT_HEADER_LEN                (QCA_HDR_MGMT_SEQ_LEN + \
+                                       QCA_HDR_MGMT_COMMAND_LEN + \
+                                       QCA_HDR_MGMT_DATA1_LEN)
+
+#define QCA_HDR_MGMT_DATA2_LEN         12 /* Other 12 byte for the mdio data */
+#define QCA_HDR_MGMT_PADDING_LEN       34 /* Padding to reach the min Ethernet packet */
+
+#define QCA_HDR_MGMT_PKT_LEN           (QCA_HDR_MGMT_HEADER_LEN + \
+                                       QCA_HDR_LEN + \
+                                       QCA_HDR_MGMT_DATA2_LEN + \
+                                       QCA_HDR_MGMT_PADDING_LEN)
+
+#define QCA_HDR_MGMT_SEQ_NUM           GENMASK(31, 0)  /* 63, 32 */
+#define QCA_HDR_MGMT_CHECK_CODE                GENMASK(31, 29) /* 31, 29 */
+#define QCA_HDR_MGMT_CMD               BIT(28)         /* 28 */
+#define QCA_HDR_MGMT_LENGTH            GENMASK(23, 20) /* 23, 20 */
+#define QCA_HDR_MGMT_ADDR              GENMASK(18, 0)  /* 18, 0 */
+
+/* Special struct emulating a Ethernet header */
+struct qca_mgmt_ethhdr {
+       u32 command;            /* command bit 31:0 */
+       u32 seq;                /* seq 63:32 */
+       u32 mdio_data;          /* first 4byte mdio */
+       __be16 hdr;             /* qca hdr */
+} __packed;
+
+enum mdio_cmd {
+       MDIO_WRITE = 0x0,
+       MDIO_READ
+};
+
+struct mib_ethhdr {
+       u32 data[3];            /* first 3 mib counter */
+       __be16 hdr;             /* qca hdr */
+} __packed;
+
+struct qca_tagger_data {
+       void (*rw_reg_ack_handler)(struct dsa_switch *ds,
+                                  struct sk_buff *skb);
+       void (*mib_autocast_handler)(struct dsa_switch *ds,
+                                    struct sk_buff *skb);
+};
+
+#endif /* __TAG_QCA_H */
index 11efc45..e0853f4 100644 (file)
@@ -70,9 +70,11 @@ enum {
 /**
  * struct kernel_ethtool_ringparam - RX/TX ring configuration
  * @rx_buf_len: Current length of buffers on the rx ring.
+ * @tcp_data_split: Scatter packet headers and data to separate buffers
  */
 struct kernel_ethtool_ringparam {
        u32     rx_buf_len;
+       u8      tcp_data_split;
 };
 
 /**
index 71fa57b..d23e999 100644 (file)
@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
        s32             level;
        s32             optname;
        s32             optlen;
-       s32             retval;
+       /* for retval in struct bpf_cg_run_ctx */
+       struct task_struct *current_task;
+       /* Temporary "register" for indirect stores to ppos. */
+       u64             tmp_reg;
 };
 
 int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
index a59d25f..1e0f8a3 100644 (file)
@@ -371,19 +371,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
        return NULL;
 }
 
-static inline struct inet6_request_sock *
-                       inet6_rsk(const struct request_sock *rsk)
-{
-       return NULL;
-}
-
 static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 {
        return NULL;
 }
 
 #define inet6_rcv_saddr(__sk)  NULL
-#define tcp_twsk_ipv6only(__sk)                0
 #define inet_v6_ipv6only(__sk)         0
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 #endif /* _IPV6_H */
index f8397f3..15e0e02 100644 (file)
@@ -66,11 +66,6 @@ static inline void linkmode_mod_bit(int nr, volatile unsigned long *addr,
                linkmode_clear_bit(nr, addr);
 }
 
-static inline void linkmode_change_bit(int nr, volatile unsigned long *addr)
-{
-       __change_bit(nr, addr);
-}
-
 static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr)
 {
        return test_bit(nr, addr);
index 12ea29e..5ee1308 100644 (file)
@@ -355,56 +355,6 @@ static inline u32 mii_adv_to_ethtool_adv_x(u32 adv)
 }
 
 /**
- * mii_lpa_mod_linkmode_adv_sgmii
- * @lp_advertising: pointer to destination link mode.
- * @lpa: value of the MII_LPA register
- *
- * A small helper function that translates MII_LPA bits to
- * linkmode advertisement settings for SGMII.
- * Leaves other bits unchanged.
- */
-static inline void
-mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa)
-{
-       u32 speed_duplex = lpa & LPA_SGMII_DPX_SPD_MASK;
-
-       linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, lp_advertising,
-                        speed_duplex == LPA_SGMII_1000HALF);
-
-       linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, lp_advertising,
-                        speed_duplex == LPA_SGMII_1000FULL);
-
-       linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, lp_advertising,
-                        speed_duplex == LPA_SGMII_100HALF);
-
-       linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, lp_advertising,
-                        speed_duplex == LPA_SGMII_100FULL);
-
-       linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, lp_advertising,
-                        speed_duplex == LPA_SGMII_10HALF);
-
-       linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, lp_advertising,
-                        speed_duplex == LPA_SGMII_10FULL);
-}
-
-/**
- * mii_lpa_to_linkmode_adv_sgmii
- * @advertising: pointer to destination link mode.
- * @lpa: value of the MII_LPA register
- *
- * A small helper function that translates MII_ADVERTISE bits
- * to linkmode advertisement settings when in SGMII mode.
- * Clears the old value of advertising.
- */
-static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising,
-                                                u32 lpa)
-{
-       linkmode_zero(lp_advertising);
-
-       mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa);
-}
-
-/**
  * mii_adv_mod_linkmode_adv_t
  * @advertising:pointer to destination link mode.
  * @adv: value of the MII_ADVERTISE register
index 598ac3b..27145c4 100644 (file)
@@ -64,13 +64,6 @@ enum {
 };
 
 enum {
-       MLX5_MODIFY_TIR_BITMASK_LRO                   = 0x0,
-       MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE        = 0x1,
-       MLX5_MODIFY_TIR_BITMASK_HASH                  = 0x2,
-       MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN   = 0x3
-};
-
-enum {
        MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE        = 0x0,
        MLX5_SET_HCA_CAP_OP_MOD_ODP                   = 0x2,
        MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
index 1ec6318..bda1c38 100644 (file)
@@ -135,15 +135,6 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
        extack->cookie_len = sizeof(cookie);
 }
 
-static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack,
-                                           u32 cookie)
-{
-       if (!extack)
-               return;
-       memcpy(extack->cookie, &cookie, sizeof(cookie));
-       extack->cookie_len = sizeof(cookie);
-}
-
 void netlink_kernel_release(struct sock *sk);
 int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
 int netlink_change_ngroups(struct sock *sk, unsigned int groups);
index add077a..266eb26 100644 (file)
@@ -31,8 +31,7 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
                  phy_interface_t interface, int speed, int duplex);
 int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
                   unsigned int mode);
-void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
-                  struct phylink_link_state *state);
+void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
                    int enable);
 struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
index 6de8d7a..cd08cf1 100644 (file)
@@ -1661,7 +1661,7 @@ int phy_disable_interrupts(struct phy_device *phydev);
 void phy_request_interrupt(struct phy_device *phydev);
 void phy_free_interrupt(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
-int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
+void phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
 void phy_advertise_supported(struct phy_device *phydev);
 void phy_support_sym_pause(struct phy_device *phydev);
index 8a636e6..a27bcc4 100644 (file)
@@ -557,6 +557,7 @@ struct skb_shared_info {
         * Warning : all fields before dataref are cleared in __alloc_skb()
         */
        atomic_t        dataref;
+       unsigned int    xdp_frags_size;
 
        /* Intermediate layers must ensure that destructor_arg
         * remains valid until skb destructor */
@@ -3898,11 +3899,6 @@ static inline ktime_t net_timedelta(ktime_t t)
        return ktime_sub(ktime_get_real(), t);
 }
 
-static inline ktime_t net_invalid_timestamp(void)
-{
-       return 0;
-}
-
 static inline u8 skb_metadata_len(const struct sk_buff *skb)
 {
        return skb_shinfo(skb)->meta_len;
index 571f605..382af90 100644 (file)
@@ -88,6 +88,7 @@ struct svc_xprt {
        struct list_head        xpt_users;      /* callbacks on free */
 
        struct net              *xpt_net;
+       netns_tracker           ns_tracker;
        const struct cred       *xpt_cred;
        struct rpc_xprt         *xpt_bc_xprt;   /* NFSv4.1 backchannel */
        struct rpc_xprt_switch  *xpt_bc_xps;    /* NFSv4.1 backchannel */
index 955ea4d..3cdc8d8 100644 (file)
@@ -284,6 +284,7 @@ struct rpc_xprt {
        } stat;
 
        struct net              *xprt_net;
+       netns_tracker           ns_tracker;
        const char              *servername;
        const char              *address_strings[RPC_DISPLAY_MAX];
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
index ae66dad..254a265 100644 (file)
@@ -23,11 +23,6 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
        return (struct udphdr *)skb_transport_header(skb);
 }
 
-static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
-{
-       return (struct udphdr *)skb_inner_transport_header(skb);
-}
-
 #define UDP_HTABLE_SIZE_MIN            (CONFIG_BASE_SMALL ? 128 : 256)
 
 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
index 8221af1..0f9790c 100644 (file)
@@ -187,18 +187,12 @@ typedef struct {
 
 typedef struct ax25_route {
        struct ax25_route       *next;
-       refcount_t              refcount;
        ax25_address            callsign;
        struct net_device       *dev;
        ax25_digi               *digipeat;
        char                    ip_mode;
 } ax25_route;
 
-static inline void ax25_hold_route(ax25_route *ax25_rt)
-{
-       refcount_inc(&ax25_rt->refcount);
-}
-
 void __ax25_put_route(ax25_route *ax25_rt);
 
 extern rwlock_t ax25_route_lock;
@@ -213,12 +207,6 @@ static inline void ax25_route_lock_unuse(void)
        read_unlock(&ax25_route_lock);
 }
 
-static inline void ax25_put_route(ax25_route *ax25_rt)
-{
-       if (refcount_dec_and_test(&ax25_rt->refcount))
-               __ax25_put_route(ax25_rt);
-}
-
 typedef struct {
        char                    slave;                  /* slave_mode?   */
        struct timer_list       slave_timer;            /* timeout timer */
index 586f69d..f5caff1 100644 (file)
@@ -258,6 +258,15 @@ struct adv_info {
 
 #define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F
 
+struct monitored_device {
+       struct list_head list;
+
+       bdaddr_t bdaddr;
+       __u8     addr_type;
+       __u16    handle;
+       bool     notified;
+};
+
 struct adv_pattern {
        struct list_head list;
        __u8 ad_type;
@@ -294,6 +303,9 @@ struct adv_monitor {
 
 #define HCI_MAX_SHORT_NAME_LENGTH      10
 
+#define HCI_CONN_HANDLE_UNSET          0xffff
+#define HCI_CONN_HANDLE_MAX            0x0eff
+
 /* Min encryption key size to match with SMP */
 #define HCI_MIN_ENC_KEY_SIZE           7
 
@@ -591,6 +603,9 @@ struct hci_dev {
 
        struct delayed_work     interleave_scan;
 
+       struct list_head        monitored_devices;
+       bool                    advmon_pend_notify;
+
 #if IS_ENABLED(CONFIG_BT_LEDS)
        struct led_trigger      *power_led;
 #endif
@@ -1847,6 +1862,8 @@ void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle);
 int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
 int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status);
 int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status);
+void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
+                                 bdaddr_t *bdaddr, u8 addr_type);
 
 u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
                      u16 to_multiplier);
index 107b25d..99266f7 100644 (file)
@@ -1104,3 +1104,19 @@ struct mgmt_ev_controller_resume {
 #define MGMT_WAKE_REASON_NON_BT_WAKE           0x0
 #define MGMT_WAKE_REASON_UNEXPECTED            0x1
 #define MGMT_WAKE_REASON_REMOTE_WAKE           0x2
+
+#define MGMT_EV_ADV_MONITOR_DEVICE_FOUND       0x002f
+struct mgmt_ev_adv_monitor_device_found {
+       __le16 monitor_handle;
+       struct mgmt_addr_info addr;
+       __s8   rssi;
+       __le32 flags;
+       __le16 eir_len;
+       __u8   eir[0];
+} __packed;
+
+#define MGMT_EV_ADV_MONITOR_DEVICE_LOST                0x0030
+struct mgmt_ev_adv_monitor_device_lost {
+       __le16 monitor_handle;
+       struct mgmt_addr_info addr;
+} __packed;
index 83cfd2d..7dead85 100644 (file)
@@ -699,20 +699,6 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
 }
 
 /* Caller must hold rcu_read_lock() for read */
-static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
-                                              const u8 *mac)
-{
-       struct list_head *iter;
-       struct slave *tmp;
-
-       bond_for_each_slave_rcu(bond, tmp, iter)
-               if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
-                       return tmp;
-
-       return NULL;
-}
-
-/* Caller must hold rcu_read_lock() for read */
 static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
 {
        struct list_head *iter;
index 57b3e4e..ca8c14b 100644 (file)
@@ -278,6 +278,10 @@ struct dsa_port {
 
        u8                      devlink_port_setup:1;
 
+       /* Master state bits, valid only on CPU ports */
+       u8                      master_admin_up:1;
+       u8                      master_oper_up:1;
+
        u8                      setup:1;
 
        struct device_node      *dn;
@@ -478,6 +482,12 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp)
        return dp->type == DSA_PORT_TYPE_UNUSED;
 }
 
+static inline bool dsa_port_master_is_operational(struct dsa_port *dp)
+{
+       return dsa_port_is_cpu(dp) && dp->master_admin_up &&
+              dp->master_oper_up;
+}
+
 static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
 {
        return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED;
@@ -581,6 +591,24 @@ static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port)
        return port == dsa_upstream_port(ds, port);
 }
 
+/* Return true if this is a DSA port leading away from the CPU */
+static inline bool dsa_is_downstream_port(struct dsa_switch *ds, int port)
+{
+       return dsa_is_dsa_port(ds, port) && !dsa_is_upstream_port(ds, port);
+}
+
+/* Return the local port used to reach the CPU port */
+static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds)
+{
+       struct dsa_port *dp;
+
+       dsa_switch_for_each_available_port(dp, ds) {
+               return dsa_upstream_port(ds, dp->index);
+       }
+
+       return ds->num_ports;
+}
+
 /* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning
  * that the routing port from @downstream_ds to @upstream_ds is also the port
  * which @downstream_ds uses to reach its dedicated CPU.
@@ -1036,6 +1064,13 @@ struct dsa_switch_ops {
        int     (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid,
                                      u16 flags);
        int     (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid);
+
+       /*
+        * DSA master tracking operations
+        */
+       void    (*master_state_change)(struct dsa_switch *ds,
+                                      const struct net_device *master,
+                                      bool operational);
 };
 
 #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes)           \
index 4ad47d9..3908296 100644 (file)
@@ -285,6 +285,14 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
 bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
 void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
 
+static inline unsigned long
+reqsk_timeout(struct request_sock *req, unsigned long max_timeout)
+{
+       u64 timeout = (u64)req->timeout << req->num_timeout;
+
+       return (unsigned long)min_t(u64, timeout, max_timeout);
+}
+
 static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk)
 {
        /* The below has to be done to allow calling inet_csk_destroy_sock */
index dfd919b..463ae5d 100644 (file)
@@ -65,13 +65,13 @@ struct inet_timewait_sock {
        /* these three are in inet_sock */
        __be16                  tw_sport;
        /* And these are ours. */
-       unsigned int            tw_kill         : 1,
-                               tw_transparent  : 1,
+       unsigned int            tw_transparent  : 1,
                                tw_flowlabel    : 20,
-                               tw_pad          : 2,    /* 2 bits hole */
+                               tw_pad          : 3,    /* 3 bits hole */
                                tw_tos          : 8;
        u32                     tw_txhash;
        u32                     tw_priority;
+       u32                     tw_bslot; /* bind bucket slot */
        struct timer_list       tw_timer;
        struct inet_bind_bucket *tw_tb;
 };
@@ -110,8 +110,6 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo
 
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
-
 static inline
 struct net *twsk_net(const struct inet_timewait_sock *twsk)
 {
index b51bae4..3984f2c 100644 (file)
@@ -517,7 +517,6 @@ void ip_dst_metrics_put(struct dst_entry *dst)
                kfree(p);
 }
 
-u32 ip_idents_reserve(u32 hash, int segs);
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
 
 static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
@@ -712,7 +711,7 @@ int ip_forward(struct sk_buff *skb);
  */
 
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
-                     __be32 daddr, struct rtable *rt, int is_frag);
+                     __be32 daddr, struct rtable *rt);
 
 int __ip_options_echo(struct net *net, struct ip_options *dopt,
                      struct sk_buff *skb, const struct ip_options *sopt);
index 3afcb12..082f302 100644 (file)
@@ -437,8 +437,16 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk,
                                          struct ipv6_txoptions *opt,
                                          int newtype,
                                          struct ipv6_opt_hdr *newopt);
-struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
-                                         struct ipv6_txoptions *opt);
+struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space,
+                                           struct ipv6_txoptions *opt);
+
+static inline struct ipv6_txoptions *
+ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt)
+{
+       if (!opt)
+               return NULL;
+       return __ipv6_fixup_options(opt_space, opt);
+}
 
 bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
                       const struct inet6_skb_parm *opt);
@@ -1020,7 +1028,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
-                            struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
+                            struct ipcm6_cookie *ipc6,
                             struct rt6_info *rt, unsigned int flags,
                             struct inet_cork_full *cork);
 
diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h
new file mode 100644 (file)
index 0000000..a473b56
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_BPF_H
+#define _NF_CONNTRACK_BPF_H
+
+#include <linux/btf.h>
+#include <linux/kconfig.h>
+
+#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern int register_nf_conntrack_bpf(void);
+
+#else
+
+static inline int register_nf_conntrack_bpf(void)
+{
+       return 0;
+}
+
+#endif
+
+#endif /* _NF_CONNTRACK_BPF_H */
index 552bc25..388244e 100644 (file)
@@ -10,6 +10,7 @@ struct netns_core {
        struct ctl_table_header *sysctl_hdr;
 
        int     sysctl_somaxconn;
+       u8      sysctl_txrehash;
 
 #ifdef CONFIG_PROC_FS
        struct prot_inuse __percpu *prot_inuse;
index 7855764..f068786 100644 (file)
@@ -31,18 +31,16 @@ struct ping_group_range {
 struct inet_hashinfo;
 
 struct inet_timewait_death_row {
-       atomic_t                tw_count;
-       char                    tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)];
+       refcount_t              tw_refcount;
 
-       struct inet_hashinfo    *hashinfo;
+       struct inet_hashinfo    *hashinfo ____cacheline_aligned_in_smp;
        int                     sysctl_max_tw_buckets;
 };
 
 struct tcp_fastopen_context;
 
 struct netns_ipv4 {
-       /* Please keep tcp_death_row at first field in netns_ipv4 */
-       struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp;
+       struct inet_timewait_death_row *tcp_death_row;
 
 #ifdef CONFIG_SYSCTL
        struct ctl_table_header *forw_hdr;
@@ -70,11 +68,9 @@ struct netns_ipv4 {
        struct hlist_head       *fib_table_hash;
        struct sock             *fibnl;
 
-       struct sock  * __percpu *icmp_sk;
        struct sock             *mc_autojoin_sk;
 
        struct inet_peer_base   *peers;
-       struct sock  * __percpu *tcp_sk;
        struct fqdir            *fqdir;
 
        u8 sysctl_icmp_echo_ignore_all;
@@ -87,6 +83,7 @@ struct netns_ipv4 {
 
        u32 ip_rt_min_pmtu;
        int ip_rt_mtu_expires;
+       int ip_rt_min_advmss;
 
        struct local_ports ip_local_ports;
 
index a4b5503..30cdfc4 100644 (file)
@@ -88,7 +88,6 @@ struct netns_ipv6 {
        struct fib6_table       *fib6_local_tbl;
        struct fib_rules_ops    *fib6_rules_ops;
 #endif
-       struct sock * __percpu  *icmp_sk;
        struct sock             *ndisc_sk;
        struct sock             *tcp_sk;
        struct sock             *igmp_sk;
index 79a8055..97c3c19 100644 (file)
@@ -201,21 +201,67 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 }
 #endif
 
-void page_pool_put_page(struct page_pool *pool, struct page *page,
-                       unsigned int dma_sync_size, bool allow_direct);
+void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
+                                 unsigned int dma_sync_size,
+                                 bool allow_direct);
 
-/* Same as above but will try to sync the entire area pool->max_len */
-static inline void page_pool_put_full_page(struct page_pool *pool,
-                                          struct page *page, bool allow_direct)
+static inline void page_pool_fragment_page(struct page *page, long nr)
+{
+       atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_defrag_page(struct page *page, long nr)
+{
+       long ret;
+
+       /* If nr == pp_frag_count then we have cleared all remaining
+        * references to the page. No need to actually overwrite it, instead
+        * we can leave this to be overwritten by the calling function.
+        *
+        * The main advantage to doing this is that an atomic_read is
+        * generally a much cheaper operation than an atomic update,
+        * especially when dealing with a page that may be partitioned
+        * into only 2 or 3 pieces.
+        */
+       if (atomic_long_read(&page->pp_frag_count) == nr)
+               return 0;
+
+       ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+       WARN_ON(ret < 0);
+       return ret;
+}
+
+static inline bool page_pool_is_last_frag(struct page_pool *pool,
+                                         struct page *page)
+{
+       /* If fragments aren't enabled or count is 0 we were the last user */
+       return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+              (page_pool_defrag_page(page, 1) == 0);
+}
+
+static inline void page_pool_put_page(struct page_pool *pool,
+                                     struct page *page,
+                                     unsigned int dma_sync_size,
+                                     bool allow_direct)
 {
        /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
         * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
         */
 #ifdef CONFIG_PAGE_POOL
-       page_pool_put_page(pool, page, -1, allow_direct);
+       if (!page_pool_is_last_frag(pool, page))
+               return;
+
+       page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
 #endif
 }
 
+/* Same as above but will try to sync the entire area pool->max_len */
+static inline void page_pool_put_full_page(struct page_pool *pool,
+                                          struct page *page, bool allow_direct)
+{
+       page_pool_put_page(pool, page, -1, allow_direct);
+}
+
 /* Same as above but the caller must guarantee safe context. e.g NAPI */
 static inline void page_pool_recycle_direct(struct page_pool *pool,
                                            struct page *page)
@@ -243,30 +289,6 @@ static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
                page->dma_addr_upper = upper_32_bits(addr);
 }
 
-static inline void page_pool_set_frag_count(struct page *page, long nr)
-{
-       atomic_long_set(&page->pp_frag_count, nr);
-}
-
-static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
-                                                         long nr)
-{
-       long ret;
-
-       /* As suggested by Alexander, atomic_long_read() may cover up the
-        * reference count errors, so avoid calling atomic_long_read() in
-        * the cases of freeing or draining the page_frags, where we would
-        * not expect it to match or that are slowpath anyway.
-        */
-       if (__builtin_constant_p(nr) &&
-           atomic_long_read(&page->pp_frag_count) == nr)
-               return 0;
-
-       ret = atomic_long_sub_return(nr, &page->pp_frag_count);
-       WARN_ON(ret < 0);
-       return ret;
-}
-
 static inline bool is_page_pool_compiled_in(void)
 {
 #ifdef CONFIG_PAGE_POOL
index 9e7b21c..44a3553 100644 (file)
@@ -63,12 +63,6 @@ static inline psched_time_t psched_get_time(void)
        return PSCHED_NS2TICKS(ktime_get_ns());
 }
 
-static inline psched_tdiff_t
-psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
-{
-       return min(tv1 - tv2, bound);
-}
-
 struct qdisc_watchdog {
        u64             last_expires;
        struct hrtimer  timer;
index 29e41ff..144c39d 100644 (file)
@@ -70,6 +70,7 @@ struct request_sock {
        struct saved_syn                *saved_syn;
        u32                             secid;
        u32                             peer_secid;
+       u32                             timeout;
 };
 
 static inline struct request_sock *inet_reqsk(const struct sock *sk)
@@ -104,6 +105,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
        sk_node_init(&req_to_sk(req)->sk_node);
        sk_tx_queue_clear(req_to_sk(req));
        req->saved_syn = NULL;
+       req->timeout = 0;
        req->num_timeout = 0;
        req->num_retrans = 0;
        req->sk = NULL;
index 472843e..9bab396 100644 (file)
@@ -518,11 +518,6 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
        BUILD_BUG_ON(sizeof(qcb->data) < sz);
 }
 
-static inline int qdisc_qlen_cpu(const struct Qdisc *q)
-{
-       return this_cpu_ptr(q->cpu_qstats)->qlen;
-}
-
 static inline int qdisc_qlen(const struct Qdisc *q)
 {
        return q->q.qlen;
index ff9b508..d6c13f0 100644 (file)
@@ -316,6 +316,7 @@ struct sk_filter;
   *    @sk_rcvtimeo: %SO_RCVTIMEO setting
   *    @sk_sndtimeo: %SO_SNDTIMEO setting
   *    @sk_txhash: computed flow hash for use on transmit
+  *    @sk_txrehash: enable TX hash rethink
   *    @sk_filter: socket filtering instructions
   *    @sk_timer: sock cleanup timer
   *    @sk_stamp: time stamp of last packet received
@@ -491,6 +492,7 @@ struct sock {
        u32                     sk_ack_backlog;
        u32                     sk_max_ack_backlog;
        kuid_t                  sk_uid;
+       u8                      sk_txrehash;
 #ifdef CONFIG_NET_RX_BUSY_POLL
        u8                      sk_prefer_busy_poll;
        u16                     sk_busy_poll_budget;
@@ -587,6 +589,18 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk)
                           __tmp | SK_USER_DATA_NOCOPY);                \
 })
 
+static inline
+struct net *sock_net(const struct sock *sk)
+{
+       return read_pnet(&sk->sk_net);
+}
+
+static inline
+void sock_net_set(struct sock *sk, struct net *net)
+{
+       write_pnet(&sk->sk_net, net);
+}
+
 /*
  * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
  * or not whether his port will be reused by someone else. SK_FORCE_REUSE
@@ -2054,7 +2068,7 @@ static inline void sk_set_txhash(struct sock *sk)
 
 static inline bool sk_rethink_txhash(struct sock *sk)
 {
-       if (sk->sk_txhash) {
+       if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
                sk_set_txhash(sk);
                return true;
        }
@@ -2704,18 +2718,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
        __kfree_skb(skb);
 }
 
-static inline
-struct net *sock_net(const struct sock *sk)
-{
-       return read_pnet(&sk->sk_net);
-}
-
-static inline
-void sock_net_set(struct sock *sk, struct net *net)
-{
-       write_pnet(&sk->sk_net, net);
-}
-
 static inline bool
 skb_sk_is_prefetched(struct sk_buff *skb)
 {
index b9fc978..eff2487 100644 (file)
@@ -2358,7 +2358,7 @@ static inline u32 tcp_timeout_init(struct sock *sk)
 
        if (timeout <= 0)
                timeout = TCP_TIMEOUT_INIT;
-       return timeout;
+       return min_t(int, timeout, TCP_RTO_MAX);
 }
 
 static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
index 9185e45..a3c5311 100644 (file)
@@ -70,49 +70,6 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
        return 0;
 }
 
-/* Slow-path computation of checksum. Socket is locked. */
-static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
-{
-       const struct udp_sock *up = udp_sk(skb->sk);
-       int cscov = up->len;
-       __wsum csum = 0;
-
-       if (up->pcflag & UDPLITE_SEND_CC) {
-               /*
-                * Sender has set `partial coverage' option on UDP-Lite socket.
-                * The special case "up->pcslen == 0" signifies full coverage.
-                */
-               if (up->pcslen < up->len) {
-                       if (0 < up->pcslen)
-                               cscov = up->pcslen;
-                       udp_hdr(skb)->len = htons(up->pcslen);
-               }
-               /*
-                * NOTE: Causes for the error case  `up->pcslen > up->len':
-                *        (i)  Application error (will not be penalized).
-                *       (ii)  Payload too big for send buffer: data is split
-                *             into several packets, each with its own header.
-                *             In this case (e.g. last segment), coverage may
-                *             exceed packet length.
-                *       Since packets with coverage length > packet length are
-                *       illegal, we fall back to the defaults here.
-                */
-       }
-
-       skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */
-
-       skb_queue_walk(&sk->sk_write_queue, skb) {
-               const int off = skb_transport_offset(skb);
-               const int len = skb->len - off;
-
-               csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
-
-               if ((cscov -= len) <= 0)
-                       break;
-       }
-       return csum;
-}
-
 /* Fast-path computation of checksum. Socket may not be locked. */
 static inline __wsum udplite_csum(struct sk_buff *skb)
 {
index 8f0812e..b7721c3 100644 (file)
@@ -60,12 +60,20 @@ struct xdp_rxq_info {
        u32 reg_state;
        struct xdp_mem_info mem;
        unsigned int napi_id;
+       u32 frag_size;
 } ____cacheline_aligned; /* perf critical, avoid false-sharing */
 
 struct xdp_txq_info {
        struct net_device *dev;
 };
 
+enum xdp_buff_flags {
+       XDP_FLAGS_HAS_FRAGS             = BIT(0), /* non-linear xdp buff */
+       XDP_FLAGS_FRAGS_PF_MEMALLOC     = BIT(1), /* xdp paged memory is under
+                                                  * pressure
+                                                  */
+};
+
 struct xdp_buff {
        void *data;
        void *data_end;
@@ -74,13 +82,40 @@ struct xdp_buff {
        struct xdp_rxq_info *rxq;
        struct xdp_txq_info *txq;
        u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
+       u32 flags; /* supported values defined in xdp_buff_flags */
 };
 
+static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+{
+       return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
+{
+       xdp->flags |= XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
+{
+       xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
+}
+
+static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+       return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
+static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+       xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
+}
+
 static __always_inline void
 xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
 {
        xdp->frame_sz = frame_sz;
        xdp->rxq = rxq;
+       xdp->flags = 0;
 }
 
 static __always_inline void
@@ -111,6 +146,20 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
        return (struct skb_shared_info *)xdp_data_hard_end(xdp);
 }
 
+static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+{
+       unsigned int len = xdp->data_end - xdp->data;
+       struct skb_shared_info *sinfo;
+
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       len += sinfo->xdp_frags_size;
+out:
+       return len;
+}
+
 struct xdp_frame {
        void *data;
        u16 len;
@@ -122,8 +171,19 @@ struct xdp_frame {
         */
        struct xdp_mem_info mem;
        struct net_device *dev_rx; /* used by cpumap */
+       u32 flags; /* supported values defined in xdp_buff_flags */
 };
 
+static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+{
+       return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
+}
+
+static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+{
+       return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
+}
+
 #define XDP_BULK_QUEUE_SIZE    16
 struct xdp_frame_bulk {
        int count;
@@ -159,6 +219,19 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
        frame->dev_rx = NULL;
 }
 
+static inline void
+xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
+                          unsigned int size, unsigned int truesize,
+                          bool pfmemalloc)
+{
+       skb_shinfo(skb)->nr_frags = nr_frags;
+
+       skb->len += size;
+       skb->data_len += size;
+       skb->truesize += truesize;
+       skb->pfmemalloc |= pfmemalloc;
+}
+
 /* Avoids inlining WARN macro in fast-path */
 void xdp_warn(const char *msg, const char *func, const int line);
 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
@@ -180,6 +253,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
        xdp->data_end = frame->data + frame->len;
        xdp->data_meta = frame->data - frame->metasize;
        xdp->frame_sz = frame->frame_sz;
+       xdp->flags = frame->flags;
 }
 
 static inline
@@ -206,6 +280,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp,
        xdp_frame->headroom = headroom - sizeof(*xdp_frame);
        xdp_frame->metasize = metasize;
        xdp_frame->frame_sz = xdp->frame_sz;
+       xdp_frame->flags = xdp->flags;
 
        return 0;
 }
@@ -230,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
        return xdp_frame;
 }
 
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+                 struct xdp_buff *xdp);
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
@@ -246,14 +323,37 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
 static inline void xdp_release_frame(struct xdp_frame *xdpf)
 {
        struct xdp_mem_info *mem = &xdpf->mem;
+       struct skb_shared_info *sinfo;
+       int i;
 
        /* Curr only page_pool needs this */
-       if (mem->type == MEM_TYPE_PAGE_POOL)
-               __xdp_release_frame(xdpf->data, mem);
+       if (mem->type != MEM_TYPE_PAGE_POOL)
+               return;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_release_frame(page_address(page), mem);
+       }
+out:
+       __xdp_release_frame(xdpf->data, mem);
+}
+
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                      struct net_device *dev, u32 queue_index,
+                      unsigned int napi_id, u32 frag_size);
+static inline int
+xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                struct net_device *dev, u32 queue_index,
+                unsigned int napi_id)
+{
+       return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
 }
 
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
-                    struct net_device *dev, u32 queue_index, unsigned int napi_id);
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
index c77a131..467ca2f 100644 (file)
 
 #define SO_RESERVE_MEM         73
 
+#define SO_TXREHASH            74
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
index b0383d3..16a7574 100644 (file)
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
  *                     *ctx_out*, *data_in* and *data_out* must be NULL.
  *                     *repeat* must be zero.
  *
+ *             BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
  *             is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
  */
 #define BPF_F_SLEEPABLE                (1U << 4)
 
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully support xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS    (1U << 5)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
@@ -1775,6 +1782,8 @@ union bpf_attr {
  *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
+ *     Description
+ *             Get the current pid and tgid.
  *     Return
  *             A 64-bit integer containing the current tgid and pid, and
  *             created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
  *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
+ *     Description
+ *             Get the current uid and gid.
  *     Return
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
  *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
+ *     Description
+ *             Get the current task.
  *     Return
  *             A pointer to the current task struct.
  *
@@ -2369,6 +2382,8 @@ union bpf_attr {
  *             indicate that the hash is outdated and to trigger a
  *             recalculation the next time the kernel tries to access this
  *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *     Return
+ *             void.
  *
  * long bpf_get_numa_node_id(void)
  *     Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
  *             A 8-byte long unique number or 0 if *sk* is NULL.
  *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Description
+ *             Get the owner UID of the socked associated to *skb*.
  *     Return
  *             The owner UID of the socket associated to *skb*. If the socket
  *             is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
  *             The id is returned or 0 in case the id could not be retrieved.
  *
  * u64 bpf_get_current_cgroup_id(void)
+ *     Description
+ *             Get the current cgroup id based on the cgroup within which
+ *             the current task is running.
  *     Return
  *             A 64-bit integer containing the current cgroup id based
  *             on the cgroup within which the current task is running.
@@ -5018,6 +5038,44 @@ union bpf_attr {
  *
  *     Return
  *             The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *     Description
+ *             Get the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *     Description
+ *             Set the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *     Description
+ *             Get the total size of a given xdp buff (linear and paged area)
+ *     Return
+ *             The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             This helper is provided as an easy way to load data from a
+ *             xdp buffer. It can be used to load *len* bytes from *offset* from
+ *             the frame associated to *xdp_md*, into the buffer pointed by
+ *             *buf*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             Store *len* bytes from buffer *buf* into the frame
+ *             associated to *xdp_md*, at *offset*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5206,6 +5264,11 @@ union bpf_attr {
        FN(get_func_arg),               \
        FN(get_func_ret),               \
        FN(get_func_arg_cnt),           \
+       FN(get_retval),                 \
+       FN(set_retval),                 \
+       FN(xdp_get_buff_len),           \
+       FN(xdp_load_bytes),             \
+       FN(xdp_store_bytes),            \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
index cca6e47..417d428 100644 (file)
@@ -319,6 +319,12 @@ enum {
 /* RINGS */
 
 enum {
+       ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0,
+       ETHTOOL_TCP_DATA_SPLIT_DISABLED,
+       ETHTOOL_TCP_DATA_SPLIT_ENABLED,
+};
+
+enum {
        ETHTOOL_A_RINGS_UNSPEC,
        ETHTOOL_A_RINGS_HEADER,                         /* nest - _A_HEADER_* */
        ETHTOOL_A_RINGS_RX_MAX,                         /* u32 */
@@ -330,6 +336,7 @@ enum {
        ETHTOOL_A_RINGS_RX_JUMBO,                       /* u32 */
        ETHTOOL_A_RINGS_TX,                             /* u32 */
        ETHTOOL_A_RINGS_RX_BUF_LEN,                     /* u32 */
+       ETHTOOL_A_RINGS_TCP_DATA_SPLIT,                 /* u8 */
 
        /* add new constants above here */
        __ETHTOOL_A_RINGS_CNT,
index eb0a9a5..51d6bb2 100644 (file)
@@ -31,4 +31,8 @@ struct __kernel_sockaddr_storage {
 
 #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
 
+#define SOCK_TXREHASH_DEFAULT  ((u8)-1)
+#define SOCK_TXREHASH_DISABLED 0
+#define SOCK_TXREHASH_ENABLED  1
+
 #endif /* _UAPI_LINUX_SOCKET_H */
index c7a5be3..7f145ae 100644 (file)
@@ -837,13 +837,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
 static void *prog_fd_array_get_ptr(struct bpf_map *map,
                                   struct file *map_file, int fd)
 {
-       struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_prog *prog = bpf_prog_get(fd);
 
        if (IS_ERR(prog))
                return prog;
 
-       if (!bpf_prog_array_compatible(array, prog)) {
+       if (!bpf_prog_map_compatible(map, prog)) {
                bpf_prog_put(prog);
                return ERR_PTR(-EINVAL);
        }
@@ -1071,7 +1070,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
        INIT_WORK(&aux->work, prog_array_map_clear_deferred);
        INIT_LIST_HEAD(&aux->poke_progs);
        mutex_init(&aux->poke_mutex);
-       spin_lock_init(&aux->owner.lock);
 
        map = array_map_alloc(attr);
        if (IS_ERR(map)) {
index e16dafe..a1c44c1 100644 (file)
 DEFINE_IDR(btf_idr);
 DEFINE_SPINLOCK(btf_idr_lock);
 
+enum btf_kfunc_hook {
+       BTF_KFUNC_HOOK_XDP,
+       BTF_KFUNC_HOOK_TC,
+       BTF_KFUNC_HOOK_STRUCT_OPS,
+       BTF_KFUNC_HOOK_MAX,
+};
+
+enum {
+       BTF_KFUNC_SET_MAX_CNT = 32,
+};
+
+struct btf_kfunc_set_tab {
+       struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
+};
+
 struct btf {
        void *data;
        struct btf_type **types;
@@ -212,6 +227,7 @@ struct btf {
        refcount_t refcnt;
        u32 id;
        struct rcu_head rcu;
+       struct btf_kfunc_set_tab *kfunc_set_tab;
 
        /* split BTF support */
        struct btf *base_btf;
@@ -1531,8 +1547,30 @@ static void btf_free_id(struct btf *btf)
        spin_unlock_irqrestore(&btf_idr_lock, flags);
 }
 
+static void btf_free_kfunc_set_tab(struct btf *btf)
+{
+       struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
+       int hook, type;
+
+       if (!tab)
+               return;
+       /* For module BTF, we directly assign the sets being registered, so
+        * there is nothing to free except kfunc_set_tab.
+        */
+       if (btf_is_module(btf))
+               goto free_tab;
+       for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
+               for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
+                       kfree(tab->sets[hook][type]);
+       }
+free_tab:
+       kfree(tab);
+       btf->kfunc_set_tab = NULL;
+}
+
 static void btf_free(struct btf *btf)
 {
+       btf_free_kfunc_set_tab(btf);
        kvfree(btf->types);
        kvfree(btf->resolved_sizes);
        kvfree(btf->resolved_ids);
@@ -5616,17 +5654,45 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
        return true;
 }
 
+static bool is_kfunc_arg_mem_size(const struct btf *btf,
+                                 const struct btf_param *arg,
+                                 const struct bpf_reg_state *reg)
+{
+       int len, sfx_len = sizeof("__sz") - 1;
+       const struct btf_type *t;
+       const char *param_name;
+
+       t = btf_type_skip_modifiers(btf, arg->type, NULL);
+       if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+               return false;
+
+       /* In the future, this can be ported to use BTF tagging */
+       param_name = btf_name_by_offset(btf, arg->name_off);
+       if (str_is_empty(param_name))
+               return false;
+       len = strlen(param_name);
+       if (len < sfx_len)
+               return false;
+       param_name += len - sfx_len;
+       if (strncmp(param_name, "__sz", sfx_len))
+               return false;
+
+       return true;
+}
+
 static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                                    const struct btf *btf, u32 func_id,
                                    struct bpf_reg_state *regs,
                                    bool ptr_to_mem_ok)
 {
        struct bpf_verifier_log *log = &env->log;
+       u32 i, nargs, ref_id, ref_obj_id = 0;
        bool is_kfunc = btf_is_kernel(btf);
        const char *func_name, *ref_tname;
        const struct btf_type *t, *ref_t;
        const struct btf_param *args;
-       u32 i, nargs, ref_id;
+       int ref_regno = 0;
+       bool rel = false;
 
        t = btf_type_by_id(btf, func_id);
        if (!t || !btf_type_is_func(t)) {
@@ -5704,6 +5770,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                        if (reg->type == PTR_TO_BTF_ID) {
                                reg_btf = reg->btf;
                                reg_ref_id = reg->btf_id;
+                               /* Ensure only one argument is referenced PTR_TO_BTF_ID */
+                               if (reg->ref_obj_id) {
+                                       if (ref_obj_id) {
+                                               bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+                                                       regno, reg->ref_obj_id, ref_obj_id);
+                                               return -EFAULT;
+                                       }
+                                       ref_regno = regno;
+                                       ref_obj_id = reg->ref_obj_id;
+                               }
                        } else {
                                reg_btf = btf_vmlinux;
                                reg_ref_id = *reg2btf_ids[reg->type];
@@ -5727,17 +5803,33 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                        u32 type_size;
 
                        if (is_kfunc) {
+                               bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
+
                                /* Permit pointer to mem, but only when argument
                                 * type is pointer to scalar, or struct composed
                                 * (recursively) of scalars.
+                                * When arg_mem_size is true, the pointer can be
+                                * void *.
                                 */
                                if (!btf_type_is_scalar(ref_t) &&
-                                   !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) {
+                                   !__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
+                                   (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
                                        bpf_log(log,
-                                               "arg#%d pointer type %s %s must point to scalar or struct with scalar\n",
-                                               i, btf_type_str(ref_t), ref_tname);
+                                               "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+                                               i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
                                        return -EINVAL;
                                }
+
+                               /* Check for mem, len pair */
+                               if (arg_mem_size) {
+                                       if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) {
+                                               bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
+                                                       i, i + 1);
+                                               return -EINVAL;
+                                       }
+                                       i++;
+                                       continue;
+                               }
                        }
 
                        resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
@@ -5758,7 +5850,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                }
        }
 
-       return 0;
+       /* Either both are set, or neither */
+       WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
+       if (is_kfunc) {
+               rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
+                                               BTF_KFUNC_TYPE_RELEASE, func_id);
+               /* We already made sure ref_obj_id is set only for one argument */
+               if (rel && !ref_obj_id) {
+                       bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
+                               func_name);
+                       return -EINVAL;
+               }
+               /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to
+                * other kfuncs works
+                */
+       }
+       /* returns argument register number > 0 in case of reference release kfunc */
+       return rel ? ref_regno : 0;
 }
 
 /* Compare BTF of a function with given bpf_reg_state.
@@ -6200,12 +6308,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
        return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
 }
 
+enum {
+       BTF_MODULE_F_LIVE = (1 << 0),
+};
+
 #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 struct btf_module {
        struct list_head list;
        struct module *module;
        struct btf *btf;
        struct bin_attribute *sysfs_attr;
+       int flags;
 };
 
 static LIST_HEAD(btf_modules);
@@ -6233,7 +6346,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
        int err = 0;
 
        if (mod->btf_data_size == 0 ||
-           (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+           (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
+            op != MODULE_STATE_GOING))
                goto out;
 
        switch (op) {
@@ -6292,6 +6406,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
                }
 
                break;
+       case MODULE_STATE_LIVE:
+               mutex_lock(&btf_module_mutex);
+               list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+                       if (btf_mod->module != module)
+                               continue;
+
+                       btf_mod->flags |= BTF_MODULE_F_LIVE;
+                       break;
+               }
+               mutex_unlock(&btf_module_mutex);
+               break;
        case MODULE_STATE_GOING:
                mutex_lock(&btf_module_mutex);
                list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
@@ -6338,7 +6463,12 @@ struct module *btf_try_get_module(const struct btf *btf)
                if (btf_mod->btf != btf)
                        continue;
 
-               if (try_module_get(btf_mod->module))
+               /* We must only consider module whose __init routine has
+                * finished, hence we must check for BTF_MODULE_F_LIVE flag,
+                * which is set from the notifier callback for
+                * MODULE_STATE_LIVE.
+                */
+               if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
                        res = btf_mod->module;
 
                break;
@@ -6349,6 +6479,36 @@ struct module *btf_try_get_module(const struct btf *btf)
        return res;
 }
 
+/* Returns struct btf corresponding to the struct module
+ *
+ * This function can return NULL or ERR_PTR. Note that caller must
+ * release reference for struct btf iff btf_is_module is true.
+ */
+static struct btf *btf_get_module_btf(const struct module *module)
+{
+       struct btf *btf = NULL;
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+       struct btf_module *btf_mod, *tmp;
+#endif
+
+       if (!module)
+               return bpf_get_btf_vmlinux();
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+       mutex_lock(&btf_module_mutex);
+       list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+               if (btf_mod->module != module)
+                       continue;
+
+               btf_get(btf_mod->btf);
+               btf = btf_mod->btf;
+               break;
+       }
+       mutex_unlock(&btf_module_mutex);
+#endif
+
+       return btf;
+}
+
 BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
 {
        struct btf *btf;
@@ -6416,53 +6576,181 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
 BTF_TRACING_TYPE_xxx
 #undef BTF_TRACING_TYPE
 
-/* BTF ID set registration API for modules */
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+/* Kernel Function (kfunc) BTF ID set registration API */
 
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                              struct kfunc_btf_id_set *s)
+static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+                                   enum btf_kfunc_type type,
+                                   struct btf_id_set *add_set, bool vmlinux_set)
 {
-       mutex_lock(&l->mutex);
-       list_add(&s->list, &l->list);
-       mutex_unlock(&l->mutex);
+       struct btf_kfunc_set_tab *tab;
+       struct btf_id_set *set;
+       u32 set_cnt;
+       int ret;
+
+       if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
+               ret = -EINVAL;
+               goto end;
+       }
+
+       if (!add_set->cnt)
+               return 0;
+
+       tab = btf->kfunc_set_tab;
+       if (!tab) {
+               tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
+               if (!tab)
+                       return -ENOMEM;
+               btf->kfunc_set_tab = tab;
+       }
+
+       set = tab->sets[hook][type];
+       /* Warn when register_btf_kfunc_id_set is called twice for the same hook
+        * for module sets.
+        */
+       if (WARN_ON_ONCE(set && !vmlinux_set)) {
+               ret = -EINVAL;
+               goto end;
+       }
+
+       /* We don't need to allocate, concatenate, and sort module sets, because
+        * only one is allowed per hook. Hence, we can directly assign the
+        * pointer and return.
+        */
+       if (!vmlinux_set) {
+               tab->sets[hook][type] = add_set;
+               return 0;
+       }
+
+       /* In case of vmlinux sets, there may be more than one set being
+        * registered per hook. To create a unified set, we allocate a new set
+        * and concatenate all individual sets being registered. While each set
+        * is individually sorted, they may become unsorted when concatenated,
+        * hence re-sorting the final set again is required to make binary
+        * searching the set using btf_id_set_contains function work.
+        */
+       set_cnt = set ? set->cnt : 0;
+
+       if (set_cnt > U32_MAX - add_set->cnt) {
+               ret = -EOVERFLOW;
+               goto end;
+       }
+
+       if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
+               ret = -E2BIG;
+               goto end;
+       }
+
+       /* Grow set */
+       set = krealloc(tab->sets[hook][type],
+                      offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
+                      GFP_KERNEL | __GFP_NOWARN);
+       if (!set) {
+               ret = -ENOMEM;
+               goto end;
+       }
+
+       /* For newly allocated set, initialize set->cnt to 0 */
+       if (!tab->sets[hook][type])
+               set->cnt = 0;
+       tab->sets[hook][type] = set;
+
+       /* Concatenate the two sets */
+       memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
+       set->cnt += add_set->cnt;
+
+       sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
+
+       return 0;
+end:
+       btf_free_kfunc_set_tab(btf);
+       return ret;
 }
-EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
 
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-                                struct kfunc_btf_id_set *s)
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+                                 const struct btf_kfunc_id_set *kset)
 {
-       mutex_lock(&l->mutex);
-       list_del_init(&s->list);
-       mutex_unlock(&l->mutex);
+       bool vmlinux_set = !btf_is_module(btf);
+       int type, ret;
+
+       for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
+               if (!kset->sets[type])
+                       continue;
+
+               ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
+               if (ret)
+                       break;
+       }
+       return ret;
 }
-EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
 
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-                             struct module *owner)
+static bool __btf_kfunc_id_set_contains(const struct btf *btf,
+                                       enum btf_kfunc_hook hook,
+                                       enum btf_kfunc_type type,
+                                       u32 kfunc_btf_id)
 {
-       struct kfunc_btf_id_set *s;
+       struct btf_id_set *set;
 
-       mutex_lock(&klist->mutex);
-       list_for_each_entry(s, &klist->list, list) {
-               if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
-                       mutex_unlock(&klist->mutex);
-                       return true;
-               }
+       if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
+               return false;
+       if (!btf->kfunc_set_tab)
+               return false;
+       set = btf->kfunc_set_tab->sets[hook][type];
+       if (!set)
+               return false;
+       return btf_id_set_contains(set, kfunc_btf_id);
+}
+
+static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
+{
+       switch (prog_type) {
+       case BPF_PROG_TYPE_XDP:
+               return BTF_KFUNC_HOOK_XDP;
+       case BPF_PROG_TYPE_SCHED_CLS:
+               return BTF_KFUNC_HOOK_TC;
+       case BPF_PROG_TYPE_STRUCT_OPS:
+               return BTF_KFUNC_HOOK_STRUCT_OPS;
+       default:
+               return BTF_KFUNC_HOOK_MAX;
        }
-       mutex_unlock(&klist->mutex);
-       return false;
 }
 
-#define DEFINE_KFUNC_BTF_ID_LIST(name)                                         \
-       struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list),           \
-                                         __MUTEX_INITIALIZER(name.mutex) };   \
-       EXPORT_SYMBOL_GPL(name)
+/* Caution:
+ * Reference to the module (obtained using btf_try_get_module) corresponding to
+ * the struct btf *MUST* be held when calling this function from verifier
+ * context. This is usually true as we stash references in prog's kfunc_btf_tab;
+ * keeping the reference for the duration of the call provides the necessary
+ * protection for looking up a well-formed btf->kfunc_set_tab.
+ */
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+                              enum bpf_prog_type prog_type,
+                              enum btf_kfunc_type type, u32 kfunc_btf_id)
+{
+       enum btf_kfunc_hook hook;
 
-DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
-DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+       hook = bpf_prog_type_to_kfunc_hook(prog_type);
+       return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
+}
 
-#endif
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+                             const struct btf_kfunc_id_set *kset)
+{
+       enum btf_kfunc_hook hook;
+       struct btf *btf;
+       int ret;
+
+       btf = btf_get_module_btf(kset->owner);
+       if (IS_ERR_OR_NULL(btf))
+               return btf ? PTR_ERR(btf) : -ENOENT;
+
+       hook = bpf_prog_type_to_kfunc_hook(prog_type);
+       ret = btf_populate_kfunc_set(btf, hook, kset);
+       /* reference is only taken for module BTF */
+       if (btf_is_module(btf))
+               btf_put(btf);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
 
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
                              const struct btf *targ_btf, __u32 targ_id)
index 514b468..279ebbe 100644 (file)
@@ -1044,7 +1044,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  *   NET_XMIT_DROP       (1)   - drop packet and notify TCP to call cwr
  *   NET_XMIT_CN         (2)   - continue with packet output and notify TCP
  *                               to call cwr
- *   -EPERM                    - drop packet
+ *   -err                      - drop packet
  *
  * For ingress packets, this function will return -EPERM if any
  * attached program was found and if it returned != 1 during execution.
@@ -1079,8 +1079,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
                        cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
        } else {
                ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
-                                           __bpf_prog_run_save_cb);
-               ret = (ret == 1 ? 0 : -EPERM);
+                                           __bpf_prog_run_save_cb, 0);
+               if (ret && !IS_ERR_VALUE((long)ret))
+                       ret = -EFAULT;
        }
        bpf_restore_data_end(skb, saved_data_end);
        __skb_pull(skb, offset);
@@ -1107,10 +1108,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
                               enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       int ret;
 
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -1142,7 +1142,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        };
        struct sockaddr_storage unspec;
        struct cgroup *cgrp;
-       int ret;
 
        /* Check socket family since not all sockets represent network
         * endpoint (e.g. AF_UNIX).
@@ -1156,10 +1155,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        }
 
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
-                                         bpf_prog_run, flags);
-
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
+                                          bpf_prog_run, 0, flags);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -1184,11 +1181,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
                                     enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       int ret;
 
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
-                                   bpf_prog_run);
-       return ret == 1 ? 0 : -EPERM;
+       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
@@ -1201,17 +1196,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
                .major = major,
                .minor = minor,
        };
-       int allow;
+       int ret;
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-                                     bpf_prog_run);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
-       return !allow;
+       return ret;
 }
 
+BPF_CALL_0(bpf_get_retval)
+{
+       struct bpf_cg_run_ctx *ctx =
+               container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+       return ctx->retval;
+}
+
+static const struct bpf_func_proto bpf_get_retval_proto = {
+       .func           = bpf_get_retval,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+};
+
+BPF_CALL_1(bpf_set_retval, int, retval)
+{
+       struct bpf_cg_run_ctx *ctx =
+               container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+       ctx->retval = retval;
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_set_retval_proto = {
+       .func           = bpf_set_retval,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1224,6 +1249,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_current_cgroup_id_proto;
        case BPF_FUNC_perf_event_output:
                return &bpf_event_output_data_proto;
+       case BPF_FUNC_get_retval:
+               return &bpf_get_retval_proto;
+       case BPF_FUNC_set_retval:
+               return &bpf_set_retval_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -1337,7 +1366,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
        kfree(ctx.cur_val);
@@ -1350,7 +1380,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
                kfree(ctx.new_val);
        }
 
-       return ret == 1 ? 0 : -EPERM;
+       return ret;
 }
 
 #ifdef CONFIG_NET
@@ -1452,13 +1482,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, 0);
        release_sock(sk);
 
-       if (!ret) {
-               ret = -EPERM;
+       if (ret)
                goto out;
-       }
 
        if (ctx.optlen == -1) {
                /* optlen set to -1, bypass kernel */
@@ -1518,7 +1546,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
+               .current_task = current,
        };
        int ret;
 
@@ -1562,27 +1590,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, retval);
        release_sock(sk);
 
-       if (!ret) {
-               ret = -EPERM;
+       if (ret < 0)
                goto out;
-       }
 
        if (ctx.optlen > max_optlen || ctx.optlen < 0) {
                ret = -EFAULT;
                goto out;
        }
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval) {
-               ret = -EFAULT;
-               goto out;
-       }
-
        if (ctx.optlen != 0) {
                if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
                    put_user(ctx.optlen, optlen)) {
@@ -1591,8 +1609,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                }
        }
 
-       ret = ctx.retval;
-
 out:
        sockopt_free_buf(&ctx, &buf);
        return ret;
@@ -1607,10 +1623,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
                .optlen = *optlen,
                .optval = optval,
                .optval_end = optval + *optlen,
+               .current_task = current,
        };
        int ret;
 
@@ -1623,25 +1639,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
         */
 
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
-       if (!ret)
-               return -EPERM;
+                                   &ctx, bpf_prog_run, retval);
+       if (ret < 0)
+               return ret;
 
        if (ctx.optlen > *optlen)
                return -EFAULT;
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval)
-               return -EFAULT;
-
        /* BPF programs can shrink the buffer, export the modifications.
         */
        if (ctx.optlen != 0)
                *optlen = ctx.optlen;
 
-       return ctx.retval;
+       return ret;
 }
 #endif
 
@@ -2057,10 +2067,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
                        *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
                break;
        case offsetof(struct bpf_sockopt, retval):
-               if (type == BPF_WRITE)
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
-               else
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+               BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
+
+               if (type == BPF_WRITE) {
+                       int treg = BPF_REG_9;
+
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             treg, treg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             treg, si->src_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+                       *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+               } else {
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             si->dst_reg, si->src_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+               }
                break;
        case offsetof(struct bpf_sockopt, optval):
                *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
index de3e5bc..0a1cfd8 100644 (file)
@@ -1829,28 +1829,30 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
 }
 #endif
 
-bool bpf_prog_array_compatible(struct bpf_array *array,
-                              const struct bpf_prog *fp)
+bool bpf_prog_map_compatible(struct bpf_map *map,
+                            const struct bpf_prog *fp)
 {
        bool ret;
 
        if (fp->kprobe_override)
                return false;
 
-       spin_lock(&array->aux->owner.lock);
-
-       if (!array->aux->owner.type) {
+       spin_lock(&map->owner.lock);
+       if (!map->owner.type) {
                /* There's no owner yet where we could check for
                 * compatibility.
                 */
-               array->aux->owner.type  = fp->type;
-               array->aux->owner.jited = fp->jited;
+               map->owner.type  = fp->type;
+               map->owner.jited = fp->jited;
+               map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
                ret = true;
        } else {
-               ret = array->aux->owner.type  == fp->type &&
-                     array->aux->owner.jited == fp->jited;
+               ret = map->owner.type  == fp->type &&
+                     map->owner.jited == fp->jited &&
+                     map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
        }
-       spin_unlock(&array->aux->owner.lock);
+       spin_unlock(&map->owner.lock);
+
        return ret;
 }
 
@@ -1862,13 +1864,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
        mutex_lock(&aux->used_maps_mutex);
        for (i = 0; i < aux->used_map_cnt; i++) {
                struct bpf_map *map = aux->used_maps[i];
-               struct bpf_array *array;
 
-               if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+               if (!map_type_contains_progs(map))
                        continue;
 
-               array = container_of(map, struct bpf_array, map);
-               if (!bpf_prog_array_compatible(array, fp)) {
+               if (!bpf_prog_map_compatible(map, fp)) {
                        ret = -EINVAL;
                        goto out;
                }
index b3e6b94..650e5d2 100644 (file)
@@ -397,7 +397,8 @@ static int cpu_map_kthread_run(void *data)
        return 0;
 }
 
-static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu,
+                                     struct bpf_map *map, int fd)
 {
        struct bpf_prog *prog;
 
@@ -405,7 +406,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
        if (IS_ERR(prog))
                return PTR_ERR(prog);
 
-       if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+       if (prog->expected_attach_type != BPF_XDP_CPUMAP ||
+           !bpf_prog_map_compatible(map, prog)) {
                bpf_prog_put(prog);
                return -EINVAL;
        }
@@ -457,7 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
        rcpu->map_id = map->id;
        rcpu->value.qsize  = value->qsize;
 
-       if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+       if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
                goto free_ptr_ring;
 
        /* Setup kthread */
index fe019db..038f6d7 100644 (file)
@@ -858,7 +858,8 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
                                             BPF_PROG_TYPE_XDP, false);
                if (IS_ERR(prog))
                        goto err_put_dev;
-               if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+               if (prog->expected_attach_type != BPF_XDP_DEVMAP ||
+                   !bpf_prog_map_compatible(&dtab->map, prog))
                        goto err_put_prog;
        }
 
index fa4505f..72ce1ed 100644 (file)
@@ -556,16 +556,14 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
 
 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 {
-       const struct bpf_map *map = filp->private_data;
-       const struct bpf_array *array;
+       struct bpf_map *map = filp->private_data;
        u32 type = 0, jited = 0;
 
-       if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
-               array = container_of(map, struct bpf_array, map);
-               spin_lock(&array->aux->owner.lock);
-               type  = array->aux->owner.type;
-               jited = array->aux->owner.jited;
-               spin_unlock(&array->aux->owner.lock);
+       if (map_type_contains_progs(map)) {
+               spin_lock(&map->owner.lock);
+               type  = map->owner.type;
+               jited = map->owner.jited;
+               spin_unlock(&map->owner.lock);
        }
 
        seq_printf(m,
@@ -874,6 +872,7 @@ static int map_create(union bpf_attr *attr)
        atomic64_set(&map->refcnt, 1);
        atomic64_set(&map->usercnt, 1);
        mutex_init(&map->freeze_mutex);
+       spin_lock_init(&map->owner.lock);
 
        map->spin_lock_off = -EINVAL;
        map->timer_off = -EINVAL;
@@ -2217,7 +2216,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
                                 BPF_F_ANY_ALIGNMENT |
                                 BPF_F_TEST_STATE_FREQ |
                                 BPF_F_SLEEPABLE |
-                                BPF_F_TEST_RND_HI32))
+                                BPF_F_TEST_RND_HI32 |
+                                BPF_F_XDP_HAS_FRAGS))
                return -EINVAL;
 
        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -2303,6 +2303,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
        prog->aux->dst_prog = dst_prog;
        prog->aux->offload_requested = !!attr->prog_ifindex;
        prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
+       prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
 
        err = security_bpf_prog_alloc(prog->aux);
        if (err)
@@ -3318,6 +3319,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
        case BPF_FLOW_DISSECTOR:
        case BPF_SK_LOOKUP:
                return netns_bpf_prog_query(attr, uattr);
+       case BPF_SK_SKB_STREAM_PARSER:
+       case BPF_SK_SKB_STREAM_VERDICT:
+       case BPF_SK_MSG_VERDICT:
+       case BPF_SK_SKB_VERDICT:
+               return sock_map_bpf_prog_query(attr, uattr);
        default:
                return -EINVAL;
        }
index a39eede..dcf065e 100644 (file)
@@ -452,7 +452,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 {
        return base_type(type) == PTR_TO_SOCKET ||
                base_type(type) == PTR_TO_TCP_SOCK ||
-               base_type(type) == PTR_TO_MEM;
+               base_type(type) == PTR_TO_MEM ||
+               base_type(type) == PTR_TO_BTF_ID;
 }
 
 static bool type_is_rdonly_mem(u32 type)
@@ -1743,7 +1744,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
 }
 
 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
-                                        s16 offset, struct module **btf_modp)
+                                        s16 offset)
 {
        struct bpf_kfunc_btf kf_btf = { .offset = offset };
        struct bpf_kfunc_btf_tab *tab;
@@ -1797,8 +1798,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
                sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
                     kfunc_btf_cmp_by_off, NULL);
        }
-       if (btf_modp)
-               *btf_modp = b->module;
        return b->btf;
 }
 
@@ -1815,8 +1814,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
 }
 
 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
-                                      u32 func_id, s16 offset,
-                                      struct module **btf_modp)
+                                      u32 func_id, s16 offset)
 {
        if (offset) {
                if (offset < 0) {
@@ -1827,7 +1825,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
                        return ERR_PTR(-EINVAL);
                }
 
-               return __find_kfunc_desc_btf(env, offset, btf_modp);
+               return __find_kfunc_desc_btf(env, offset);
        }
        return btf_vmlinux ?: ERR_PTR(-ENOENT);
 }
@@ -1890,7 +1888,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
                prog_aux->kfunc_btf_tab = btf_tab;
        }
 
-       desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+       desc_btf = find_kfunc_desc_btf(env, func_id, offset);
        if (IS_ERR(desc_btf)) {
                verbose(env, "failed to find BTF for kernel function\n");
                return PTR_ERR(desc_btf);
@@ -2351,7 +2349,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
        if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
                return NULL;
 
-       desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+       desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off);
        if (IS_ERR(desc_btf))
                return "<error>";
 
@@ -3498,11 +3496,6 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 
 #define MAX_PACKET_OFF 0xffff
 
-static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
-{
-       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
-}
-
 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
                                       const struct bpf_call_arg_meta *meta,
                                       enum bpf_access_type t)
@@ -4877,6 +4870,62 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
        }
 }
 
+static int check_mem_size_reg(struct bpf_verifier_env *env,
+                             struct bpf_reg_state *reg, u32 regno,
+                             bool zero_size_allowed,
+                             struct bpf_call_arg_meta *meta)
+{
+       int err;
+
+       /* This is used to refine r0 return value bounds for helpers
+        * that enforce this value as an upper bound on return values.
+        * See do_refine_retval_range() for helpers that can refine
+        * the return value. C type of helper is u32 so we pull register
+        * bound from umax_value however, if negative verifier errors
+        * out. Only upper bounds can be learned because retval is an
+        * int type and negative retvals are allowed.
+        */
+       if (meta)
+               meta->msize_max_value = reg->umax_value;
+
+       /* The register is SCALAR_VALUE; the access check
+        * happens using its boundaries.
+        */
+       if (!tnum_is_const(reg->var_off))
+               /* For unprivileged variable accesses, disable raw
+                * mode so that the program is required to
+                * initialize all the memory that the helper could
+                * just partially fill up.
+                */
+               meta = NULL;
+
+       if (reg->smin_value < 0) {
+               verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
+                       regno);
+               return -EACCES;
+       }
+
+       if (reg->umin_value == 0) {
+               err = check_helper_mem_access(env, regno - 1, 0,
+                                             zero_size_allowed,
+                                             meta);
+               if (err)
+                       return err;
+       }
+
+       if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
+               verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+                       regno);
+               return -EACCES;
+       }
+       err = check_helper_mem_access(env, regno - 1,
+                                     reg->umax_value,
+                                     zero_size_allowed, meta);
+       if (!err)
+               err = mark_chain_precision(env, regno);
+       return err;
+}
+
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                   u32 regno, u32 mem_size)
 {
@@ -4900,6 +4949,28 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
        return check_helper_mem_access(env, regno, mem_size, true, NULL);
 }
 
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                            u32 regno)
+{
+       struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
+       bool may_be_null = type_may_be_null(mem_reg->type);
+       struct bpf_reg_state saved_reg;
+       int err;
+
+       WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
+
+       if (may_be_null) {
+               saved_reg = *mem_reg;
+               mark_ptr_not_null_reg(mem_reg);
+       }
+
+       err = check_mem_size_reg(env, reg, regno, true, NULL);
+
+       if (may_be_null)
+               *mem_reg = saved_reg;
+       return err;
+}
+
 /* Implementation details:
  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
  * Two bpf_map_lookups (even with the same key) will have different reg->id.
@@ -5439,51 +5510,7 @@ skip_type_check:
        } else if (arg_type_is_mem_size(arg_type)) {
                bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
-               /* This is used to refine r0 return value bounds for helpers
-                * that enforce this value as an upper bound on return values.
-                * See do_refine_retval_range() for helpers that can refine
-                * the return value. C type of helper is u32 so we pull register
-                * bound from umax_value however, if negative verifier errors
-                * out. Only upper bounds can be learned because retval is an
-                * int type and negative retvals are allowed.
-                */
-               meta->msize_max_value = reg->umax_value;
-
-               /* The register is SCALAR_VALUE; the access check
-                * happens using its boundaries.
-                */
-               if (!tnum_is_const(reg->var_off))
-                       /* For unprivileged variable accesses, disable raw
-                        * mode so that the program is required to
-                        * initialize all the memory that the helper could
-                        * just partially fill up.
-                        */
-                       meta = NULL;
-
-               if (reg->smin_value < 0) {
-                       verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
-                               regno);
-                       return -EACCES;
-               }
-
-               if (reg->umin_value == 0) {
-                       err = check_helper_mem_access(env, regno - 1, 0,
-                                                     zero_size_allowed,
-                                                     meta);
-                       if (err)
-                               return err;
-               }
-
-               if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
-                       verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
-                               regno);
-                       return -EACCES;
-               }
-               err = check_helper_mem_access(env, regno - 1,
-                                             reg->umax_value,
-                                             zero_size_allowed, meta);
-               if (!err)
-                       err = mark_chain_precision(env, regno);
+               err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
        } else if (arg_type_is_alloc_size(arg_type)) {
                if (!tnum_is_const(reg->var_off)) {
                        verbose(env, "R%d is not a known constant'\n",
@@ -6842,22 +6869,23 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
        }
 }
 
-static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                           int *insn_idx_p)
 {
        const struct btf_type *t, *func, *func_proto, *ptr_type;
        struct bpf_reg_state *regs = cur_regs(env);
        const char *func_name, *ptr_type_name;
        u32 i, nargs, func_id, ptr_type_id;
-       struct module *btf_mod = NULL;
+       int err, insn_idx = *insn_idx_p;
        const struct btf_param *args;
        struct btf *desc_btf;
-       int err;
+       bool acq;
 
        /* skip for now, but return error when we find this in fixup_kfunc_call */
        if (!insn->imm)
                return 0;
 
-       desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+       desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off);
        if (IS_ERR(desc_btf))
                return PTR_ERR(desc_btf);
 
@@ -6866,23 +6894,43 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
        func_name = btf_name_by_offset(desc_btf, func->name_off);
        func_proto = btf_type_by_id(desc_btf, func->type);
 
-       if (!env->ops->check_kfunc_call ||
-           !env->ops->check_kfunc_call(func_id, btf_mod)) {
+       if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                     BTF_KFUNC_TYPE_CHECK, func_id)) {
                verbose(env, "calling kernel function %s is not allowed\n",
                        func_name);
                return -EACCES;
        }
 
+       acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                       BTF_KFUNC_TYPE_ACQUIRE, func_id);
+
        /* Check the arguments */
        err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
-       if (err)
+       if (err < 0)
                return err;
+       /* In case of release function, we get register number of refcounted
+        * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
+        */
+       if (err) {
+               err = release_reference(env, regs[err].ref_obj_id);
+               if (err) {
+                       verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+                               func_name, func_id);
+                       return err;
+               }
+       }
 
        for (i = 0; i < CALLER_SAVED_REGS; i++)
                mark_reg_not_init(env, regs, caller_saved[i]);
 
        /* Check return type */
        t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
+
+       if (acq && !btf_type_is_ptr(t)) {
+               verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
+               return -EINVAL;
+       }
+
        if (btf_type_is_scalar(t)) {
                mark_reg_unknown(env, regs, BPF_REG_0);
                mark_btf_func_reg_size(env, BPF_REG_0, t->size);
@@ -6901,7 +6949,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
                regs[BPF_REG_0].btf = desc_btf;
                regs[BPF_REG_0].type = PTR_TO_BTF_ID;
                regs[BPF_REG_0].btf_id = ptr_type_id;
+               if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+                                             BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+                       regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
+                       /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
+                       regs[BPF_REG_0].id = ++env->id_gen;
+               }
                mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+               if (acq) {
+                       int id = acquire_reference_state(env, insn_idx);
+
+                       if (id < 0)
+                               return id;
+                       regs[BPF_REG_0].id = id;
+                       regs[BPF_REG_0].ref_obj_id = id;
+               }
        } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
 
        nargs = btf_type_vlen(func_proto);
@@ -11549,7 +11611,7 @@ static int do_check(struct bpf_verifier_env *env)
                                if (insn->src_reg == BPF_PSEUDO_CALL)
                                        err = check_func_call(env, insn, &env->insn_idx);
                                else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
-                                       err = check_kfunc_call(env, insn);
+                                       err = check_kfunc_call(env, insn, &env->insn_idx);
                                else
                                        err = check_helper_call(env, insn, &env->insn_idx);
                                if (err)
index 21aa306..06a9e22 100644 (file)
@@ -1562,6 +1562,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
 
 extern const struct bpf_func_proto bpf_skb_output_proto;
 extern const struct bpf_func_proto bpf_xdp_output_proto;
+extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
 
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
           struct bpf_map *, map, u64, flags)
@@ -1661,6 +1662,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sock_from_file_proto;
        case BPF_FUNC_get_socket_cookie:
                return &bpf_get_socket_ptr_cookie_proto;
+       case BPF_FUNC_xdp_get_buff_len:
+               return &bpf_xdp_get_buff_len_trace_proto;
 #endif
        case BPF_FUNC_seq_printf:
                return prog->expected_attach_type == BPF_TRACE_ITER ?
index 9751207..b7c4d65 100644 (file)
@@ -116,7 +116,6 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
                return -ENOMEM;
        }
 
-       refcount_set(&ax25_rt->refcount, 1);
        ax25_rt->callsign     = route->dest_addr;
        ax25_rt->dev          = ax25_dev->dev;
        ax25_rt->digipeat     = NULL;
@@ -167,12 +166,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
                    ax25cmp(&route->dest_addr, &s->callsign) == 0) {
                        if (ax25_route_list == s) {
                                ax25_route_list = s->next;
-                               ax25_put_route(s);
+                               __ax25_put_route(s);
                        } else {
                                for (t = ax25_route_list; t != NULL; t = t->next) {
                                        if (t->next == s) {
                                                t->next = s->next;
-                                               ax25_put_route(s);
+                                               __ax25_put_route(s);
                                                break;
                                        }
                                }
index 04ebe90..d106511 100644 (file)
@@ -689,6 +689,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
 
        bacpy(&conn->dst, dst);
        bacpy(&conn->src, &hdev->bdaddr);
+       conn->handle = HCI_CONN_HANDLE_UNSET;
        conn->hdev  = hdev;
        conn->type  = type;
        conn->role  = role;
index 2b7bd36..5bde0ec 100644 (file)
@@ -2503,6 +2503,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
        INIT_LIST_HEAD(&hdev->conn_hash.list);
        INIT_LIST_HEAD(&hdev->adv_instances);
        INIT_LIST_HEAD(&hdev->blocked_keys);
+       INIT_LIST_HEAD(&hdev->monitored_devices);
 
        INIT_LIST_HEAD(&hdev->local_codecs);
        INIT_WORK(&hdev->rx_work, hci_rx_work);
@@ -3666,8 +3667,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
                sco_recv_scodata(conn, skb);
                return;
        } else {
-               bt_dev_err(hdev, "SCO packet for unknown connection handle %d",
-                          handle);
+               bt_dev_err_ratelimited(hdev, "SCO packet for unknown connection handle %d",
+                                      handle);
        }
 
        kfree_skb(skb);
index fc30f4c..63b9259 100644 (file)
@@ -3068,6 +3068,11 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
        struct hci_ev_conn_complete *ev = data;
        struct hci_conn *conn;
 
+       if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
+               bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle");
+               return;
+       }
+
        bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
 
        hci_dev_lock(hdev);
@@ -3106,6 +3111,17 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
                }
        }
 
+       /* The HCI_Connection_Complete event is only sent once per connection.
+        * Processing it more than once per connection can corrupt kernel memory.
+        *
+        * As the connection handle is set here for the first time, it indicates
+        * whether the connection is already set up.
+        */
+       if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+               bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
+               goto unlock;
+       }
+
        if (!ev->status) {
                conn->handle = __le16_to_cpu(ev->handle);
 
@@ -4534,7 +4550,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
                        if (!info) {
                                bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
                                           HCI_EV_INQUIRY_RESULT_WITH_RSSI);
-                               return;
+                               goto unlock;
                        }
 
                        bacpy(&data.bdaddr, &info->bdaddr);
@@ -4565,7 +4581,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
                        if (!info) {
                                bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
                                           HCI_EV_INQUIRY_RESULT_WITH_RSSI);
-                               return;
+                               goto unlock;
                        }
 
                        bacpy(&data.bdaddr, &info->bdaddr);
@@ -4587,7 +4603,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
                bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
                           HCI_EV_INQUIRY_RESULT_WITH_RSSI);
        }
-
+unlock:
        hci_dev_unlock(hdev);
 }
 
@@ -4661,6 +4677,24 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
        struct hci_ev_sync_conn_complete *ev = data;
        struct hci_conn *conn;
 
+       switch (ev->link_type) {
+       case SCO_LINK:
+       case ESCO_LINK:
+               break;
+       default:
+               /* As per Core 5.3 Vol 4 Part E 7.7.35 (p.2219), Link_Type
+                * for HCI_Synchronous_Connection_Complete is limited to
+                * either SCO or eSCO
+                */
+               bt_dev_err(hdev, "Ignoring connect complete event for invalid link type");
+               return;
+       }
+
+       if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
+               bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle");
+               return;
+       }
+
        bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
 
        hci_dev_lock(hdev);
@@ -4684,23 +4718,19 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
                        goto unlock;
        }
 
+       /* The HCI_Synchronous_Connection_Complete event is only sent once per connection.
+        * Processing it more than once per connection can corrupt kernel memory.
+        *
+        * As the connection handle is set here for the first time, it indicates
+        * whether the connection is already set up.
+        */
+       if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+               bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete event for existing connection");
+               goto unlock;
+       }
+
        switch (ev->status) {
        case 0x00:
-               /* The synchronous connection complete event should only be
-                * sent once per new connection. Receiving a successful
-                * complete event when the connection status is already
-                * BT_CONNECTED means that the device is misbehaving and sent
-                * multiple complete event packets for the same new connection.
-                *
-                * Registering the device more than once can corrupt kernel
-                * memory, hence upon detecting this invalid event, we report
-                * an error and ignore the packet.
-                */
-               if (conn->state == BT_CONNECTED) {
-                       bt_dev_err(hdev, "Ignoring connect complete event for existing connection");
-                       goto unlock;
-               }
-
                conn->handle = __le16_to_cpu(ev->handle);
                conn->state  = BT_CONNECTED;
                conn->type   = ev->link_type;
@@ -5496,6 +5526,11 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
        struct smp_irk *irk;
        u8 addr_type;
 
+       if (handle > HCI_CONN_HANDLE_MAX) {
+               bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle");
+               return;
+       }
+
        hci_dev_lock(hdev);
 
        /* All controllers implicitly stop advertising in the event of a
@@ -5537,6 +5572,17 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
                cancel_delayed_work(&conn->le_conn_timeout);
        }
 
+       /* The HCI_LE_Connection_Complete event is only sent once per connection.
+        * Processing it more than once per connection can corrupt kernel memory.
+        *
+        * As the connection handle is set here for the first time, it indicates
+        * whether the connection is already set up.
+        */
+       if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+               bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for existing connection");
+               goto unlock;
+       }
+
        le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa);
 
        /* Lookup the identity address from the stored connection
@@ -6798,7 +6844,7 @@ static const struct hci_ev {
        HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt,
               sizeof(struct hci_ev_num_comp_blocks)),
        /* [0xff = HCI_EV_VENDOR] */
-       HCI_EV(HCI_EV_VENDOR, msft_vendor_evt, 0),
+       HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE),
 };
 
 static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb,
@@ -6823,8 +6869,9 @@ static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb,
         * decide if that is acceptable.
         */
        if (skb->len > ev->max_len)
-               bt_dev_warn(hdev, "unexpected event 0x%2.2x length: %u > %u",
-                           event, skb->len, ev->max_len);
+               bt_dev_warn_ratelimited(hdev,
+                                       "unexpected event 0x%2.2x length: %u > %u",
+                                       event, skb->len, ev->max_len);
 
        data = hci_ev_skb_pull(hdev, skb, event, ev->min_len);
        if (!data)
index 0feb68f..6e71aa6 100644 (file)
@@ -382,6 +382,9 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
 {
        struct hci_cmd_sync_work_entry *entry;
 
+       if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+               return -ENODEV;
+
        entry = kmalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return -ENOMEM;
@@ -5140,8 +5143,8 @@ static void set_ext_conn_params(struct hci_conn *conn,
        p->max_ce_len = cpu_to_le16(0x0000);
 }
 
-int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
-                               u8 own_addr_type)
+static int hci_le_ext_create_conn_sync(struct hci_dev *hdev,
+                                      struct hci_conn *conn, u8 own_addr_type)
 {
        struct hci_cp_le_ext_create_conn *cp;
        struct hci_cp_le_ext_conn_param *p;
index 37087cf..5dd684e 100644 (file)
@@ -42,7 +42,7 @@
 #include "aosp.h"
 
 #define MGMT_VERSION   1
-#define MGMT_REVISION  21
+#define MGMT_REVISION  22
 
 static const u16 mgmt_commands[] = {
        MGMT_OP_READ_INDEX_LIST,
@@ -174,6 +174,8 @@ static const u16 mgmt_events[] = {
        MGMT_EV_ADV_MONITOR_REMOVED,
        MGMT_EV_CONTROLLER_SUSPEND,
        MGMT_EV_CONTROLLER_RESUME,
+       MGMT_EV_ADV_MONITOR_DEVICE_FOUND,
+       MGMT_EV_ADV_MONITOR_DEVICE_LOST,
 };
 
 static const u16 mgmt_untrusted_commands[] = {
@@ -9589,12 +9591,116 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
        return true;
 }
 
+void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
+                                 bdaddr_t *bdaddr, u8 addr_type)
+{
+       struct mgmt_ev_adv_monitor_device_lost ev;
+
+       ev.monitor_handle = cpu_to_le16(handle);
+       bacpy(&ev.addr.bdaddr, bdaddr);
+       ev.addr.type = addr_type;
+
+       mgmt_event(MGMT_EV_ADV_MONITOR_DEVICE_LOST, hdev, &ev, sizeof(ev),
+                  NULL);
+}
+
+static void mgmt_adv_monitor_device_found(struct hci_dev *hdev,
+                                         bdaddr_t *bdaddr, bool report_device,
+                                         struct sk_buff *skb,
+                                         struct sock *skip_sk)
+{
+       struct sk_buff *advmon_skb;
+       size_t advmon_skb_len;
+       __le16 *monitor_handle;
+       struct monitored_device *dev, *tmp;
+       bool matched = false;
+       bool notify = false;
+
+       /* We have received the Advertisement Report because:
+        * 1. the kernel has initiated active discovery
+        * 2. if not, we have pend_le_reports > 0 in which case we are doing
+        *    passive scanning
+        * 3. if none of the above is true, we have one or more active
+        *    Advertisement Monitor
+        *
+        * For case 1 and 2, report all advertisements via MGMT_EV_DEVICE_FOUND
+        * and report ONLY one advertisement per device for the matched Monitor
+        * via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event.
+        *
+        * For case 3, since we are not active scanning and all advertisements
+        * received are due to a matched Advertisement Monitor, report all
+        * advertisements ONLY via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event.
+        */
+       if (report_device && !hdev->advmon_pend_notify) {
+               mgmt_event_skb(skb, skip_sk);
+               return;
+       }
+
+       advmon_skb_len = (sizeof(struct mgmt_ev_adv_monitor_device_found) -
+                         sizeof(struct mgmt_ev_device_found)) + skb->len;
+       advmon_skb = mgmt_alloc_skb(hdev, MGMT_EV_ADV_MONITOR_DEVICE_FOUND,
+                                   advmon_skb_len);
+       if (!advmon_skb) {
+               if (report_device)
+                       mgmt_event_skb(skb, skip_sk);
+               else
+                       kfree_skb(skb);
+               return;
+       }
+
+       /* ADV_MONITOR_DEVICE_FOUND is similar to DEVICE_FOUND event except
+        * that it also has 'monitor_handle'. Make a copy of DEVICE_FOUND and
+        * store monitor_handle of the matched monitor.
+        */
+       monitor_handle = skb_put(advmon_skb, sizeof(*monitor_handle));
+       skb_put_data(advmon_skb, skb->data, skb->len);
+
+       hdev->advmon_pend_notify = false;
+
+       list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) {
+               if (!bacmp(&dev->bdaddr, bdaddr)) {
+                       matched = true;
+
+                       if (!dev->notified) {
+                               *monitor_handle = cpu_to_le16(dev->handle);
+                               notify = true;
+                               dev->notified = true;
+                       }
+               }
+
+               if (!dev->notified)
+                       hdev->advmon_pend_notify = true;
+       }
+
+       if (!report_device &&
+           ((matched && !notify) || !msft_monitor_supported(hdev))) {
+               /* Handle 0 indicates that we are not active scanning and this
+                * is a subsequent advertisement report for an already matched
+                * Advertisement Monitor or the controller offloading support
+                * is not available.
+                */
+               *monitor_handle = 0;
+               notify = true;
+       }
+
+       if (report_device)
+               mgmt_event_skb(skb, skip_sk);
+       else
+               kfree_skb(skb);
+
+       if (notify)
+               mgmt_event_skb(advmon_skb, skip_sk);
+       else
+               kfree_skb(advmon_skb);
+}
+
 void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
                       u8 addr_type, u8 *dev_class, s8 rssi, u32 flags,
                       u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len)
 {
        struct sk_buff *skb;
        struct mgmt_ev_device_found *ev;
+       bool report_device = hci_discovery_active(hdev);
 
        /* Don't send events for a non-kernel initiated discovery. With
         * LE one exception is if we have pend_le_reports > 0 in which
@@ -9603,11 +9709,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
        if (!hci_discovery_active(hdev)) {
                if (link_type == ACL_LINK)
                        return;
-               if (link_type == LE_LINK &&
-                   list_empty(&hdev->pend_le_reports) &&
-                   !hci_is_adv_monitoring(hdev)) {
+               if (link_type == LE_LINK && !list_empty(&hdev->pend_le_reports))
+                       report_device = true;
+               else if (!hci_is_adv_monitoring(hdev))
                        return;
-               }
        }
 
        if (hdev->discovery.result_filtering) {
@@ -9672,7 +9777,7 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 
        ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len);
 
-       mgmt_event_skb(skb, NULL);
+       mgmt_adv_monitor_device_found(hdev, bdaddr, report_device, skb, NULL);
 }
 
 void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
index 6a94363..9a3d77d 100644 (file)
@@ -80,6 +80,14 @@ struct msft_rp_le_set_advertisement_filter_enable {
        __u8 sub_opcode;
 } __packed;
 
+#define MSFT_EV_LE_MONITOR_DEVICE      0x02
+struct msft_ev_le_monitor_device {
+       __u8     addr_type;
+       bdaddr_t bdaddr;
+       __u8     monitor_handle;
+       __u8     monitor_state;
+} __packed;
+
 struct msft_monitor_advertisement_handle_data {
        __u8  msft_handle;
        __u16 mgmt_handle;
@@ -204,6 +212,37 @@ static struct msft_monitor_advertisement_handle_data *msft_find_handle_data
        return NULL;
 }
 
+/* This function requires the caller holds hdev->lock */
+static int msft_monitor_device_del(struct hci_dev *hdev, __u16 mgmt_handle,
+                                  bdaddr_t *bdaddr, __u8 addr_type,
+                                  bool notify)
+{
+       struct monitored_device *dev, *tmp;
+       int count = 0;
+
+       list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) {
+               /* mgmt_handle == 0 indicates remove all devices, whereas,
+                * bdaddr == NULL indicates remove all devices matching the
+                * mgmt_handle.
+                */
+               if ((!mgmt_handle || dev->handle == mgmt_handle) &&
+                   (!bdaddr || (!bacmp(bdaddr, &dev->bdaddr) &&
+                                addr_type == dev->addr_type))) {
+                       if (notify && dev->notified) {
+                               mgmt_adv_monitor_device_lost(hdev, dev->handle,
+                                                            &dev->bdaddr,
+                                                            dev->addr_type);
+                       }
+
+                       list_del(&dev->list);
+                       kfree(dev);
+                       count++;
+               }
+       }
+
+       return count;
+}
+
 static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev,
                                             u8 status, u16 opcode,
                                             struct sk_buff *skb)
@@ -294,6 +333,10 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
                if (monitor && !msft->suspending)
                        hci_free_adv_monitor(hdev, monitor);
 
+               /* Clear any monitored devices by this Adv Monitor */
+               msft_monitor_device_del(hdev, handle_data->mgmt_handle, NULL,
+                                       0, false);
+
                list_del(&handle_data->list);
                kfree(handle_data);
        }
@@ -557,6 +600,14 @@ void msft_do_close(struct hci_dev *hdev)
                list_del(&handle_data->list);
                kfree(handle_data);
        }
+
+       hci_dev_lock(hdev);
+
+       /* Clear any devices that are being monitored and notify device lost */
+       hdev->advmon_pend_notify = false;
+       msft_monitor_device_del(hdev, 0, NULL, 0, true);
+
+       hci_dev_unlock(hdev);
 }
 
 void msft_register(struct hci_dev *hdev)
@@ -590,10 +641,101 @@ void msft_unregister(struct hci_dev *hdev)
        kfree(msft);
 }
 
+/* This function requires the caller holds hdev->lock */
+static void msft_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr,
+                             __u8 addr_type, __u16 mgmt_handle)
+{
+       struct monitored_device *dev;
+
+       dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+       if (!dev) {
+               bt_dev_err(hdev, "MSFT vendor event %u: no memory",
+                          MSFT_EV_LE_MONITOR_DEVICE);
+               return;
+       }
+
+       bacpy(&dev->bdaddr, bdaddr);
+       dev->addr_type = addr_type;
+       dev->handle = mgmt_handle;
+       dev->notified = false;
+
+       INIT_LIST_HEAD(&dev->list);
+       list_add(&dev->list, &hdev->monitored_devices);
+       hdev->advmon_pend_notify = true;
+}
+
+/* This function requires the caller holds hdev->lock */
+static void msft_device_lost(struct hci_dev *hdev, bdaddr_t *bdaddr,
+                            __u8 addr_type, __u16 mgmt_handle)
+{
+       if (!msft_monitor_device_del(hdev, mgmt_handle, bdaddr, addr_type,
+                                    true)) {
+               bt_dev_err(hdev, "MSFT vendor event %u: dev %pMR not in list",
+                          MSFT_EV_LE_MONITOR_DEVICE, bdaddr);
+       }
+}
+
+static void *msft_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
+                          u8 ev, size_t len)
+{
+       void *data;
+
+       data = skb_pull_data(skb, len);
+       if (!data)
+               bt_dev_err(hdev, "Malformed MSFT vendor event: 0x%02x", ev);
+
+       return data;
+}
+
+/* This function requires the caller holds hdev->lock */
+static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+       struct msft_ev_le_monitor_device *ev;
+       struct msft_monitor_advertisement_handle_data *handle_data;
+       u8 addr_type;
+
+       ev = msft_skb_pull(hdev, skb, MSFT_EV_LE_MONITOR_DEVICE, sizeof(*ev));
+       if (!ev)
+               return;
+
+       bt_dev_dbg(hdev,
+                  "MSFT vendor event 0x%02x: handle 0x%04x state %d addr %pMR",
+                  MSFT_EV_LE_MONITOR_DEVICE, ev->monitor_handle,
+                  ev->monitor_state, &ev->bdaddr);
+
+       handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false);
+       if (!handle_data)
+               return;
+
+       switch (ev->addr_type) {
+       case ADDR_LE_DEV_PUBLIC:
+               addr_type = BDADDR_LE_PUBLIC;
+               break;
+
+       case ADDR_LE_DEV_RANDOM:
+               addr_type = BDADDR_LE_RANDOM;
+               break;
+
+       default:
+               bt_dev_err(hdev,
+                          "MSFT vendor event 0x%02x: unknown addr type 0x%02x",
+                          MSFT_EV_LE_MONITOR_DEVICE, ev->addr_type);
+               return;
+       }
+
+       if (ev->monitor_state)
+               msft_device_found(hdev, &ev->bdaddr, addr_type,
+                                 handle_data->mgmt_handle);
+       else
+               msft_device_lost(hdev, &ev->bdaddr, addr_type,
+                                handle_data->mgmt_handle);
+}
+
 void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
 {
        struct msft_data *msft = hdev->msft_data;
-       u8 event;
+       u8 *evt_prefix;
+       u8 *evt;
 
        if (!msft)
                return;
@@ -602,13 +744,12 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
         * matches, and otherwise just return.
         */
        if (msft->evt_prefix_len > 0) {
-               if (skb->len < msft->evt_prefix_len)
+               evt_prefix = msft_skb_pull(hdev, skb, 0, msft->evt_prefix_len);
+               if (!evt_prefix)
                        return;
 
-               if (memcmp(skb->data, msft->evt_prefix, msft->evt_prefix_len))
+               if (memcmp(evt_prefix, msft->evt_prefix, msft->evt_prefix_len))
                        return;
-
-               skb_pull(skb, msft->evt_prefix_len);
        }
 
        /* Every event starts at least with an event code and the rest of
@@ -617,10 +758,23 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
        if (skb->len < 1)
                return;
 
-       event = *skb->data;
-       skb_pull(skb, 1);
+       evt = msft_skb_pull(hdev, skb, 0, sizeof(*evt));
+       if (!evt)
+               return;
+
+       hci_dev_lock(hdev);
+
+       switch (*evt) {
+       case MSFT_EV_LE_MONITOR_DEVICE:
+               msft_monitor_device_evt(hdev, skb);
+               break;
 
-       bt_dev_dbg(hdev, "MSFT vendor event %u", event);
+       default:
+               bt_dev_dbg(hdev, "MSFT vendor event 0x%02x", *evt);
+               break;
+       }
+
+       hci_dev_unlock(hdev);
 }
 
 __u64 msft_get_features(struct hci_dev *hdev)
index 46dd957..65b52b4 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/slab.h>
+#include <linux/init.h>
 #include <linux/vmalloc.h>
 #include <linux/etherdevice.h>
 #include <linux/filter.h>
@@ -130,7 +131,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 
 static int bpf_test_finish(const union bpf_attr *kattr,
                           union bpf_attr __user *uattr, const void *data,
-                          u32 size, u32 retval, u32 duration)
+                          struct skb_shared_info *sinfo, u32 size,
+                          u32 retval, u32 duration)
 {
        void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
        int err = -EFAULT;
@@ -145,8 +147,36 @@ static int bpf_test_finish(const union bpf_attr *kattr,
                err = -ENOSPC;
        }
 
-       if (data_out && copy_to_user(data_out, data, copy_size))
-               goto out;
+       if (data_out) {
+               int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+
+               if (copy_to_user(data_out, data, len))
+                       goto out;
+
+               if (sinfo) {
+                       int i, offset = len, data_len;
+
+                       for (i = 0; i < sinfo->nr_frags; i++) {
+                               skb_frag_t *frag = &sinfo->frags[i];
+
+                               if (offset >= copy_size) {
+                                       err = -ENOSPC;
+                                       break;
+                               }
+
+                               data_len = min_t(int, copy_size - offset,
+                                                skb_frag_size(frag));
+
+                               if (copy_to_user(data_out + offset,
+                                                skb_frag_address(frag),
+                                                data_len))
+                                       goto out;
+
+                               offset += data_len;
+                       }
+               }
+       }
+
        if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
                goto out;
        if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
@@ -171,6 +201,8 @@ int noinline bpf_fentry_test1(int a)
 {
        return a + 1;
 }
+EXPORT_SYMBOL_GPL(bpf_fentry_test1);
+ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
 
 int noinline bpf_fentry_test2(int a, u64 b)
 {
@@ -232,28 +264,142 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
        return sk;
 }
 
+struct prog_test_ref_kfunc {
+       int a;
+       int b;
+       struct prog_test_ref_kfunc *next;
+};
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+       .a = 42,
+       .b = 108,
+       .next = &prog_test_struct,
+};
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+       /* randomly return NULL */
+       if (get_jiffies_64() % 2)
+               return NULL;
+       return &prog_test_struct;
+}
+
+noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+{
+}
+
+struct prog_test_pass1 {
+       int x0;
+       struct {
+               int x1;
+               struct {
+                       int x2;
+                       struct {
+                               int x3;
+                       };
+               };
+       };
+};
+
+struct prog_test_pass2 {
+       int len;
+       short arr1[4];
+       struct {
+               char arr2[4];
+               unsigned long arr3[8];
+       } x;
+};
+
+struct prog_test_fail1 {
+       void *p;
+       int x;
+};
+
+struct prog_test_fail2 {
+       int x8;
+       struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+       int len;
+       char arr1[2];
+       char arr2[];
+};
+
+noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
 __diag_pop();
 
 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
 
-BTF_SET_START(test_sk_kfunc_ids)
+BTF_SET_START(test_sk_check_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test1)
 BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
-BTF_SET_END(test_sk_kfunc_ids)
-
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
-{
-       if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
-               return true;
-       return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
-}
-
-static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
-                          u32 headroom, u32 tailroom)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID(func, bpf_kfunc_call_test_pass1)
+BTF_ID(func, bpf_kfunc_call_test_pass2)
+BTF_ID(func, bpf_kfunc_call_test_fail1)
+BTF_ID(func, bpf_kfunc_call_test_fail2)
+BTF_ID(func, bpf_kfunc_call_test_fail3)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_SET_END(test_sk_check_kfunc_ids)
+
+BTF_SET_START(test_sk_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_acquire_kfunc_ids)
+
+BTF_SET_START(test_sk_release_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_SET_END(test_sk_release_kfunc_ids)
+
+BTF_SET_START(test_sk_ret_null_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_ret_null_kfunc_ids)
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
+                          u32 size, u32 headroom, u32 tailroom)
 {
        void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
-       u32 user_size = kattr->test.data_size_in;
        void *data;
 
        if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
@@ -581,7 +727,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (kattr->test.flags || kattr->test.cpu)
                return -EINVAL;
 
-       data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+       data = bpf_test_init(kattr, kattr->test.data_size_in,
+                            size, NET_SKB_PAD + NET_IP_ALIGN,
                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
        if (IS_ERR(data))
                return PTR_ERR(data);
@@ -683,7 +830,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
        /* bpf program can never convert linear skb to non-linear */
        if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
                size = skb_headlen(skb);
-       ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+       ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
+                             duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct __sk_buff));
@@ -758,16 +906,16 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                          union bpf_attr __user *uattr)
 {
        u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-       u32 headroom = XDP_PACKET_HEADROOM;
        u32 size = kattr->test.data_size_in;
+       u32 headroom = XDP_PACKET_HEADROOM;
+       u32 retval, duration, max_data_sz;
        u32 repeat = kattr->test.repeat;
        struct netdev_rx_queue *rxqueue;
+       struct skb_shared_info *sinfo;
        struct xdp_buff xdp = {};
-       u32 retval, duration;
+       int i, ret = -EINVAL;
        struct xdp_md *ctx;
-       u32 max_data_sz;
        void *data;
-       int ret = -EINVAL;
 
        if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
            prog->expected_attach_type == BPF_XDP_CPUMAP)
@@ -787,26 +935,60 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                headroom -= ctx->data;
        }
 
-       /* XDP have extra tailroom as (most) drivers use full page */
        max_data_sz = 4096 - headroom - tailroom;
+       size = min_t(u32, size, max_data_sz);
 
-       data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
+       data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
        if (IS_ERR(data)) {
                ret = PTR_ERR(data);
                goto free_ctx;
        }
 
        rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
-       xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
-                     &rxqueue->xdp_rxq);
+       rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+       xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
        xdp_prepare_buff(&xdp, data, headroom, size, true);
+       sinfo = xdp_get_shared_info_from_buff(&xdp);
 
        ret = xdp_convert_md_to_buff(ctx, &xdp);
        if (ret)
                goto free_data;
 
+       if (unlikely(kattr->test.data_size_in > size)) {
+               void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+
+               while (size < kattr->test.data_size_in) {
+                       struct page *page;
+                       skb_frag_t *frag;
+                       int data_len;
+
+                       page = alloc_page(GFP_KERNEL);
+                       if (!page) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+
+                       frag = &sinfo->frags[sinfo->nr_frags++];
+                       __skb_frag_set_page(frag, page);
+
+                       data_len = min_t(int, kattr->test.data_size_in - size,
+                                        PAGE_SIZE);
+                       skb_frag_size_set(frag, data_len);
+
+                       if (copy_from_user(page_address(page), data_in + size,
+                                          data_len)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
+                       sinfo->xdp_frags_size += data_len;
+                       size += data_len;
+               }
+               xdp_buff_set_frags_flag(&xdp);
+       }
+
        if (repeat > 1)
                bpf_prog_change_xdp(NULL, prog);
+
        ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
        /* We convert the xdp_buff back to an xdp_md before checking the return
         * code so the reference count of any held netdevice will be decremented
@@ -816,12 +998,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (ret)
                goto out;
 
-       if (xdp.data_meta != data + headroom ||
-           xdp.data_end != xdp.data_meta + size)
-               size = xdp.data_end - xdp.data_meta;
-
-       ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
-                             duration);
+       size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
+       ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+                             retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct xdp_md));
@@ -830,6 +1009,8 @@ out:
        if (repeat > 1)
                bpf_prog_change_xdp(prog, NULL);
 free_data:
+       for (i = 0; i < sinfo->nr_frags; i++)
+               __free_page(skb_frag_page(&sinfo->frags[i]));
        kfree(data);
 free_ctx:
        kfree(ctx);
@@ -876,7 +1057,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
        if (size < ETH_HLEN)
                return -EINVAL;
 
-       data = bpf_test_init(kattr, size, 0, 0);
+       data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
        if (IS_ERR(data))
                return PTR_ERR(data);
 
@@ -911,8 +1092,8 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
        if (ret < 0)
                goto out;
 
-       ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
-                             retval, duration);
+       ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
+                             sizeof(flow_keys), retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, user_ctx,
                                     sizeof(struct bpf_flow_keys));
@@ -1016,7 +1197,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
                user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
        }
 
-       ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+       ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
 
@@ -1067,3 +1248,17 @@ out:
        kfree(ctx);
        return err;
 }
+
+static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &test_sk_check_kfunc_ids,
+       .acquire_set  = &test_sk_acquire_kfunc_ids,
+       .release_set  = &test_sk_release_kfunc_ids,
+       .ret_null_set = &test_sk_ret_null_kfunc_ids,
+};
+
+static int __init bpf_prog_test_run_init(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+}
+late_initcall(bpf_prog_test_run_init);
index 4603b7c..9615ae1 100644 (file)
@@ -3783,6 +3783,28 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_ANYTHING,
 };
+
+BPF_CALL_1(bpf_xdp_get_buff_len, struct  xdp_buff*, xdp)
+{
+       return xdp_get_buff_len(xdp);
+}
+
+static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+       .func           = bpf_xdp_get_buff_len,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+       .func           = bpf_xdp_get_buff_len,
+       .gpl_only       = false,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg1_btf_id    = &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
        return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3817,11 +3839,208 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+                            void *buf, unsigned long len, bool flush)
+{
+       unsigned long ptr_len, ptr_off = 0;
+       skb_frag_t *next_frag, *end_frag;
+       struct skb_shared_info *sinfo;
+       void *src, *dst;
+       u8 *ptr_buf;
+
+       if (likely(xdp->data_end - xdp->data >= off + len)) {
+               src = flush ? buf : xdp->data + off;
+               dst = flush ? xdp->data + off : buf;
+               memcpy(dst, src, len);
+               return;
+       }
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       end_frag = &sinfo->frags[sinfo->nr_frags];
+       next_frag = &sinfo->frags[0];
+
+       ptr_len = xdp->data_end - xdp->data;
+       ptr_buf = xdp->data;
+
+       while (true) {
+               if (off < ptr_off + ptr_len) {
+                       unsigned long copy_off = off - ptr_off;
+                       unsigned long copy_len = min(len, ptr_len - copy_off);
+
+                       src = flush ? buf : ptr_buf + copy_off;
+                       dst = flush ? ptr_buf + copy_off : buf;
+                       memcpy(dst, src, copy_len);
+
+                       off += copy_len;
+                       len -= copy_len;
+                       buf += copy_len;
+               }
+
+               if (!len || next_frag == end_frag)
+                       break;
+
+               ptr_off += ptr_len;
+               ptr_buf = skb_frag_address(next_frag);
+               ptr_len = skb_frag_size(next_frag);
+               next_frag++;
+       }
+}
+
+static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       u32 size = xdp->data_end - xdp->data;
+       void *addr = xdp->data;
+       int i;
+
+       if (unlikely(offset > 0xffff || len > 0xffff))
+               return ERR_PTR(-EFAULT);
+
+       if (offset + len > xdp_get_buff_len(xdp))
+               return ERR_PTR(-EINVAL);
+
+       if (offset < size) /* linear area */
+               goto out;
+
+       offset -= size;
+       for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
+               u32 frag_size = skb_frag_size(&sinfo->frags[i]);
+
+               if  (offset < frag_size) {
+                       addr = skb_frag_address(&sinfo->frags[i]);
+                       size = frag_size;
+                       break;
+               }
+               offset -= frag_size;
+       }
+out:
+       return offset + len < size ? addr + offset : NULL;
+}
+
+BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
+          void *, buf, u32, len)
+{
+       void *ptr;
+
+       ptr = bpf_xdp_pointer(xdp, offset, len);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       if (!ptr)
+               bpf_xdp_copy_buf(xdp, offset, buf, len, false);
+       else
+               memcpy(buf, ptr, len);
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
+       .func           = bpf_xdp_load_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+};
+
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
+          void *, buf, u32, len)
+{
+       void *ptr;
+
+       ptr = bpf_xdp_pointer(xdp, offset, len);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       if (!ptr)
+               bpf_xdp_copy_buf(xdp, offset, buf, len, true);
+       else
+               memcpy(ptr, buf, len);
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
+       .func           = bpf_xdp_store_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg4_type      = ARG_CONST_SIZE,
+};
+
+static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
+       struct xdp_rxq_info *rxq = xdp->rxq;
+       unsigned int tailroom;
+
+       if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
+               return -EOPNOTSUPP;
+
+       tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+       if (unlikely(offset > tailroom))
+               return -EINVAL;
+
+       memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
+       skb_frag_size_add(frag, offset);
+       sinfo->xdp_frags_size += offset;
+
+       return 0;
+}
+
+static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
+{
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+       int i, n_frags_free = 0, len_free = 0;
+
+       if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
+               return -EINVAL;
+
+       for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
+               skb_frag_t *frag = &sinfo->frags[i];
+               int shrink = min_t(int, offset, skb_frag_size(frag));
+
+               len_free += shrink;
+               offset -= shrink;
+
+               if (skb_frag_size(frag) == shrink) {
+                       struct page *page = skb_frag_page(frag);
+
+                       __xdp_return(page_address(page), &xdp->rxq->mem,
+                                    false, NULL);
+                       n_frags_free++;
+               } else {
+                       skb_frag_size_sub(frag, shrink);
+                       break;
+               }
+       }
+       sinfo->nr_frags -= n_frags_free;
+       sinfo->xdp_frags_size -= len_free;
+
+       if (unlikely(!sinfo->nr_frags)) {
+               xdp_buff_clear_frags_flag(xdp);
+               xdp->data_end -= offset;
+       }
+
+       return 0;
+}
+
 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
 {
        void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
        void *data_end = xdp->data_end + offset;
 
+       if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
+               if (offset < 0)
+                       return bpf_xdp_frags_shrink_tail(xdp, -offset);
+
+               return bpf_xdp_frags_increase_tail(xdp, offset);
+       }
+
        /* Notice that xdp_data_hard_end have reserved some tailroom */
        if (unlikely(data_end > data_hard_end))
                return -EINVAL;
@@ -4047,6 +4266,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
        enum bpf_map_type map_type = ri->map_type;
 
+       /* XDP_REDIRECT is not fully supported yet for xdp frags since
+        * not all XDP capable drivers can map non-linear xdp_frame in
+        * ndo_xdp_xmit.
+        */
+       if (unlikely(xdp_buff_has_frags(xdp) &&
+                    map_type != BPF_MAP_TYPE_CPUMAP))
+               return -EOPNOTSUPP;
+
        if (map_type == BPF_MAP_TYPE_XSKMAP)
                return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
 
@@ -4590,10 +4817,12 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
 };
 #endif
 
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
                                  unsigned long off, unsigned long len)
 {
-       memcpy(dst_buff, src_buff + off, len);
+       struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+
+       bpf_xdp_copy_buf(xdp, off, dst, len, false);
        return 0;
 }
 
@@ -4604,11 +4833,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
 
        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
                return -EINVAL;
-       if (unlikely(!xdp ||
-                    xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+
+       if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
                return -EFAULT;
 
-       return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+       return bpf_event_output(map, flags, meta, meta_size, xdp,
                                xdp_size, bpf_xdp_copy);
 }
 
@@ -4862,6 +5091,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                case SO_REUSEPORT:
                        sk->sk_reuseport = valbool;
                        break;
+               case SO_TXREHASH:
+                       if (val < -1 || val > 1) {
+                               ret = -EINVAL;
+                               break;
+                       }
+                       sk->sk_txrehash = (u8)val;
+                       break;
                default:
                        ret = -EINVAL;
                }
@@ -5040,6 +5276,9 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
                case SO_REUSEPORT:
                        *((int *)optval) = sk->sk_reuseport;
                        break;
+               case SO_TXREHASH:
+                       *((int *)optval) = sk->sk_txrehash;
+                       break;
                default:
                        goto err_clear;
                }
@@ -7533,6 +7772,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_xdp_redirect_map_proto;
        case BPF_FUNC_xdp_adjust_tail:
                return &bpf_xdp_adjust_tail_proto;
+       case BPF_FUNC_xdp_get_buff_len:
+               return &bpf_xdp_get_buff_len_proto;
+       case BPF_FUNC_xdp_load_bytes:
+               return &bpf_xdp_load_bytes_proto;
+       case BPF_FUNC_xdp_store_bytes:
+               return &bpf_xdp_store_bytes_proto;
        case BPF_FUNC_fib_lookup:
                return &bpf_xdp_fib_lookup_proto;
        case BPF_FUNC_check_mtu:
@@ -10062,7 +10307,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
        .convert_ctx_access     = tc_cls_act_convert_ctx_access,
        .gen_prologue           = tc_cls_act_prologue,
        .gen_ld_abs             = bpf_gen_ld_abs,
-       .check_kfunc_call       = bpf_prog_test_check_kfunc_call,
 };
 
 const struct bpf_prog_ops tc_cls_act_prog_ops = {
index a5b5bb9..8711350 100644 (file)
@@ -301,6 +301,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
 
        return peer;
 }
+EXPORT_SYMBOL_GPL(get_net_ns_by_id);
 
 /*
  * setup_net runs the initializers for the network namespace object.
@@ -363,6 +364,8 @@ out_undo:
 static int __net_init net_defaults_init_net(struct net *net)
 {
        net->core.sysctl_somaxconn = SOMAXCONN;
+       net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
+
        return 0;
 }
 
index bd62c01..e25d359 100644 (file)
@@ -423,11 +423,6 @@ static __always_inline struct page *
 __page_pool_put_page(struct page_pool *pool, struct page *page,
                     unsigned int dma_sync_size, bool allow_direct)
 {
-       /* It is not the last user for the page frag case */
-       if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
-           page_pool_atomic_sub_frag_count_return(page, 1))
-               return NULL;
-
        /* This allocator is optimized for the XDP mode that uses
         * one-frame-per-page, but have fallbacks that act like the
         * regular page allocator APIs.
@@ -471,8 +466,8 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
        return NULL;
 }
 
-void page_pool_put_page(struct page_pool *pool, struct page *page,
-                       unsigned int dma_sync_size, bool allow_direct)
+void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
+                                 unsigned int dma_sync_size, bool allow_direct)
 {
        page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
        if (page && !page_pool_recycle_in_ring(pool, page)) {
@@ -480,7 +475,7 @@ void page_pool_put_page(struct page_pool *pool, struct page *page,
                page_pool_return_page(pool, page);
        }
 }
-EXPORT_SYMBOL(page_pool_put_page);
+EXPORT_SYMBOL(page_pool_put_defragged_page);
 
 /* Caller must not use data area after call, as this function overwrites it */
 void page_pool_put_page_bulk(struct page_pool *pool, void **data,
@@ -491,6 +486,10 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
        for (i = 0; i < count; i++) {
                struct page *page = virt_to_head_page(data[i]);
 
+               /* It is not the last user for the page frag case */
+               if (!page_pool_is_last_frag(pool, page))
+                       continue;
+
                page = __page_pool_put_page(pool, page, -1, false);
                /* Approved for bulk recycling in ptr_ring cache */
                if (page)
@@ -526,8 +525,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
        long drain_count = BIAS_MAX - pool->frag_users;
 
        /* Some user is still using the page frag */
-       if (likely(page_pool_atomic_sub_frag_count_return(page,
-                                                         drain_count)))
+       if (likely(page_pool_defrag_page(page, drain_count)))
                return NULL;
 
        if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
@@ -548,8 +546,7 @@ static void page_pool_free_frag(struct page_pool *pool)
 
        pool->frag_page = NULL;
 
-       if (!page ||
-           page_pool_atomic_sub_frag_count_return(page, drain_count))
+       if (!page || page_pool_defrag_page(page, drain_count))
                return;
 
        page_pool_return_page(pool, page);
@@ -588,7 +585,7 @@ frag_reset:
                pool->frag_users = 1;
                *offset = 0;
                pool->frag_offset = size;
-               page_pool_set_frag_count(page, BIAS_MAX);
+               page_pool_fragment_page(page, BIAS_MAX);
                return page;
        }
 
index 4ff806d..09d31a7 100644 (file)
@@ -1447,6 +1447,15 @@ set_sndbuf:
                break;
        }
 
+       case SO_TXREHASH:
+               if (val < -1 || val > 1) {
+                       ret = -EINVAL;
+                       break;
+               }
+               /* Paired with READ_ONCE() in tcp_rtx_synack() */
+               WRITE_ONCE(sk->sk_txrehash, (u8)val);
+               break;
+
        default:
                ret = -ENOPROTOOPT;
                break;
@@ -1834,6 +1843,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val = sk->sk_reserved_mem;
                break;
 
+       case SO_TXREHASH:
+               v.val = sk->sk_txrehash;
+               break;
+
        default:
                /* We implement the SO_SNDLOWAT etc to not be settable
                 * (1003.1g 7).
@@ -2266,6 +2279,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
                        /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
                        sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+                       sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
                        /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
                        max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
                }
@@ -2611,7 +2625,8 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
 
        switch (cmsg->cmsg_type) {
        case SO_MARK:
-               if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+               if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+                   !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
                if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
                        return -EINVAL;
@@ -3278,6 +3293,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk->sk_pacing_rate = ~0UL;
        WRITE_ONCE(sk->sk_pacing_shift, 10);
        sk->sk_incoming_cpu = -1;
+       sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
 
        sk_rx_queue_clear(sk);
        /*
index 1827669..2d213c4 100644 (file)
@@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
        return NULL;
 }
 
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-                               struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+                               u32 which)
 {
        struct sk_psock_progs *progs = sock_map_progs(map);
-       struct bpf_prog **pprog;
 
        if (!progs)
                return -EOPNOTSUPP;
 
        switch (which) {
        case BPF_SK_MSG_VERDICT:
-               pprog = &progs->msg_parser;
+               *pprog = &progs->msg_parser;
                break;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
        case BPF_SK_SKB_STREAM_PARSER:
-               pprog = &progs->stream_parser;
+               *pprog = &progs->stream_parser;
                break;
 #endif
        case BPF_SK_SKB_STREAM_VERDICT:
                if (progs->skb_verdict)
                        return -EBUSY;
-               pprog = &progs->stream_verdict;
+               *pprog = &progs->stream_verdict;
                break;
        case BPF_SK_SKB_VERDICT:
                if (progs->stream_verdict)
                        return -EBUSY;
-               pprog = &progs->skb_verdict;
+               *pprog = &progs->skb_verdict;
                break;
        default:
                return -EOPNOTSUPP;
        }
 
+       return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+                               struct bpf_prog *old, u32 which)
+{
+       struct bpf_prog **pprog;
+       int ret;
+
+       ret = sock_map_prog_lookup(map, &pprog, which);
+       if (ret)
+               return ret;
+
        if (old)
                return psock_replace_prog(pprog, prog, old);
 
@@ -1455,6 +1467,57 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
        return 0;
 }
 
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+                           union bpf_attr __user *uattr)
+{
+       __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+       u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+       struct bpf_prog **pprog;
+       struct bpf_prog *prog;
+       struct bpf_map *map;
+       struct fd f;
+       u32 id = 0;
+       int ret;
+
+       if (attr->query.query_flags)
+               return -EINVAL;
+
+       f = fdget(ufd);
+       map = __bpf_map_get(f);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       rcu_read_lock();
+
+       ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+       if (ret)
+               goto end;
+
+       prog = *pprog;
+       prog_cnt = !prog ? 0 : 1;
+
+       if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+               goto end;
+
+       /* we do not hold the refcnt, the bpf prog may be released
+        * asynchronously and the id would be set to 0.
+        */
+       id = data_race(prog->aux->id);
+       if (id == 0)
+               prog_cnt = 0;
+
+end:
+       rcu_read_unlock();
+
+       if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+           (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+           copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+               ret = -EFAULT;
+
+       fdput(f);
+       return ret;
+}
+
 static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
 {
        switch (link->map->map_type) {
index 7b4d485..dbeb8ec 100644 (file)
@@ -593,6 +593,15 @@ static struct ctl_table netns_core_table[] = {
                .extra1         = SYSCTL_ZERO,
                .proc_handler   = proc_dointvec_minmax
        },
+       {
+               .procname       = "txrehash",
+               .data           = &init_net.core.sysctl_txrehash,
+               .maxlen         = sizeof(u8),
+               .mode           = 0644,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_ONE,
+               .proc_handler   = proc_dou8vec_minmax,
+       },
        { }
 };
 
@@ -611,7 +620,7 @@ __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
 
 static __net_init int sysctl_core_net_init(struct net *net)
 {
-       struct ctl_table *tbl;
+       struct ctl_table *tbl, *tmp;
 
        tbl = netns_core_table;
        if (!net_eq(net, &init_net)) {
@@ -619,7 +628,8 @@ static __net_init int sysctl_core_net_init(struct net *net)
                if (tbl == NULL)
                        goto err_dup;
 
-               tbl[0].data = &net->core.sysctl_somaxconn;
+               for (tmp = tbl; tmp->procname; tmp++)
+                       tmp->data += (char *)net - (char *)&init_net;
 
                /* Don't export any sysctls to unprivileged users */
                if (net->user_ns != &init_user_ns) {
index 7aba355..361df31 100644 (file)
@@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
 }
 
 /* Returns 0 on success, negative on failure */
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
-                    struct net_device *dev, u32 queue_index, unsigned int napi_id)
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+                      struct net_device *dev, u32 queue_index,
+                      unsigned int napi_id, u32 frag_size)
 {
        if (!dev) {
                WARN(1, "Missing net_device from driver");
@@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
        xdp_rxq->dev = dev;
        xdp_rxq->queue_index = queue_index;
        xdp_rxq->napi_id = napi_id;
+       xdp_rxq->frag_size = frag_size;
 
        xdp_rxq->reg_state = REG_STATE_REGISTERED;
        return 0;
 }
-EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
 
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
 {
@@ -369,8 +371,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
  * is used for those calls sites.  Thus, allowing for faster recycling
  * of xdp_frames/pages in those cases.
  */
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
-                        struct xdp_buff *xdp)
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+                 struct xdp_buff *xdp)
 {
        struct xdp_mem_allocator *xa;
        struct page *page;
@@ -406,12 +408,38 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdpf->mem, false, NULL);
+       }
+out:
        __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_frame_has_frags(xdpf)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_frame(xdpf);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdpf->mem, true, NULL);
+       }
+out:
        __xdp_return(xdpf->data, &xdpf->mem, true, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
@@ -447,7 +475,7 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
        struct xdp_mem_allocator *xa;
 
        if (mem->type != MEM_TYPE_PAGE_POOL) {
-               __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+               xdp_return_frame(xdpf);
                return;
        }
 
@@ -466,12 +494,38 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
                bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
        }
 
+       if (unlikely(xdp_frame_has_frags(xdpf))) {
+               struct skb_shared_info *sinfo;
+               int i;
+
+               sinfo = xdp_get_shared_info_from_frame(xdpf);
+               for (i = 0; i < sinfo->nr_frags; i++) {
+                       skb_frag_t *frag = &sinfo->frags[i];
+
+                       bq->q[bq->count++] = skb_frag_address(frag);
+                       if (bq->count == XDP_BULK_QUEUE_SIZE)
+                               xdp_flush_frame_bulk(bq);
+               }
+       }
        bq->q[bq->count++] = xdpf->data;
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
 
 void xdp_return_buff(struct xdp_buff *xdp)
 {
+       struct skb_shared_info *sinfo;
+       int i;
+
+       if (likely(!xdp_buff_has_frags(xdp)))
+               goto out;
+
+       sinfo = xdp_get_shared_info_from_buff(xdp);
+       for (i = 0; i < sinfo->nr_frags; i++) {
+               struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+               __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
+       }
+out:
        __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
 }
 
@@ -561,8 +615,14 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
                                           struct sk_buff *skb,
                                           struct net_device *dev)
 {
+       struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
        unsigned int headroom, frame_size;
        void *hard_start;
+       u8 nr_frags;
+
+       /* xdp frags frame */
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               nr_frags = sinfo->nr_frags;
 
        /* Part of headroom was reserved to xdpf */
        headroom = sizeof(*xdpf) + xdpf->headroom;
@@ -582,6 +642,12 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
        if (xdpf->metasize)
                skb_metadata_set(skb, xdpf->metasize);
 
+       if (unlikely(xdp_frame_has_frags(xdpf)))
+               xdp_update_skb_shared_info(skb, nr_frags,
+                                          sinfo->xdp_frags_size,
+                                          nr_frags * xdpf->frame_sz,
+                                          xdp_frame_is_frag_pfmemalloc(xdpf));
+
        /* Essential SKB info: protocol and skb->dev */
        skb->protocol = eth_type_trans(skb, dev);
 
index 5183e62..671c377 100644 (file)
@@ -136,11 +136,6 @@ static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
        return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
 }
 
-static inline u64 max48(const u64 seq1, const u64 seq2)
-{
-       return after48(seq1, seq2) ? seq1 : seq2;
-}
-
 /**
  * dccp_loss_count - Approximate the number of lost data packets in a burst loss
  * @s1:  last known sequence number before the loss ('hole')
index 0ea2927..ae66256 100644 (file)
@@ -1030,15 +1030,9 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v4_ctl_sk);
 }
 
-static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
-{
-       inet_twsk_purge(&dccp_hashinfo, AF_INET);
-}
-
 static struct pernet_operations dccp_v4_ops = {
        .init   = dccp_v4_init_net,
        .exit   = dccp_v4_exit_net,
-       .exit_batch = dccp_v4_exit_batch,
        .id     = &dccp_v4_pernet_id,
        .size   = sizeof(struct dccp_v4_pernet),
 };
index fa66351..eab3bd1 100644 (file)
@@ -1115,15 +1115,9 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v6_ctl_sk);
 }
 
-static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
-{
-       inet_twsk_purge(&dccp_hashinfo, AF_INET6);
-}
-
 static struct pernet_operations dccp_v6_ops = {
        .init   = dccp_v6_init_net,
        .exit   = dccp_v6_exit_net,
-       .exit_batch = dccp_v6_exit_batch,
        .id     = &dccp_v6_pernet_id,
        .size   = sizeof(struct dccp_v6_pernet),
 };
index 91e7a22..64d805b 100644 (file)
@@ -22,6 +22,7 @@
 #include "feat.h"
 
 struct inet_timewait_death_row dccp_death_row = {
+       .tw_refcount = REFCOUNT_INIT(1),
        .sysctl_max_tw_buckets = NR_FILE * 2,
        .hashinfo       = &dccp_hashinfo,
 };
index 3d21521..909b045 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/of.h>
 #include <linux/of_net.h>
 #include <net/devlink.h>
+#include <net/sch_generic.h>
 
 #include "dsa_priv.h"
 
@@ -1064,9 +1065,18 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
 
        list_for_each_entry(dp, &dst->ports, list) {
                if (dsa_port_is_cpu(dp)) {
-                       err = dsa_master_setup(dp->master, dp);
+                       struct net_device *master = dp->master;
+                       bool admin_up = (master->flags & IFF_UP) &&
+                                       !qdisc_tx_is_noop(master);
+
+                       err = dsa_master_setup(master, dp);
                        if (err)
                                return err;
+
+                       /* Replay master state event */
+                       dsa_tree_master_admin_state_change(dst, master, admin_up);
+                       dsa_tree_master_oper_state_change(dst, master,
+                                                         netif_oper_up(master));
                }
        }
 
@@ -1081,9 +1091,19 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
 
        rtnl_lock();
 
-       list_for_each_entry(dp, &dst->ports, list)
-               if (dsa_port_is_cpu(dp))
-                       dsa_master_teardown(dp->master);
+       list_for_each_entry(dp, &dst->ports, list) {
+               if (dsa_port_is_cpu(dp)) {
+                       struct net_device *master = dp->master;
+
+                       /* Synthesizing an "admin down" state is sufficient for
+                        * the switches to get a notification if the master is
+                        * currently up and running.
+                        */
+                       dsa_tree_master_admin_state_change(dst, master, false);
+
+                       dsa_master_teardown(master);
+               }
+       }
 
        rtnl_unlock();
 }
@@ -1279,6 +1299,52 @@ out_unlock:
        return err;
 }
 
+static void dsa_tree_master_state_change(struct dsa_switch_tree *dst,
+                                        struct net_device *master)
+{
+       struct dsa_notifier_master_state_info info;
+       struct dsa_port *cpu_dp = master->dsa_ptr;
+
+       info.master = master;
+       info.operational = dsa_port_master_is_operational(cpu_dp);
+
+       dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info);
+}
+
+void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
+                                       struct net_device *master,
+                                       bool up)
+{
+       struct dsa_port *cpu_dp = master->dsa_ptr;
+       bool notify = false;
+
+       if ((dsa_port_master_is_operational(cpu_dp)) !=
+           (up && cpu_dp->master_oper_up))
+               notify = true;
+
+       cpu_dp->master_admin_up = up;
+
+       if (notify)
+               dsa_tree_master_state_change(dst, master);
+}
+
+void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
+                                      struct net_device *master,
+                                      bool up)
+{
+       struct dsa_port *cpu_dp = master->dsa_ptr;
+       bool notify = false;
+
+       if ((dsa_port_master_is_operational(cpu_dp)) !=
+           (cpu_dp->master_admin_up && up))
+               notify = true;
+
+       cpu_dp->master_oper_up = up;
+
+       if (notify)
+               dsa_tree_master_state_change(dst, master);
+}
+
 static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
 {
        struct dsa_switch_tree *dst = ds->dst;
index 760306f..2bbfa9e 100644 (file)
@@ -40,6 +40,7 @@ enum {
        DSA_NOTIFIER_TAG_PROTO_DISCONNECT,
        DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
        DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
+       DSA_NOTIFIER_MASTER_STATE_CHANGE,
 };
 
 /* DSA_NOTIFIER_AGEING_TIME */
@@ -109,6 +110,12 @@ struct dsa_notifier_tag_8021q_vlan_info {
        u16 vid;
 };
 
+/* DSA_NOTIFIER_MASTER_STATE_CHANGE */
+struct dsa_notifier_master_state_info {
+       const struct net_device *master;
+       bool operational;
+};
+
 struct dsa_switchdev_event_work {
        struct dsa_switch *ds;
        int port;
@@ -482,6 +489,12 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
                              struct net_device *master,
                              const struct dsa_device_ops *tag_ops,
                              const struct dsa_device_ops *old_tag_ops);
+void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
+                                       struct net_device *master,
+                                       bool up);
+void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
+                                      struct net_device *master,
+                                      bool up);
 unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
 void dsa_bridge_num_put(const struct net_device *bridge_dev,
                        unsigned int bridge_num);
index 22241af..2b5b0f2 100644 (file)
@@ -2346,6 +2346,36 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
                err = dsa_port_lag_change(dp, info->lower_state_info);
                return notifier_from_errno(err);
        }
+       case NETDEV_CHANGE:
+       case NETDEV_UP: {
+               /* Track state of master port.
+                * DSA driver may require the master port (and indirectly
+                * the tagger) to be available for some special operation.
+                */
+               if (netdev_uses_dsa(dev)) {
+                       struct dsa_port *cpu_dp = dev->dsa_ptr;
+                       struct dsa_switch_tree *dst = cpu_dp->ds->dst;
+
+                       /* Track when the master port is UP */
+                       dsa_tree_master_oper_state_change(dst, dev,
+                                                         netif_oper_up(dev));
+
+                       /* Track when the master port is ready and can accept
+                        * packet.
+                        * NETDEV_UP event is not enough to flag a port as ready.
+                        * We also have to wait for linkwatch_do_dev to dev_activate
+                        * and emit a NETDEV_CHANGE event.
+                        * We check if a master port is ready by checking if the dev
+                        * have a qdisc assigned and is not noop.
+                        */
+                       dsa_tree_master_admin_state_change(dst, dev,
+                                                          !qdisc_tx_is_noop(dev));
+
+                       return NOTIFY_OK;
+               }
+
+               return NOTIFY_DONE;
+       }
        case NETDEV_GOING_DOWN: {
                struct dsa_port *dp, *cpu_dp;
                struct dsa_switch_tree *dst;
@@ -2357,6 +2387,8 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
                cpu_dp = dev->dsa_ptr;
                dst = cpu_dp->ds->dst;
 
+               dsa_tree_master_admin_state_change(dst, dev, false);
+
                list_for_each_entry(dp, &dst->ports, list) {
                        if (!dsa_port_is_user(dp))
                                continue;
index e3c7d26..4866b58 100644 (file)
@@ -113,26 +113,15 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
        return dsa_tag_8021q_bridge_join(ds, info);
 }
 
-static int dsa_switch_bridge_leave(struct dsa_switch *ds,
-                                  struct dsa_notifier_bridge_info *info)
+static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds,
+                                         struct dsa_notifier_bridge_info *info)
 {
-       struct dsa_switch_tree *dst = ds->dst;
        struct netlink_ext_ack extack = {0};
        bool change_vlan_filtering = false;
        bool vlan_filtering;
        struct dsa_port *dp;
        int err;
 
-       if (dst->index == info->tree_index && ds->index == info->sw_index &&
-           ds->ops->port_bridge_leave)
-               ds->ops->port_bridge_leave(ds, info->port, info->bridge);
-
-       if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
-           ds->ops->crosschip_bridge_leave)
-               ds->ops->crosschip_bridge_leave(ds, info->tree_index,
-                                               info->sw_index, info->port,
-                                               info->bridge);
-
        if (ds->needs_standalone_vlan_filtering &&
            !br_vlan_enabled(info->bridge.dev)) {
                change_vlan_filtering = true;
@@ -172,6 +161,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
                        return err;
        }
 
+       return 0;
+}
+
+static int dsa_switch_bridge_leave(struct dsa_switch *ds,
+                                  struct dsa_notifier_bridge_info *info)
+{
+       struct dsa_switch_tree *dst = ds->dst;
+       int err;
+
+       if (dst->index == info->tree_index && ds->index == info->sw_index &&
+           ds->ops->port_bridge_leave)
+               ds->ops->port_bridge_leave(ds, info->port, info->bridge);
+
+       if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
+           ds->ops->crosschip_bridge_leave)
+               ds->ops->crosschip_bridge_leave(ds, info->tree_index,
+                                               info->sw_index, info->port,
+                                               info->bridge);
+
+       if (ds->dst->index == info->tree_index && ds->index == info->sw_index) {
+               err = dsa_switch_sync_vlan_filtering(ds, info);
+               if (err)
+                       return err;
+       }
+
        return dsa_tag_8021q_bridge_leave(ds, info);
 }
 
@@ -683,6 +697,18 @@ dsa_switch_disconnect_tag_proto(struct dsa_switch *ds,
        return 0;
 }
 
+static int
+dsa_switch_master_state_change(struct dsa_switch *ds,
+                              struct dsa_notifier_master_state_info *info)
+{
+       if (!ds->ops->master_state_change)
+               return 0;
+
+       ds->ops->master_state_change(ds, info->master, info->operational);
+
+       return 0;
+}
+
 static int dsa_switch_event(struct notifier_block *nb,
                            unsigned long event, void *info)
 {
@@ -756,6 +782,9 @@ static int dsa_switch_event(struct notifier_block *nb,
        case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL:
                err = dsa_switch_tag_8021q_vlan_del(ds, info);
                break;
+       case DSA_NOTIFIER_MASTER_STATE_CHANGE:
+               err = dsa_switch_master_state_change(ds, info);
+               break;
        default:
                err = -EOPNOTSUPP;
                break;
index 1ea9401..57d2e00 100644 (file)
@@ -4,30 +4,12 @@
  */
 
 #include <linux/etherdevice.h>
+#include <linux/bitfield.h>
+#include <net/dsa.h>
+#include <linux/dsa/tag_qca.h>
 
 #include "dsa_priv.h"
 
-#define QCA_HDR_LEN    2
-#define QCA_HDR_VERSION        0x2
-
-#define QCA_HDR_RECV_VERSION_MASK      GENMASK(15, 14)
-#define QCA_HDR_RECV_VERSION_S         14
-#define QCA_HDR_RECV_PRIORITY_MASK     GENMASK(13, 11)
-#define QCA_HDR_RECV_PRIORITY_S                11
-#define QCA_HDR_RECV_TYPE_MASK         GENMASK(10, 6)
-#define QCA_HDR_RECV_TYPE_S            6
-#define QCA_HDR_RECV_FRAME_IS_TAGGED   BIT(3)
-#define QCA_HDR_RECV_SOURCE_PORT_MASK  GENMASK(2, 0)
-
-#define QCA_HDR_XMIT_VERSION_MASK      GENMASK(15, 14)
-#define QCA_HDR_XMIT_VERSION_S         14
-#define QCA_HDR_XMIT_PRIORITY_MASK     GENMASK(13, 11)
-#define QCA_HDR_XMIT_PRIORITY_S                11
-#define QCA_HDR_XMIT_CONTROL_MASK      GENMASK(10, 8)
-#define QCA_HDR_XMIT_CONTROL_S         8
-#define QCA_HDR_XMIT_FROM_CPU          BIT(7)
-#define QCA_HDR_XMIT_DP_BIT_MASK       GENMASK(6, 0)
-
 static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -40,8 +22,9 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
        phdr = dsa_etype_header_pos_tx(skb);
 
        /* Set the version field, and set destination port information */
-       hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
-               QCA_HDR_XMIT_FROM_CPU | BIT(dp->index);
+       hdr = FIELD_PREP(QCA_HDR_XMIT_VERSION, QCA_HDR_VERSION);
+       hdr |= QCA_HDR_XMIT_FROM_CPU;
+       hdr |= FIELD_PREP(QCA_HDR_XMIT_DP_BIT, BIT(dp->index));
 
        *phdr = htons(hdr);
 
@@ -50,10 +33,17 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
-       u8 ver;
-       u16  hdr;
-       int port;
+       struct qca_tagger_data *tagger_data;
+       struct dsa_port *dp = dev->dsa_ptr;
+       struct dsa_switch *ds = dp->ds;
+       u8 ver, pk_type;
        __be16 *phdr;
+       int port;
+       u16 hdr;
+
+       BUILD_BUG_ON(sizeof(struct qca_mgmt_ethhdr) != QCA_HDR_MGMT_HEADER_LEN + QCA_HDR_LEN);
+
+       tagger_data = ds->tagger_data;
 
        if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
                return NULL;
@@ -62,16 +52,33 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
        hdr = ntohs(*phdr);
 
        /* Make sure the version is correct */
-       ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S;
+       ver = FIELD_GET(QCA_HDR_RECV_VERSION, hdr);
        if (unlikely(ver != QCA_HDR_VERSION))
                return NULL;
 
+       /* Get pk type */
+       pk_type = FIELD_GET(QCA_HDR_RECV_TYPE, hdr);
+
+       /* Ethernet mgmt read/write packet */
+       if (pk_type == QCA_HDR_RECV_TYPE_RW_REG_ACK) {
+               if (likely(tagger_data->rw_reg_ack_handler))
+                       tagger_data->rw_reg_ack_handler(ds, skb);
+               return NULL;
+       }
+
+       /* Ethernet MIB counter packet */
+       if (pk_type == QCA_HDR_RECV_TYPE_MIB) {
+               if (likely(tagger_data->mib_autocast_handler))
+                       tagger_data->mib_autocast_handler(ds, skb);
+               return NULL;
+       }
+
        /* Remove QCA tag and recalculate checksum */
        skb_pull_rcsum(skb, QCA_HDR_LEN);
        dsa_strip_etype_header(skb, QCA_HDR_LEN);
 
        /* Get source port information */
-       port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
+       port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr);
 
        skb->dev = dsa_master_find_slave(dev, 0, port);
        if (!skb->dev)
@@ -80,12 +87,34 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
+static int qca_tag_connect(struct dsa_switch *ds)
+{
+       struct qca_tagger_data *tagger_data;
+
+       tagger_data = kzalloc(sizeof(*tagger_data), GFP_KERNEL);
+       if (!tagger_data)
+               return -ENOMEM;
+
+       ds->tagger_data = tagger_data;
+
+       return 0;
+}
+
+static void qca_tag_disconnect(struct dsa_switch *ds)
+{
+       kfree(ds->tagger_data);
+       ds->tagger_data = NULL;
+}
+
 static const struct dsa_device_ops qca_netdev_ops = {
        .name   = "qca",
        .proto  = DSA_TAG_PROTO_QCA,
+       .connect = qca_tag_connect,
+       .disconnect = qca_tag_disconnect,
        .xmit   = qca_tag_xmit,
        .rcv    = qca_tag_rcv,
        .needed_headroom = QCA_HDR_LEN,
+       .promisc_on_master = true,
 };
 
 MODULE_LICENSE("GPL");
index c1d5f5e..18a5035 100644 (file)
@@ -53,7 +53,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
               nla_total_size(sizeof(u32)) +    /* _RINGS_RX_MINI */
               nla_total_size(sizeof(u32)) +    /* _RINGS_RX_JUMBO */
               nla_total_size(sizeof(u32)) +    /* _RINGS_TX */
-              nla_total_size(sizeof(u32));     /* _RINGS_RX_BUF_LEN */
+              nla_total_size(sizeof(u32)) +    /* _RINGS_RX_BUF_LEN */
+              nla_total_size(sizeof(u8));      /* _RINGS_TCP_DATA_SPLIT */
 }
 
 static int rings_fill_reply(struct sk_buff *skb,
@@ -61,9 +62,11 @@ static int rings_fill_reply(struct sk_buff *skb,
                            const struct ethnl_reply_data *reply_base)
 {
        const struct rings_reply_data *data = RINGS_REPDATA(reply_base);
-       const struct kernel_ethtool_ringparam *kernel_ringparam = &data->kernel_ringparam;
+       const struct kernel_ethtool_ringparam *kr = &data->kernel_ringparam;
        const struct ethtool_ringparam *ringparam = &data->ringparam;
 
+       WARN_ON(kr->tcp_data_split > ETHTOOL_TCP_DATA_SPLIT_ENABLED);
+
        if ((ringparam->rx_max_pending &&
             (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MAX,
                          ringparam->rx_max_pending) ||
@@ -84,9 +87,11 @@ static int rings_fill_reply(struct sk_buff *skb,
                          ringparam->tx_max_pending) ||
              nla_put_u32(skb, ETHTOOL_A_RINGS_TX,
                          ringparam->tx_pending)))  ||
-           (kernel_ringparam->rx_buf_len &&
-            (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN,
-                         kernel_ringparam->rx_buf_len))))
+           (kr->rx_buf_len &&
+            (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, kr->rx_buf_len))) ||
+           (kr->tcp_data_split &&
+            (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT,
+                        kr->tcp_data_split))))
                return -EMSGSIZE;
 
        return 0;
index 043e4e9..ff9ec76 100644 (file)
@@ -259,11 +259,6 @@ static inline u16 prp_get_skb_sequence_nr(struct prp_rct *rct)
        return ntohs(rct->sequence_nr);
 }
 
-static inline u16 get_prp_lan_id(struct prp_rct *rct)
-{
-       return ntohs(rct->lan_id_and_LSDU_size) >> 12;
-}
-
 /* assume there is a valid rct */
 static inline bool prp_check_lsdu_size(struct sk_buff *skb,
                                       struct prp_rct *rct,
index de610cb..b60c9fd 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook  */
 
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bpf.h>
@@ -212,26 +213,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
        }
 }
 
-BTF_SET_START(bpf_tcp_ca_kfunc_ids)
+BTF_SET_START(bpf_tcp_ca_check_kfunc_ids)
 BTF_ID(func, tcp_reno_ssthresh)
 BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
-BTF_SET_END(bpf_tcp_ca_kfunc_ids)
+BTF_SET_END(bpf_tcp_ca_check_kfunc_ids)
 
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
-{
-       if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
-               return true;
-       return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
-}
+static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &bpf_tcp_ca_check_kfunc_ids,
+};
 
 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
        .get_func_proto         = bpf_tcp_ca_get_func_proto,
        .is_valid_access        = bpf_tcp_ca_is_valid_access,
        .btf_struct_access      = bpf_tcp_ca_btf_struct_access,
-       .check_kfunc_call       = bpf_tcp_ca_check_kfunc_call,
 };
 
 static int bpf_tcp_ca_init_member(const struct btf_type *t,
@@ -300,3 +298,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
        .init = bpf_tcp_ca_init,
        .name = "tcp_congestion_ops",
 };
+
+static int __init bpf_tcp_ca_kfunc_init(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+}
+late_initcall(bpf_tcp_ca_kfunc_init);
index b458986..4c53994 100644 (file)
@@ -1257,34 +1257,13 @@ fib_info_laddrhash_bucket(const struct net *net, __be32 val)
        return &fib_info_laddrhash[slot];
 }
 
-static struct hlist_head *fib_info_hash_alloc(int bytes)
-{
-       if (bytes <= PAGE_SIZE)
-               return kzalloc(bytes, GFP_KERNEL);
-       else
-               return (struct hlist_head *)
-                       __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                        get_order(bytes));
-}
-
-static void fib_info_hash_free(struct hlist_head *hash, int bytes)
-{
-       if (!hash)
-               return;
-
-       if (bytes <= PAGE_SIZE)
-               kfree(hash);
-       else
-               free_pages((unsigned long) hash, get_order(bytes));
-}
-
 static void fib_info_hash_move(struct hlist_head *new_info_hash,
                               struct hlist_head *new_laddrhash,
                               unsigned int new_size)
 {
        struct hlist_head *old_info_hash, *old_laddrhash;
        unsigned int old_size = fib_info_hash_size;
-       unsigned int i, bytes;
+       unsigned int i;
 
        spin_lock_bh(&fib_info_lock);
        old_info_hash = fib_info_hash;
@@ -1325,9 +1304,8 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 
        spin_unlock_bh(&fib_info_lock);
 
-       bytes = old_size * sizeof(struct hlist_head *);
-       fib_info_hash_free(old_info_hash, bytes);
-       fib_info_hash_free(old_laddrhash, bytes);
+       kvfree(old_info_hash);
+       kvfree(old_laddrhash);
 }
 
 __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
@@ -1444,19 +1422,19 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
                unsigned int new_size = fib_info_hash_size << 1;
                struct hlist_head *new_info_hash;
                struct hlist_head *new_laddrhash;
-               unsigned int bytes;
+               size_t bytes;
 
                if (!new_size)
                        new_size = 16;
-               bytes = new_size * sizeof(struct hlist_head *);
-               new_info_hash = fib_info_hash_alloc(bytes);
-               new_laddrhash = fib_info_hash_alloc(bytes);
+               bytes = (size_t)new_size * sizeof(struct hlist_head *);
+               new_info_hash = kvzalloc(bytes, GFP_KERNEL);
+               new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
                if (!new_info_hash || !new_laddrhash) {
-                       fib_info_hash_free(new_info_hash, bytes);
-                       fib_info_hash_free(new_laddrhash, bytes);
-               } else
+                       kvfree(new_info_hash);
+                       kvfree(new_laddrhash);
+               } else {
                        fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
-
+               }
                if (!fib_info_hash_size)
                        goto failure;
        }
index b7e277d..72a375c 100644 (file)
@@ -192,24 +192,14 @@ struct icmp_control {
 
 static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 
-/*
- *     The ICMP socket(s). This is the most convenient way to flow control
- *     our ICMP output as well as maintain a clean interface throughout
- *     all layers. All Socketless IP sends will soon be gone.
- *
- *     On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmp_sk(struct net *net)
-{
-       return this_cpu_read(*net->ipv4.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
 
 /* Called with BH disabled */
 static inline struct sock *icmp_xmit_lock(struct net *net)
 {
        struct sock *sk;
 
-       sk = icmp_sk(net);
+       sk = this_cpu_read(ipv4_icmp_sk);
 
        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
                /* This can happen if the output path signals a
@@ -217,11 +207,13 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
                 */
                return NULL;
        }
+       sock_net_set(sk, net);
        return sk;
 }
 
 static inline void icmp_xmit_unlock(struct sock *sk)
 {
+       sock_net_set(sk, &init_net);
        spin_unlock(&sk->sk_lock.slock);
 }
 
@@ -363,14 +355,13 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
        return 0;
 }
 
-static void icmp_push_reply(struct icmp_bxm *icmp_param,
+static void icmp_push_reply(struct sock *sk,
+                           struct icmp_bxm *icmp_param,
                            struct flowi4 *fl4,
                            struct ipcm_cookie *ipc, struct rtable **rt)
 {
-       struct sock *sk;
        struct sk_buff *skb;
 
-       sk = icmp_sk(dev_net((*rt)->dst.dev));
        if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
                           icmp_param->data_len+icmp_param->head_len,
                           icmp_param->head_len,
@@ -452,7 +443,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
        if (IS_ERR(rt))
                goto out_unlock;
        if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
-               icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
+               icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
        ip_rt_put(rt);
 out_unlock:
        icmp_xmit_unlock(sk);
@@ -766,7 +757,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
        if (!fl4.saddr)
                fl4.saddr = htonl(INADDR_DUMMY);
 
-       icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+       icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
 ende:
        ip_rt_put(rt);
 out_unlock:
@@ -1434,46 +1425,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
        },
 };
 
-static void __net_exit icmp_sk_exit(struct net *net)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
-       free_percpu(net->ipv4.icmp_sk);
-       net->ipv4.icmp_sk = NULL;
-}
-
 static int __net_init icmp_sk_init(struct net *net)
 {
-       int i, err;
-
-       net->ipv4.icmp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv4.icmp_sk)
-               return -ENOMEM;
-
-       for_each_possible_cpu(i) {
-               struct sock *sk;
-
-               err = inet_ctl_sock_create(&sk, PF_INET,
-                                          SOCK_RAW, IPPROTO_ICMP, net);
-               if (err < 0)
-                       goto fail;
-
-               *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
-
-               /* Enough space for 2 64K ICMP packets, including
-                * sk_buff/skb_shared_info struct overhead.
-                */
-               sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
-
-               /*
-                * Speedup sock_wfree()
-                */
-               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
-       }
-
        /* Control parameters for ECHO replies. */
        net->ipv4.sysctl_icmp_echo_ignore_all = 0;
        net->ipv4.sysctl_icmp_echo_enable_probe = 0;
@@ -1499,18 +1452,36 @@ static int __net_init icmp_sk_init(struct net *net)
        net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
 
        return 0;
-
-fail:
-       icmp_sk_exit(net);
-       return err;
 }
 
 static struct pernet_operations __net_initdata icmp_sk_ops = {
        .init = icmp_sk_init,
-       .exit = icmp_sk_exit,
 };
 
 int __init icmp_init(void)
 {
+       int err, i;
+
+       for_each_possible_cpu(i) {
+               struct sock *sk;
+
+               err = inet_ctl_sock_create(&sk, PF_INET,
+                                          SOCK_RAW, IPPROTO_ICMP, &init_net);
+               if (err < 0)
+                       return err;
+
+               per_cpu(ipv4_icmp_sk, i) = sk;
+
+               /* Enough space for 2 64K ICMP packets, including
+                * sk_buff/skb_shared_info struct overhead.
+                */
+               sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
+
+               /*
+                * Speedup sock_wfree()
+                */
+               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
+       }
        return register_pernet_subsys(&icmp_sk_ops);
 }
index fc2a985..1e5b53c 100644 (file)
@@ -866,12 +866,9 @@ static void reqsk_timer_handler(struct timer_list *t)
            (!resend ||
             !inet_rtx_syn_ack(sk_listener, req) ||
             inet_rsk(req)->acked)) {
-               unsigned long timeo;
-
                if (req->num_timeout++ == 0)
                        atomic_dec(&queue->young);
-               timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
-               mod_timer(&req->rsk_timer, jiffies + timeo);
+               mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX));
 
                if (!nreq)
                        return;
@@ -1046,6 +1043,9 @@ int inet_csk_listen_start(struct sock *sk)
        sk->sk_ack_backlog = 0;
        inet_csk_delack_init(sk);
 
+       if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
+               sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
+
        /* There is race window here: we announce ourselves listening,
         * but this transition is still not validated by get_port().
         * It is OK, because this socket enters to hash table only
index 437afe3..9e0bbd0 100644 (file)
@@ -52,14 +52,15 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
        spin_unlock(lock);
 
        /* Disassociate with bind bucket. */
-       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
-                       hashinfo->bhash_size)];
+       bhead = &hashinfo->bhash[tw->tw_bslot];
 
        spin_lock(&bhead->lock);
        inet_twsk_bind_unhash(tw, hashinfo);
        spin_unlock(&bhead->lock);
 
-       atomic_dec(&tw->tw_dr->tw_count);
+       if (refcount_dec_and_test(&tw->tw_dr->tw_refcount))
+               kfree(tw->tw_dr);
+
        inet_twsk_put(tw);
 }
 
@@ -110,8 +111,12 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
           Note, that any socket with inet->num != 0 MUST be bound in
           binding cache, even if it is closed.
         */
-       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
-                       hashinfo->bhash_size)];
+       /* Cache inet_bhashfn(), because 'struct net' might be no longer
+        * available later in inet_twsk_kill().
+        */
+       tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
+                                   hashinfo->bhash_size);
+       bhead = &hashinfo->bhash[tw->tw_bslot];
        spin_lock(&bhead->lock);
        tw->tw_tb = icsk->icsk_bind_hash;
        WARN_ON(!icsk->icsk_bind_hash);
@@ -145,10 +150,6 @@ static void tw_timer_handler(struct timer_list *t)
 {
        struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
 
-       if (tw->tw_kill)
-               __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
-       else
-               __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
        inet_twsk_kill(tw);
 }
 
@@ -158,7 +159,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 {
        struct inet_timewait_sock *tw;
 
-       if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets)
+       if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets)
                return NULL;
 
        tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
@@ -244,59 +245,15 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
         * of PAWS.
         */
 
-       tw->tw_kill = timeo <= 4*HZ;
        if (!rearm) {
+               bool kill = timeo <= 4*HZ;
+
+               __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED :
+                                                    LINUX_MIB_TIMEWAITED);
                BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
-               atomic_inc(&tw->tw_dr->tw_count);
+               refcount_inc(&tw->tw_dr->tw_refcount);
        } else {
                mod_timer_pending(&tw->tw_timer, jiffies + timeo);
        }
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
-
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
-{
-       struct inet_timewait_sock *tw;
-       struct sock *sk;
-       struct hlist_nulls_node *node;
-       unsigned int slot;
-
-       for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
-               struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
-restart_rcu:
-               cond_resched();
-               rcu_read_lock();
-restart:
-               sk_nulls_for_each_rcu(sk, node, &head->chain) {
-                       if (sk->sk_state != TCP_TIME_WAIT)
-                               continue;
-                       tw = inet_twsk(sk);
-                       if ((tw->tw_family != family) ||
-                               refcount_read(&twsk_net(tw)->ns.count))
-                               continue;
-
-                       if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
-                               continue;
-
-                       if (unlikely((tw->tw_family != family) ||
-                                    refcount_read(&twsk_net(tw)->ns.count))) {
-                               inet_twsk_put(tw);
-                               goto restart;
-                       }
-
-                       rcu_read_unlock();
-                       local_bh_disable();
-                       inet_twsk_deschedule_put(tw);
-                       local_bh_enable();
-                       goto restart_rcu;
-               }
-               /* If the nulls value we got at the end of this lookup is
-                * not the expected one, we must restart lookup.
-                * We probably met an item that was moved to another chain.
-                */
-               if (get_nulls_value(node) != slot)
-                       goto restart;
-               rcu_read_unlock();
-       }
-}
-EXPORT_SYMBOL_GPL(inet_twsk_purge);
index da1b503..a9e22a0 100644 (file)
@@ -42,7 +42,7 @@
  */
 
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
-                     __be32 daddr, struct rtable *rt, int is_frag)
+                     __be32 daddr, struct rtable *rt)
 {
        unsigned char *iph = skb_network_header(skb);
 
@@ -53,28 +53,15 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
        if (opt->srr)
                memcpy(iph + opt->srr + iph[opt->srr + 1] - 4, &daddr, 4);
 
-       if (!is_frag) {
-               if (opt->rr_needaddr)
-                       ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt);
-               if (opt->ts_needaddr)
-                       ip_rt_get_source(iph + opt->ts + iph[opt->ts + 2] - 9, skb, rt);
-               if (opt->ts_needtime) {
-                       __be32 midtime;
+       if (opt->rr_needaddr)
+               ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt);
+       if (opt->ts_needaddr)
+               ip_rt_get_source(iph + opt->ts + iph[opt->ts + 2] - 9, skb, rt);
+       if (opt->ts_needtime) {
+               __be32 midtime;
 
-                       midtime = inet_current_timestamp();
-                       memcpy(iph + opt->ts + iph[opt->ts + 2] - 5, &midtime, 4);
-               }
-               return;
-       }
-       if (opt->rr) {
-               memset(iph + opt->rr, IPOPT_NOP, iph[opt->rr + 1]);
-               opt->rr = 0;
-               opt->rr_needaddr = 0;
-       }
-       if (opt->ts) {
-               memset(iph + opt->ts, IPOPT_NOP, iph[opt->ts + 1]);
-               opt->ts = 0;
-               opt->ts_needaddr = opt->ts_needtime = 0;
+               midtime = inet_current_timestamp();
+               memcpy(iph + opt->ts + iph[opt->ts + 2] - 5, &midtime, 4);
        }
 }
 
index 139cec2..0c0574e 100644 (file)
@@ -179,7 +179,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
 
        if (opt && opt->opt.optlen) {
                iph->ihl += opt->opt.optlen>>2;
-               ip_options_build(skb, &opt->opt, daddr, rt, 0);
+               ip_options_build(skb, &opt->opt, daddr, rt);
        }
 
        skb->priority = sk->sk_priority;
@@ -519,7 +519,7 @@ packet_routed:
 
        if (inet_opt && inet_opt->opt.optlen) {
                iph->ihl += inet_opt->opt.optlen >> 2;
-               ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
+               ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt);
        }
 
        ip_select_ident_segs(net, skb, sk,
@@ -1541,7 +1541,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 
        if (opt) {
                iph->ihl += opt->optlen >> 2;
-               ip_options_build(skb, opt, cork->addr, rt, 0);
+               ip_options_build(skb, opt, cork->addr, rt);
        }
 
        skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
index f30273a..2883607 100644 (file)
@@ -59,8 +59,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
        socket_seq_show(seq);
        seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
                   sock_prot_inuse_get(net, &tcp_prot), orphans,
-                  atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets,
-                  proto_memory_allocated(&tcp_prot));
+                  refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1,
+                  sockets, proto_memory_allocated(&tcp_prot));
        seq_printf(seq, "UDP: inuse %d mem %ld\n",
                   sock_prot_inuse_get(net, &udp_prot),
                   proto_memory_allocated(&udp_prot));
index ff6f91c..8b35075 100644 (file)
 
 #define DEFAULT_MIN_PMTU (512 + 20 + 20)
 #define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
-
+#define DEFAULT_MIN_ADVMSS 256
 static int ip_rt_max_size;
 static int ip_rt_redirect_number __read_mostly = 9;
 static int ip_rt_redirect_load __read_mostly   = HZ / 50;
 static int ip_rt_redirect_silence __read_mostly        = ((HZ / 50) << (9 + 1));
 static int ip_rt_error_cost __read_mostly      = HZ;
 static int ip_rt_error_burst __read_mostly     = 5 * HZ;
-static int ip_rt_min_advmss __read_mostly      = 256;
 
 static int ip_rt_gc_timeout __read_mostly      = RT_GC_TIMEOUT;
 
@@ -458,7 +457,7 @@ static u32 *ip_tstamps __read_mostly;
  * if one generator is seldom used. This makes hard for an attacker
  * to infer how many packets were sent between two points in time.
  */
-u32 ip_idents_reserve(u32 hash, int segs)
+static u32 ip_idents_reserve(u32 hash, int segs)
 {
        u32 bucket, old, now = (u32)jiffies;
        atomic_t *p_id;
@@ -479,7 +478,6 @@ u32 ip_idents_reserve(u32 hash, int segs)
         */
        return atomic_add_return(segs + delta, p_id) - segs;
 }
-EXPORT_SYMBOL(ip_idents_reserve);
 
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
 {
@@ -1298,9 +1296,10 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 
 static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 {
+       struct net *net = dev_net(dst->dev);
        unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
        unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
-                                   ip_rt_min_advmss);
+                                   net->ipv4.ip_rt_min_advmss);
 
        return min(advmss, IPV4_MAX_PMTU - header_size);
 }
@@ -3535,13 +3534,6 @@ static struct ctl_table ipv4_route_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
-       {
-               .procname       = "min_adv_mss",
-               .data           = &ip_rt_min_advmss,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
        { }
 };
 
@@ -3569,6 +3561,13 @@ static struct ctl_table ipv4_route_netns_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
+       {
+               .procname   = "min_adv_mss",
+               .data       = &init_net.ipv4.ip_rt_min_advmss,
+               .maxlen     = sizeof(int),
+               .mode       = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        { },
 };
 
@@ -3631,6 +3630,7 @@ static __net_init int netns_ip_rt_init(struct net *net)
        /* Set default value for namespaceified sysctls */
        net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
        net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
+       net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS;
        return 0;
 }
 
index 97eb547..1cae27b 100644 (file)
@@ -589,6 +589,14 @@ static struct ctl_table ipv4_table[] = {
 };
 
 static struct ctl_table ipv4_net_table[] = {
+       /* tcp_max_tw_buckets must be first in this table. */
+       {
+               .procname       = "tcp_max_tw_buckets",
+/*             .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
        {
                .procname       = "icmp_echo_ignore_all",
                .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
@@ -1001,13 +1009,6 @@ static struct ctl_table ipv4_net_table[] = {
                .extra2         = &two,
        },
        {
-               .procname       = "tcp_max_tw_buckets",
-               .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec
-       },
-       {
                .procname       = "tcp_max_syn_backlog",
                .data           = &init_net.ipv4.sysctl_max_syn_backlog,
                .maxlen         = sizeof(int),
@@ -1400,7 +1401,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
                if (!table)
                        goto err_alloc;
 
-               for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+               /* skip first entry (sysctl_max_tw_buckets) */
+               for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
                        if (table[i].data) {
                                /* Update the variables to point into
                                 * the current struct net
@@ -1415,6 +1417,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
                }
        }
 
+       table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets;
+
        net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
        if (!net->ipv4.ipv4_hdr)
                goto err_reg;
index bdf108f..a03a6bf 100644 (file)
@@ -894,8 +894,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
                return mss_now;
 
        /* Note : tcp_tso_autosize() will eventually split this later */
-       new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
-       new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+       new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size);
 
        /* We try hard to avoid divides here */
        size_goal = tp->gso_segs * mss_now;
index ec55500..02e8626 100644 (file)
@@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
        .set_state      = bbr_set_state,
 };
 
-BTF_SET_START(tcp_bbr_kfunc_ids)
+BTF_SET_START(tcp_bbr_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, bbr_init)
@@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
 #endif
 #endif
-BTF_SET_END(tcp_bbr_kfunc_ids)
+BTF_SET_END(tcp_bbr_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_bbr_check_kfunc_ids,
+};
 
 static int __init bbr_register(void)
 {
        int ret;
 
        BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
-       ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
-       if (ret)
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&tcp_bbr_cong_ops);
 }
 
 static void __exit bbr_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
        tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
 }
 
index e07837e..24d562d 100644 (file)
@@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
        .name           = "cubic",
 };
 
-BTF_SET_START(tcp_cubic_kfunc_ids)
+BTF_SET_START(tcp_cubic_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, cubictcp_init)
@@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event)
 BTF_ID(func, cubictcp_acked)
 #endif
 #endif
-BTF_SET_END(tcp_cubic_kfunc_ids)
+BTF_SET_END(tcp_cubic_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_cubic_check_kfunc_ids,
+};
 
 static int __init cubictcp_register(void)
 {
@@ -534,16 +537,14 @@ static int __init cubictcp_register(void)
        /* divide by bic_scale and by constant Srtt (100ms) */
        do_div(cube_factor, bic_scale * 10);
 
-       ret = tcp_register_congestion_control(&cubictcp);
-       if (ret)
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&cubictcp);
 }
 
 static void __exit cubictcp_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
        tcp_unregister_congestion_control(&cubictcp);
 }
 
index 0d7ab3c..1943a66 100644 (file)
@@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
        .name           = "dctcp-reno",
 };
 
-BTF_SET_START(tcp_dctcp_kfunc_ids)
+BTF_SET_START(tcp_dctcp_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, dctcp_init)
@@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo)
 BTF_ID(func, dctcp_state)
 #endif
 #endif
-BTF_SET_END(tcp_dctcp_kfunc_ids)
+BTF_SET_END(tcp_dctcp_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &tcp_dctcp_check_kfunc_ids,
+};
 
 static int __init dctcp_register(void)
 {
        int ret;
 
        BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
-       ret = tcp_register_congestion_control(&dctcp);
-       if (ret)
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
-       return 0;
+       return tcp_register_congestion_control(&dctcp);
 }
 
 static void __exit dctcp_unregister(void)
 {
-       unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
        tcp_unregister_congestion_control(&dctcp);
 }
 
index bfe4112..af94a6d 100644 (file)
@@ -6725,6 +6725,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
                ireq->ireq_state = TCP_NEW_SYN_RECV;
                write_pnet(&ireq->ireq_net, sock_net(sk_listener));
                ireq->ireq_family = sk_listener->sk_family;
+               req->timeout = TCP_TIMEOUT_INIT;
        }
 
        return req;
@@ -6941,9 +6942,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                sock_put(fastopen_sk);
        } else {
                tcp_rsk(req)->tfo_listener = false;
-               if (!want_cookie)
-                       inet_csk_reqsk_queue_hash_add(sk, req,
-                               tcp_timeout_init((struct sock *)req));
+               if (!want_cookie) {
+                       req->timeout = tcp_timeout_init((struct sock *)req);
+                       inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
+               }
                af_ops->send_synack(sk, dst, &fl, req, &foc,
                                    !want_cookie ? TCP_SYNACK_NORMAL :
                                                   TCP_SYNACK_COOKIE,
index fec656f..6873f46 100644 (file)
@@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
+static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
+
 static u32 tcp_v4_init_seq(const struct sk_buff *skb)
 {
        return secure_tcp_seq(ip_hdr(skb)->daddr,
@@ -206,7 +208,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        struct rtable *rt;
        int err;
        struct ip_options_rcu *inet_opt;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+       struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 
        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;
@@ -810,7 +812,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
        arg.tos = ip_hdr(skb)->tos;
        arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+       ctl_sk = this_cpu_read(ipv4_tcp_sk);
+       sock_net_set(ctl_sk, net);
        if (sk) {
                ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                                   inet_twsk(sk)->tw_mark : sk->sk_mark;
@@ -825,6 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
                              transmit_time);
 
        ctl_sk->sk_mark = 0;
+       sock_net_set(ctl_sk, &init_net);
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
        local_bh_enable();
@@ -908,7 +912,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
        arg.tos = tos;
        arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
-       ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+       ctl_sk = this_cpu_read(ipv4_tcp_sk);
+       sock_net_set(ctl_sk, net);
        ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                           inet_twsk(sk)->tw_mark : sk->sk_mark;
        ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -921,6 +926,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
                              transmit_time);
 
        ctl_sk->sk_mark = 0;
+       sock_net_set(ctl_sk, &init_net);
        __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
        local_bh_enable();
 }
@@ -3111,41 +3117,18 @@ EXPORT_SYMBOL(tcp_prot);
 
 static void __net_exit tcp_sk_exit(struct net *net)
 {
-       int cpu;
+       struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row;
 
        if (net->ipv4.tcp_congestion_control)
                bpf_module_put(net->ipv4.tcp_congestion_control,
                               net->ipv4.tcp_congestion_control->owner);
-
-       for_each_possible_cpu(cpu)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
-       free_percpu(net->ipv4.tcp_sk);
+       if (refcount_dec_and_test(&tcp_death_row->tw_refcount))
+               kfree(tcp_death_row);
 }
 
 static int __net_init tcp_sk_init(struct net *net)
 {
-       int res, cpu, cnt;
-
-       net->ipv4.tcp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv4.tcp_sk)
-               return -ENOMEM;
-
-       for_each_possible_cpu(cpu) {
-               struct sock *sk;
-
-               res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
-                                          IPPROTO_TCP, net);
-               if (res)
-                       goto fail;
-               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-
-               /* Please enforce IP_DF and IPID==0 for RST and
-                * ACK sent in SYN-RECV and TIME-WAIT state.
-                */
-               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
-
-               *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
-       }
+       int cnt;
 
        net->ipv4.sysctl_tcp_ecn = 2;
        net->ipv4.sysctl_tcp_ecn_fallback = 1;
@@ -3172,9 +3155,13 @@ static int __net_init tcp_sk_init(struct net *net)
        net->ipv4.sysctl_tcp_tw_reuse = 2;
        net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
 
+       net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL);
+       if (!net->ipv4.tcp_death_row)
+               return -ENOMEM;
+       refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1);
        cnt = tcp_hashinfo.ehash_mask + 1;
-       net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
-       net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
+       net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2;
+       net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo;
 
        net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
        net->ipv4.sysctl_tcp_sack = 1;
@@ -3229,18 +3216,12 @@ static int __net_init tcp_sk_init(struct net *net)
                net->ipv4.tcp_congestion_control = &tcp_reno;
 
        return 0;
-fail:
-       tcp_sk_exit(net);
-
-       return res;
 }
 
 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
        struct net *net;
 
-       inet_twsk_purge(&tcp_hashinfo, AF_INET);
-
        list_for_each_entry(net, net_exit_list, exit_list)
                tcp_fastopen_ctx_destroy(net);
 }
@@ -3326,6 +3307,24 @@ static void __init bpf_iter_register(void)
 
 void __init tcp_v4_init(void)
 {
+       int cpu, res;
+
+       for_each_possible_cpu(cpu) {
+               struct sock *sk;
+
+               res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+                                          IPPROTO_TCP, &init_net);
+               if (res)
+                       panic("Failed to create the TCP control socket.\n");
+               sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+               /* Please enforce IP_DF and IPID==0 for RST and
+                * ACK sent in SYN-RECV and TIME-WAIT state.
+                */
+               inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+
+               per_cpu(ipv4_tcp_sk, cpu) = sk;
+       }
        if (register_pernet_subsys(&tcp_sk_ops))
                panic("Failed to create the TCP control socket.\n");
 
index 7c2d3ac..6366df7 100644 (file)
@@ -248,7 +248,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
        const struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcp_sock *tp = tcp_sk(sk);
        struct inet_timewait_sock *tw;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+       struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 
        tw = inet_twsk_alloc(sk, tcp_death_row, state);
 
@@ -583,7 +583,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                         * it can be estimated (approximately)
                         * from another data.
                         */
-                       tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+                       tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ;
                        paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
                }
        }
@@ -622,8 +622,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                    !inet_rtx_syn_ack(sk, req)) {
                        unsigned long expires = jiffies;
 
-                       expires += min(TCP_TIMEOUT_INIT << req->num_timeout,
-                                      TCP_RTO_MAX);
+                       expires += reqsk_timeout(req, TCP_RTO_MAX);
                        if (!fastopen)
                                mod_timer_pending(&req->rsk_timer, expires);
                        else
index 5079832..e76bf1e 100644 (file)
@@ -1960,7 +1960,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 
        bytes = min_t(unsigned long,
                      sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
-                     sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+                     sk->sk_gso_max_size);
 
        /* Goal is to send at least one packet per ms,
         * not one big TSO packet every 100 ms.
@@ -4092,7 +4092,9 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
        struct flowi fl;
        int res;
 
-       tcp_rsk(req)->txhash = net_tx_rndhash();
+       /* Paired with WRITE_ONCE() in sock_setsockopt() */
+       if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED)
+               tcp_rsk(req)->txhash = net_tx_rndhash();
        res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
                                  NULL);
        if (!res) {
index 77e34ae..658d5ea 100644 (file)
@@ -1344,14 +1344,14 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
        return opt2;
 }
 
-struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
-                                         struct ipv6_txoptions *opt)
+struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space,
+                                           struct ipv6_txoptions *opt)
 {
        /*
         * ignore the dest before srcrt unless srcrt is being included.
         * --yoshfuji
         */
-       if (opt && opt->dst0opt && !opt->srcrt) {
+       if (opt->dst0opt && !opt->srcrt) {
                if (opt_space != opt) {
                        memcpy(opt_space, opt, sizeof(*opt_space));
                        opt = opt_space;
@@ -1362,7 +1362,7 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 
        return opt;
 }
-EXPORT_SYMBOL_GPL(ipv6_fixup_options);
+EXPORT_SYMBOL_GPL(__ipv6_fixup_options);
 
 /**
  * fl6_update_dst - update flowi destination address with info given
index 96c5cc0..e6b978e 100644 (file)
 
 #include <linux/uaccess.h>
 
-/*
- *     The ICMP socket(s). This is the most convenient way to flow control
- *     our ICMP output as well as maintain a clean interface throughout
- *     all layers. All Socketless IP sends will soon be gone.
- *
- *     On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmpv6_sk(struct net *net)
-{
-       return this_cpu_read(*net->ipv6.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
 
 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                       u8 type, u8 code, int offset, __be32 info)
@@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = {
 };
 
 /* Called with BH disabled */
-static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+static struct sock *icmpv6_xmit_lock(struct net *net)
 {
        struct sock *sk;
 
-       sk = icmpv6_sk(net);
+       sk = this_cpu_read(ipv6_icmp_sk);
        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
                /* This can happen if the output path (f.e. SIT or
                 * ip6ip6 tunnel) signals dst_link_failure() for an
@@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
                 */
                return NULL;
        }
+       sock_net_set(sk, net);
        return sk;
 }
 
-static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+static void icmpv6_xmit_unlock(struct sock *sk)
 {
+       sock_net_set(sk, &init_net);
        spin_unlock(&sk->sk_lock.slock);
 }
 
@@ -1034,59 +1026,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
        security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
 }
 
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
-       free_percpu(net->ipv6.icmp_sk);
-}
-
-static int __net_init icmpv6_sk_init(struct net *net)
+int __init icmpv6_init(void)
 {
        struct sock *sk;
        int err, i;
 
-       net->ipv6.icmp_sk = alloc_percpu(struct sock *);
-       if (!net->ipv6.icmp_sk)
-               return -ENOMEM;
-
        for_each_possible_cpu(i) {
                err = inet_ctl_sock_create(&sk, PF_INET6,
-                                          SOCK_RAW, IPPROTO_ICMPV6, net);
+                                          SOCK_RAW, IPPROTO_ICMPV6, &init_net);
                if (err < 0) {
                        pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
                               err);
-                       goto fail;
+                       return err;
                }
 
-               *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
+               per_cpu(ipv6_icmp_sk, i) = sk;
 
                /* Enough space for 2 64K ICMP packets, including
                 * sk_buff struct overhead.
                 */
                sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
        }
-       return 0;
-
- fail:
-       icmpv6_sk_exit(net);
-       return err;
-}
-
-static struct pernet_operations icmpv6_sk_ops = {
-       .init = icmpv6_sk_init,
-       .exit = icmpv6_sk_exit,
-};
-
-int __init icmpv6_init(void)
-{
-       int err;
-
-       err = register_pernet_subsys(&icmpv6_sk_ops);
-       if (err < 0)
-               return err;
 
        err = -EAGAIN;
        if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
@@ -1101,14 +1061,12 @@ sender_reg_err:
        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 fail:
        pr_err("Failed to register ICMP6 protocol\n");
-       unregister_pernet_subsys(&icmpv6_sk_ops);
        return err;
 }
 
 void icmpv6_cleanup(void)
 {
        inet6_unregister_icmp_sender(icmp6_send);
-       unregister_pernet_subsys(&icmpv6_sk_ops);
        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
 
index b29e9ba..d37a79a 100644 (file)
@@ -249,7 +249,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
                 if ((first_word & htonl(0xF00FFFFF)) ||
                     !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
                     !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
-                    *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+                    iph->nexthdr != iph2->nexthdr) {
 not_same_flow:
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
@@ -260,7 +260,8 @@ not_same_flow:
                                goto not_same_flow;
                }
                /* flush if Traffic Class fields are different */
-               NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+               NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
+                       (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
                NAPI_GRO_CB(p)->flush |= flush;
 
                /* If the previous IP ID value was based on an atomic
index 2995f8d..0c6c971 100644 (file)
@@ -1350,11 +1350,16 @@ static void ip6_append_data_mtu(unsigned int *mtu,
 
 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
-                         struct rt6_info *rt, struct flowi6 *fl6)
+                         struct rt6_info *rt)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
-       struct ipv6_txoptions *opt = ipc6->opt;
+       struct ipv6_txoptions *nopt, *opt = ipc6->opt;
+
+       /* callers pass dst together with a reference, set it first so
+        * ip6_cork_release() can put it down even in case of an error.
+        */
+       cork->base.dst = &rt->dst;
 
        /*
         * setup for corking
@@ -1363,39 +1368,32 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;
 
-               v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
-               if (unlikely(!v6_cork->opt))
+               nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
+               if (unlikely(!nopt))
                        return -ENOBUFS;
 
-               v6_cork->opt->tot_len = sizeof(*opt);
-               v6_cork->opt->opt_flen = opt->opt_flen;
-               v6_cork->opt->opt_nflen = opt->opt_nflen;
+               nopt->tot_len = sizeof(*opt);
+               nopt->opt_flen = opt->opt_flen;
+               nopt->opt_nflen = opt->opt_nflen;
 
-               v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
-                                                   sk->sk_allocation);
-               if (opt->dst0opt && !v6_cork->opt->dst0opt)
+               nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
+               if (opt->dst0opt && !nopt->dst0opt)
                        return -ENOBUFS;
 
-               v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
-                                                   sk->sk_allocation);
-               if (opt->dst1opt && !v6_cork->opt->dst1opt)
+               nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
+               if (opt->dst1opt && !nopt->dst1opt)
                        return -ENOBUFS;
 
-               v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
-                                                  sk->sk_allocation);
-               if (opt->hopopt && !v6_cork->opt->hopopt)
+               nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
+               if (opt->hopopt && !nopt->hopopt)
                        return -ENOBUFS;
 
-               v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
-                                                   sk->sk_allocation);
-               if (opt->srcrt && !v6_cork->opt->srcrt)
+               nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
+               if (opt->srcrt && !nopt->srcrt)
                        return -ENOBUFS;
 
                /* need source address above miyazawa*/
        }
-       dst_hold(&rt->dst);
-       cork->base.dst = &rt->dst;
-       cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
@@ -1426,9 +1424,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 }
 
 static int __ip6_append_data(struct sock *sk,
-                            struct flowi6 *fl6,
                             struct sk_buff_head *queue,
-                            struct inet_cork *cork,
+                            struct inet_cork_full *cork_full,
                             struct inet6_cork *v6_cork,
                             struct page_frag *pfrag,
                             int getfrag(void *from, char *to, int offset,
@@ -1437,6 +1434,8 @@ static int __ip6_append_data(struct sock *sk,
                             unsigned int flags, struct ipcm6_cookie *ipc6)
 {
        struct sk_buff *skb, *skb_prev = NULL;
+       struct inet_cork *cork = &cork_full->base;
+       struct flowi6 *fl6 = &cork_full->fl.u.ip6;
        unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
        struct ubuf_info *uarg = NULL;
        int exthdrlen = 0;
@@ -1788,34 +1787,46 @@ int ip6_append_data(struct sock *sk,
                /*
                 * setup for corking
                 */
+               dst_hold(&rt->dst);
                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
-                                    ipc6, rt, fl6);
+                                    ipc6, rt);
                if (err)
                        return err;
 
+               inet->cork.fl.u.ip6 = *fl6;
                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
-               fl6 = &inet->cork.fl.u.ip6;
                transhdrlen = 0;
        }
 
-       return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
+       return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
                                 &np->cork, sk_page_frag(sk), getfrag,
                                 from, length, transhdrlen, flags, ipc6);
 }
 EXPORT_SYMBOL_GPL(ip6_append_data);
 
+static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
+{
+       struct dst_entry *dst = cork->base.dst;
+
+       cork->base.dst = NULL;
+       cork->base.flags &= ~IPCORK_ALLFRAG;
+       skb_dst_set(skb, dst);
+}
+
 static void ip6_cork_release(struct inet_cork_full *cork,
                             struct inet6_cork *v6_cork)
 {
        if (v6_cork->opt) {
-               kfree(v6_cork->opt->dst0opt);
-               kfree(v6_cork->opt->dst1opt);
-               kfree(v6_cork->opt->hopopt);
-               kfree(v6_cork->opt->srcrt);
-               kfree(v6_cork->opt);
+               struct ipv6_txoptions *opt = v6_cork->opt;
+
+               kfree(opt->dst0opt);
+               kfree(opt->dst1opt);
+               kfree(opt->hopopt);
+               kfree(opt->srcrt);
+               kfree(opt);
                v6_cork->opt = NULL;
        }
 
@@ -1824,7 +1835,6 @@ static void ip6_cork_release(struct inet_cork_full *cork,
                cork->base.dst = NULL;
                cork->base.flags &= ~IPCORK_ALLFRAG;
        }
-       memset(&cork->fl, 0, sizeof(cork->fl));
 }
 
 struct sk_buff *__ip6_make_skb(struct sock *sk,
@@ -1834,7 +1844,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 {
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
-       struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
+       struct in6_addr *final_dst;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
@@ -1864,9 +1874,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
        /* Allow local fragmentation. */
        skb->ignore_df = ip6_sk_ignore_df(sk);
-
-       *final_dst = fl6->daddr;
        __skb_pull(skb, skb_network_header_len(skb));
+
+       final_dst = &fl6->daddr;
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
@@ -1886,10 +1896,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
        skb->priority = sk->sk_priority;
        skb->mark = cork->base.mark;
-
        skb->tstamp = cork->base.transmit_time;
 
-       skb_dst_set(skb, dst_clone(&rt->dst));
+       ip6_cork_steal_dst(skb, cork);
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -1961,26 +1970,26 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
-                            struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
-                            struct rt6_info *rt, unsigned int flags,
-                            struct inet_cork_full *cork)
+                            struct ipcm6_cookie *ipc6, struct rt6_info *rt,
+                            unsigned int flags, struct inet_cork_full *cork)
 {
        struct inet6_cork v6_cork;
        struct sk_buff_head queue;
        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
        int err;
 
-       if (flags & MSG_PROBE)
+       if (flags & MSG_PROBE) {
+               dst_release(&rt->dst);
                return NULL;
+       }
 
        __skb_queue_head_init(&queue);
 
        cork->base.flags = 0;
        cork->base.addr = 0;
        cork->base.opt = NULL;
-       cork->base.dst = NULL;
        v6_cork.opt = NULL;
-       err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
+       err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
        if (err) {
                ip6_cork_release(cork, &v6_cork);
                return ERR_PTR(err);
@@ -1988,7 +1997,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
        if (ipc6->dontfrag < 0)
                ipc6->dontfrag = inet6_sk(sk)->dontfrag;
 
-       err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
+       err = __ip6_append_data(sk, &queue, cork, &v6_cork,
                                &current->task_frag, getfrag, from,
                                length + exthdrlen, transhdrlen + exthdrlen,
                                flags, ipc6);
index 97ade83..b47ffc8 100644 (file)
@@ -1121,6 +1121,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 
                        memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
                        neigh_release(neigh);
+               } else if (skb->protocol == htons(ETH_P_IP)) {
+                       struct rtable *rt = skb_rtable(skb);
+
+                       if (rt->rt_gw_family == AF_INET6)
+                               memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr));
                }
        } else if (t->parms.proto != 0 && !(t->parms.flags &
                                            (IP6_TNL_F_USE_ORIG_TCLASS |
index 075ee8a..0c648bf 100644 (file)
@@ -148,6 +148,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
@@ -156,7 +157,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        struct dst_entry *dst;
        int addr_type;
        int err;
-       struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;
@@ -308,6 +308,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        inet->inet_dport = usin->sin6_port;
 
        tcp_set_state(sk, TCP_SYN_SENT);
+       tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;
@@ -2237,15 +2238,9 @@ static void __net_exit tcpv6_net_exit(struct net *net)
        inet_ctl_sock_destroy(net->ipv6.tcp_sk);
 }
 
-static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
-{
-       inet_twsk_purge(&tcp_hashinfo, AF_INET6);
-}
-
 static struct pernet_operations tcpv6_net_ops = {
        .init       = tcpv6_net_init,
        .exit       = tcpv6_net_exit,
-       .exit_batch = tcpv6_net_exit_batch,
 };
 
 int __init tcpv6_init(void)
index 528b81e..c687259 100644 (file)
@@ -1266,23 +1266,17 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 {
        struct sk_buff *skb;
        struct udp_sock  *up = udp_sk(sk);
-       struct flowi6 fl6;
        int err = 0;
 
        if (up->pending == AF_INET)
                return udp_push_pending_frames(sk);
 
-       /* ip6_finish_skb will release the cork, so make a copy of
-        * fl6 here.
-        */
-       fl6 = inet_sk(sk)->cork.fl.u.ip6;
-
        skb = ip6_finish_skb(sk);
        if (!skb)
                goto out;
 
-       err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base);
-
+       err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6,
+                             &inet_sk(sk)->cork.base);
 out:
        up->len = 0;
        up->pending = 0;
@@ -1300,7 +1294,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        struct ipv6_txoptions *opt = NULL;
        struct ipv6_txoptions *opt_to_free = NULL;
        struct ip6_flowlabel *flowlabel = NULL;
-       struct flowi6 fl6;
+       struct inet_cork_full cork;
+       struct flowi6 *fl6 = &cork.fl.u.ip6;
        struct dst_entry *dst;
        struct ipcm6_cookie ipc6;
        int addr_len = msg->msg_namelen;
@@ -1363,9 +1358,6 @@ do_udp_sendmsg:
                }
        }
 
-       if (up->pending == AF_INET)
-               return udp_sendmsg(sk, msg, len);
-
        /* Rough check on arithmetic overflow,
           better check is made in ip6_append_data().
           */
@@ -1374,6 +1366,8 @@ do_udp_sendmsg:
 
        getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
        if (up->pending) {
+               if (up->pending == AF_INET)
+                       return udp_sendmsg(sk, msg, len);
                /*
                 * There are pending frames.
                 * The socket lock must be held while it's corked.
@@ -1391,19 +1385,19 @@ do_udp_sendmsg:
        }
        ulen += sizeof(struct udphdr);
 
-       memset(&fl6, 0, sizeof(fl6));
+       memset(fl6, 0, sizeof(*fl6));
 
        if (sin6) {
                if (sin6->sin6_port == 0)
                        return -EINVAL;
 
-               fl6.fl6_dport = sin6->sin6_port;
+               fl6->fl6_dport = sin6->sin6_port;
                daddr = &sin6->sin6_addr;
 
                if (np->sndflow) {
-                       fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-                       if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
-                               flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+                       fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+                       if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
+                               flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
                                if (IS_ERR(flowlabel))
                                        return -EINVAL;
                        }
@@ -1420,24 +1414,24 @@ do_udp_sendmsg:
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    sin6->sin6_scope_id &&
                    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
-                       fl6.flowi6_oif = sin6->sin6_scope_id;
+                       fl6->flowi6_oif = sin6->sin6_scope_id;
        } else {
                if (sk->sk_state != TCP_ESTABLISHED)
                        return -EDESTADDRREQ;
 
-               fl6.fl6_dport = inet->inet_dport;
+               fl6->fl6_dport = inet->inet_dport;
                daddr = &sk->sk_v6_daddr;
-               fl6.flowlabel = np->flow_label;
+               fl6->flowlabel = np->flow_label;
                connected = true;
        }
 
-       if (!fl6.flowi6_oif)
-               fl6.flowi6_oif = sk->sk_bound_dev_if;
+       if (!fl6->flowi6_oif)
+               fl6->flowi6_oif = sk->sk_bound_dev_if;
 
-       if (!fl6.flowi6_oif)
-               fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+       if (!fl6->flowi6_oif)
+               fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
 
-       fl6.flowi6_uid = sk->sk_uid;
+       fl6->flowi6_uid = sk->sk_uid;
 
        if (msg->msg_controllen) {
                opt = &opt_space;
@@ -1447,14 +1441,14 @@ do_udp_sendmsg:
 
                err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
                if (err > 0)
-                       err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
+                       err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
                                                    &ipc6);
                if (err < 0) {
                        fl6_sock_release(flowlabel);
                        return err;
                }
-               if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
-                       flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+               if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+                       flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                }
@@ -1471,16 +1465,17 @@ do_udp_sendmsg:
        opt = ipv6_fixup_options(&opt_space, opt);
        ipc6.opt = opt;
 
-       fl6.flowi6_proto = sk->sk_protocol;
-       fl6.flowi6_mark = ipc6.sockc.mark;
-       fl6.daddr = *daddr;
-       if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
-               fl6.saddr = np->saddr;
-       fl6.fl6_sport = inet->inet_sport;
+       fl6->flowi6_proto = sk->sk_protocol;
+       fl6->flowi6_mark = ipc6.sockc.mark;
+       fl6->daddr = *daddr;
+       if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
+               fl6->saddr = np->saddr;
+       fl6->fl6_sport = inet->inet_sport;
 
        if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
                err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
-                                          (struct sockaddr *)sin6, &fl6.saddr);
+                                          (struct sockaddr *)sin6,
+                                          &fl6->saddr);
                if (err)
                        goto out_no_dst;
                if (sin6) {
@@ -1496,32 +1491,32 @@ do_udp_sendmsg:
                                err = -EINVAL;
                                goto out_no_dst;
                        }
-                       fl6.fl6_dport = sin6->sin6_port;
-                       fl6.daddr = sin6->sin6_addr;
+                       fl6->fl6_dport = sin6->sin6_port;
+                       fl6->daddr = sin6->sin6_addr;
                }
        }
 
-       if (ipv6_addr_any(&fl6.daddr))
-               fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+       if (ipv6_addr_any(&fl6->daddr))
+               fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
 
-       final_p = fl6_update_dst(&fl6, opt, &final);
+       final_p = fl6_update_dst(fl6, opt, &final);
        if (final_p)
                connected = false;
 
-       if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
-               fl6.flowi6_oif = np->mcast_oif;
+       if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
+               fl6->flowi6_oif = np->mcast_oif;
                connected = false;
-       } else if (!fl6.flowi6_oif)
-               fl6.flowi6_oif = np->ucast_oif;
+       } else if (!fl6->flowi6_oif)
+               fl6->flowi6_oif = np->ucast_oif;
 
-       security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
+       security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
 
        if (ipc6.tclass < 0)
                ipc6.tclass = np->tclass;
 
-       fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+       fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);
 
-       dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected);
+       dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                dst = NULL;
@@ -1529,7 +1524,7 @@ do_udp_sendmsg:
        }
 
        if (ipc6.hlimit < 0)
-               ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+               ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst);
 
        if (msg->msg_flags&MSG_CONFIRM)
                goto do_confirm;
@@ -1537,17 +1532,17 @@ back_from_confirm:
 
        /* Lockless fast path for the non-corking case */
        if (!corkreq) {
-               struct inet_cork_full cork;
                struct sk_buff *skb;
 
                skb = ip6_make_skb(sk, getfrag, msg, ulen,
                                   sizeof(struct udphdr), &ipc6,
-                                  &fl6, (struct rt6_info *)dst,
+                                  (struct rt6_info *)dst,
                                   msg->msg_flags, &cork);
                err = PTR_ERR(skb);
                if (!IS_ERR_OR_NULL(skb))
-                       err = udp_v6_send_skb(skb, &fl6, &cork.base);
-               goto out;
+                       err = udp_v6_send_skb(skb, fl6, &cork.base);
+               /* ip6_make_skb steals dst reference */
+               goto out_no_dst;
        }
 
        lock_sock(sk);
@@ -1568,7 +1563,7 @@ do_append_data:
                ipc6.dontfrag = np->dontfrag;
        up->len += ulen;
        err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
-                             &ipc6, &fl6, (struct rt6_info *)dst,
+                             &ipc6, fl6, (struct rt6_info *)dst,
                              corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
        if (err)
                udp_v6_flush_pending_frames(sk);
@@ -1603,7 +1598,7 @@ out_no_dst:
 
 do_confirm:
        if (msg->msg_flags & MSG_PROBE)
-               dst_confirm_neigh(dst, &fl6.daddr);
+               dst_confirm_neigh(dst, &fl6->daddr);
        if (!(msg->msg_flags&MSG_PROBE) || len)
                goto back_from_confirm;
        err = 0;
index 645dd98..3e82ac2 100644 (file)
@@ -336,6 +336,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                flags = *ptr++;
                mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
                mp_opt->reset_reason = *ptr;
+               pr_debug("MP_RST: transient=%u reason=%u",
+                        mp_opt->reset_transient, mp_opt->reset_reason);
                break;
 
        case MPTCPOPT_MP_FAIL:
@@ -1264,22 +1266,30 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
 void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                         struct mptcp_out_options *opts)
 {
-       if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
-               const struct sock *ssk = (const struct sock *)tp;
-               struct mptcp_subflow_context *subflow;
-
-               subflow = mptcp_subflow_ctx(ssk);
-               subflow->send_mp_fail = 0;
-
-               *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
-                                     TCPOLEN_MPTCP_FAIL,
-                                     0, 0);
-               put_unaligned_be64(opts->fail_seq, ptr);
-               ptr += 2;
-       }
-
-       /* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive,
-        * see mptcp_established_options*()
+       const struct sock *ssk = (const struct sock *)tp;
+       struct mptcp_subflow_context *subflow;
+
+       /* Which options can be used together?
+        *
+        * X: mutually exclusive
+        * O: often used together
+        * C: can be used together in some cases
+        * P: could be used together but we prefer not to (optimisations)
+        *
+        *  Opt: | MPC  | MPJ  | DSS  | ADD  |  RM  | PRIO | FAIL |  FC  |
+        * ------|------|------|------|------|------|------|------|------|
+        *  MPC  |------|------|------|------|------|------|------|------|
+        *  MPJ  |  X   |------|------|------|------|------|------|------|
+        *  DSS  |  X   |  X   |------|------|------|------|------|------|
+        *  ADD  |  X   |  X   |  P   |------|------|------|------|------|
+        *  RM   |  C   |  C   |  C   |  P   |------|------|------|------|
+        *  PRIO |  X   |  C   |  C   |  C   |  C   |------|------|------|
+        *  FAIL |  X   |  X   |  C   |  X   |  X   |  X   |------|------|
+        *  FC   |  X   |  X   |  X   |  X   |  X   |  X   |  X   |------|
+        *  RST  |  X   |  X   |  X   |  X   |  X   |  X   |  O   |  O   |
+        * ------|------|------|------|------|------|------|------|------|
+        *
+        * The same applies in mptcp_established_options() function.
         */
        if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
                struct mptcp_ext *mpext = &opts->ext_copy;
@@ -1336,6 +1346,10 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                        }
                        ptr += 1;
                }
+
+               /* We might need to add MP_FAIL options in rare cases */
+               if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions))
+                       goto mp_fail;
        } else if (OPTIONS_MPTCP_MPC & opts->suboptions) {
                u8 len, flag = MPTCP_CAP_HMAC_SHA256;
 
@@ -1479,6 +1493,21 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                if (OPTION_MPTCP_RST & opts->suboptions)
                        goto mp_rst;
                return;
+       } else if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
+mp_fail:
+               /* MP_FAIL is mutually exclusive with others except RST */
+               subflow = mptcp_subflow_ctx(ssk);
+               subflow->send_mp_fail = 0;
+
+               *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+                                     TCPOLEN_MPTCP_FAIL,
+                                     0, 0);
+               put_unaligned_be64(opts->fail_seq, ptr);
+               ptr += 2;
+
+               if (OPTION_MPTCP_RST & opts->suboptions)
+                       goto mp_rst;
+               return;
        } else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
 mp_rst:
                *ptr++ = mptcp_option(MPTCPOPT_RST,
@@ -1489,9 +1518,6 @@ mp_rst:
        }
 
        if (OPTION_MPTCP_PRIO & opts->suboptions) {
-               const struct sock *ssk = (const struct sock *)tp;
-               struct mptcp_subflow_context *subflow;
-
                subflow = mptcp_subflow_ctx(ssk);
                subflow->send_mp_prio = 0;
 
index 782b1d4..d477957 100644 (file)
@@ -1728,9 +1728,20 @@ fail:
        return -EMSGSIZE;
 }
 
-static int mptcp_nl_addr_backup(struct net *net,
-                               struct mptcp_addr_info *addr,
-                               u8 bkup)
+static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
+                                struct mptcp_addr_info *addr)
+{
+       struct mptcp_rm_list list = { .nr = 0 };
+
+       list.ids[list.nr++] = addr->id;
+
+       mptcp_pm_nl_rm_subflow_received(msk, &list);
+       mptcp_pm_create_subflow_or_signal_addr(msk);
+}
+
+static int mptcp_nl_set_flags(struct net *net,
+                             struct mptcp_addr_info *addr,
+                             u8 bkup, u8 changed)
 {
        long s_slot = 0, s_num = 0;
        struct mptcp_sock *msk;
@@ -1744,7 +1755,10 @@ static int mptcp_nl_addr_backup(struct net *net,
 
                lock_sock(sk);
                spin_lock_bh(&msk->pm.lock);
-               ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup);
+               if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
+                       ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup);
+               if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)
+                       mptcp_pm_nl_fullmesh(msk, addr);
                spin_unlock_bh(&msk->pm.lock);
                release_sock(sk);
 
@@ -1761,6 +1775,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
        struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
        struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
        struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
+       u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
+                          MPTCP_PM_ADDR_FLAG_FULLMESH;
        struct net *net = sock_net(skb->sk);
        u8 bkup = 0, lookup_by_id = 0;
        int ret;
@@ -1783,15 +1799,18 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
                spin_unlock_bh(&pernet->lock);
                return -EINVAL;
        }
+       if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
+           (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+               spin_unlock_bh(&pernet->lock);
+               return -EINVAL;
+       }
 
-       if (bkup)
-               entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
-       else
-               entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+       changed = (addr.flags ^ entry->flags) & mask;
+       entry->flags = (entry->flags & ~mask) | (addr.flags & mask);
        addr = *entry;
        spin_unlock_bh(&pernet->lock);
 
-       mptcp_nl_addr_backup(net, &addr.addr, bkup);
+       mptcp_nl_set_flags(net, &addr.addr, bkup, changed);
        return 0;
 }
 
index a135b1a..238b6a6 100644 (file)
@@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+ifeq ($(CONFIG_NF_CONNTRACK),m)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
+else ifeq ($(CONFIG_NF_CONNTRACK),y)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
+endif
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
new file mode 100644 (file)
index 0000000..8ad3f52
--- /dev/null
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Conntrack Helpers for XDP and TC-BPF hook
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/types.h>
+#include <linux/btf_ids.h>
+#include <linux/net_namespace.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+/* bpf_ct_opts - Options for CT lookup helpers
+ *
+ * Members:
+ * @netns_id   - Specify the network namespace for lookup
+ *              Values:
+ *                BPF_F_CURRENT_NETNS (-1)
+ *                  Use namespace associated with ctx (xdp_md, __sk_buff)
+ *                [0, S32_MAX]
+ *                  Network Namespace ID
+ * @error      - Out parameter, set for any errors encountered
+ *              Values:
+ *                -EINVAL - Passed NULL for bpf_tuple pointer
+ *                -EINVAL - opts->reserved is not 0
+ *                -EINVAL - netns_id is less than -1
+ *                -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
+ *                -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ *                -ENONET - No network namespace found for netns_id
+ *                -ENOENT - Conntrack lookup could not find entry for tuple
+ *                -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ *                                or sizeof(tuple->ipv6)
+ * @l4proto    - Layer 4 protocol
+ *              Values:
+ *                IPPROTO_TCP, IPPROTO_UDP
+ * @reserved   - Reserved member, will be reused for more options in future
+ *              Values:
+ *                0
+ */
+struct bpf_ct_opts {
+       s32 netns_id;
+       s32 error;
+       u8 l4proto;
+       u8 reserved[3];
+};
+
+enum {
+       NF_BPF_CT_OPTS_SZ = 12,
+};
+
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+                                         struct bpf_sock_tuple *bpf_tuple,
+                                         u32 tuple_len, u8 protonum,
+                                         s32 netns_id)
+{
+       struct nf_conntrack_tuple_hash *hash;
+       struct nf_conntrack_tuple tuple;
+
+       if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+               return ERR_PTR(-EPROTO);
+       if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
+               return ERR_PTR(-EINVAL);
+
+       memset(&tuple, 0, sizeof(tuple));
+       switch (tuple_len) {
+       case sizeof(bpf_tuple->ipv4):
+               tuple.src.l3num = AF_INET;
+               tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
+               tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
+               tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
+               tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
+               break;
+       case sizeof(bpf_tuple->ipv6):
+               tuple.src.l3num = AF_INET6;
+               memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+               tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
+               memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+               tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
+               break;
+       default:
+               return ERR_PTR(-EAFNOSUPPORT);
+       }
+
+       tuple.dst.protonum = protonum;
+
+       if (netns_id >= 0) {
+               net = get_net_ns_by_id(net, netns_id);
+               if (unlikely(!net))
+                       return ERR_PTR(-ENONET);
+       }
+
+       hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+       if (netns_id >= 0)
+               put_net(net);
+       if (!hash)
+               return ERR_PTR(-ENOENT);
+       return nf_ct_tuplehash_to_ctrack(hash);
+}
+
+__diag_push();
+__diag_ignore(GCC, 8, "-Wmissing-prototypes",
+             "Global functions as their definitions will be in nf_conntrack BTF");
+
+/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ *                    reference to it
+ *
+ * Parameters:
+ * @xdp_ctx    - Pointer to ctx (xdp_md) in XDP program
+ *                 Cannot be NULL
+ * @bpf_tuple  - Pointer to memory representing the tuple to look up
+ *                 Cannot be NULL
+ * @tuple__sz  - Length of the tuple structure
+ *                 Must be one of sizeof(bpf_tuple->ipv4) or
+ *                 sizeof(bpf_tuple->ipv6)
+ * @opts       - Additional options for lookup (documented above)
+ *                 Cannot be NULL
+ * @opts__sz   - Length of the bpf_ct_opts structure
+ *                 Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+       struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+       struct net *caller_net;
+       struct nf_conn *nfct;
+
+       BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+       if (!opts)
+               return NULL;
+       if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+           opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+               opts->error = -EINVAL;
+               return NULL;
+       }
+       caller_net = dev_net(ctx->rxq->dev);
+       nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+                                 opts->netns_id);
+       if (IS_ERR(nfct)) {
+               opts->error = PTR_ERR(nfct);
+               return NULL;
+       }
+       return nfct;
+}
+
+/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ *                    reference to it
+ *
+ * Parameters:
+ * @skb_ctx    - Pointer to ctx (__sk_buff) in TC program
+ *                 Cannot be NULL
+ * @bpf_tuple  - Pointer to memory representing the tuple to look up
+ *                 Cannot be NULL
+ * @tuple__sz  - Length of the tuple structure
+ *                 Must be one of sizeof(bpf_tuple->ipv4) or
+ *                 sizeof(bpf_tuple->ipv6)
+ * @opts       - Additional options for lookup (documented above)
+ *                 Cannot be NULL
+ * @opts__sz   - Length of the bpf_ct_opts structure
+ *                 Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+       struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+       struct net *caller_net;
+       struct nf_conn *nfct;
+
+       BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
+
+       if (!opts)
+               return NULL;
+       if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+           opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+               opts->error = -EINVAL;
+               return NULL;
+       }
+       caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+       nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
+                                 opts->netns_id);
+       if (IS_ERR(nfct)) {
+               opts->error = PTR_ERR(nfct);
+               return NULL;
+       }
+       return nfct;
+}
+
+/* bpf_ct_release - Release acquired nf_conn object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @nf_conn     - Pointer to referenced nf_conn object, obtained using
+ *                bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ */
+void bpf_ct_release(struct nf_conn *nfct)
+{
+       if (!nfct)
+               return;
+       nf_ct_put(nfct);
+}
+
+__diag_pop()
+
+BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_xdp_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_tc_check_kfunc_ids)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_tc_check_kfunc_ids)
+
+BTF_SET_START(nf_ct_acquire_kfunc_ids)
+BTF_ID(func, bpf_xdp_ct_lookup)
+BTF_ID(func, bpf_skb_ct_lookup)
+BTF_SET_END(nf_ct_acquire_kfunc_ids)
+
+BTF_SET_START(nf_ct_release_kfunc_ids)
+BTF_ID(func, bpf_ct_release)
+BTF_SET_END(nf_ct_release_kfunc_ids)
+
+/* Both sets are identical */
+#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids
+
+static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &nf_ct_xdp_check_kfunc_ids,
+       .acquire_set  = &nf_ct_acquire_kfunc_ids,
+       .release_set  = &nf_ct_release_kfunc_ids,
+       .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
+       .owner        = THIS_MODULE,
+       .check_set    = &nf_ct_tc_check_kfunc_ids,
+       .acquire_set  = &nf_ct_acquire_kfunc_ids,
+       .release_set  = &nf_ct_release_kfunc_ids,
+       .ret_null_set = &nf_ct_ret_null_kfunc_ids,
+};
+
+int register_nf_conntrack_bpf(void)
+{
+       int ret;
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set);
+       return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set);
+}
index d6aa5b4..d38d689 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -2750,8 +2751,15 @@ int nf_conntrack_init_start(void)
        conntrack_gc_work_init(&conntrack_gc_work);
        queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
 
+       ret = register_nf_conntrack_bpf();
+       if (ret < 0)
+               goto err_kfunc;
+
        return 0;
 
+err_kfunc:
+       cancel_delayed_work_sync(&conntrack_gc_work.dwork);
+       nf_conntrack_proto_fini();
 err_proto:
        nf_conntrack_seqadj_fini();
 err_seqadj:
index 8c89d0b..00b2e9d 100644 (file)
@@ -2626,8 +2626,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
                    sk->sk_state != SMC_CLOSED) {
                        if (!val) {
                                SMC_STAT_INC(smc, cork_cnt);
-                               mod_delayed_work(smc->conn.lgr->tx_wq,
-                                                &smc->conn.tx_work, 0);
+                               smc_tx_pending(&smc->conn);
+                               cancel_delayed_work(&smc->conn.tx_work);
                        }
                }
                break;
@@ -2765,8 +2765,10 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
                rc = kernel_sendpage(smc->clcsock, page, offset,
                                     size, flags);
        } else {
+               lock_sock(sk);
+               rc = smc_tx_sendpage(smc, page, offset, size, flags);
+               release_sock(sk);
                SMC_STAT_INC(smc, sendpage_cnt);
-               rc = sock_no_sendpage(sock, page, offset, size, flags);
        }
 
 out:
index be241d5..a96ce16 100644 (file)
@@ -31,7 +31,6 @@
 #include "smc_tracepoint.h"
 
 #define SMC_TX_WORK_DELAY      0
-#define SMC_TX_CORK_DELAY      (HZ >> 2)       /* 250 ms */
 
 /***************************** sndbuf producer *******************************/
 
@@ -236,16 +235,15 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
                 */
                if ((msg->msg_flags & MSG_OOB) && !send_remaining)
                        conn->urg_tx_pend = true;
-               if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) &&
-                   (atomic_read(&conn->sndbuf_space) >
-                                               (conn->sndbuf_desc->len >> 1)))
-                       /* for a corked socket defer the RDMA writes if there
-                        * is still sufficient sndbuf_space available
+               if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc) ||
+                    msg->msg_flags & MSG_SENDPAGE_NOTLAST) &&
+                   (atomic_read(&conn->sndbuf_space)))
+                       /* for a corked socket defer the RDMA writes if
+                        * sndbuf_space is still available. The applications
+                        * should known how/when to uncork it.
                         */
-                       queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
-                                          SMC_TX_CORK_DELAY);
-               else
-                       smc_tx_sndbuf_nonempty(conn);
+                       continue;
+               smc_tx_sndbuf_nonempty(conn);
 
                trace_smc_tx_sendmsg(smc, copylen);
        } /* while (msg_data_left(msg)) */
@@ -260,6 +258,22 @@ out_err:
        return rc;
 }
 
+int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset,
+                   size_t size, int flags)
+{
+       struct msghdr msg = {.msg_flags = flags};
+       char *kaddr = kmap(page);
+       struct kvec iov;
+       int rc;
+
+       iov.iov_base = kaddr + offset;
+       iov.iov_len = size;
+       iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
+       rc = smc_tx_sendmsg(smc, &msg, size);
+       kunmap(page);
+       return rc;
+}
+
 /***************************** sndbuf consumer *******************************/
 
 /* sndbuf consumer: actual data transfer of one target chunk with ISM write */
@@ -597,27 +611,32 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
        return rc;
 }
 
-/* Wakeup sndbuf consumers from process context
- * since there is more data to transmit
- */
-void smc_tx_work(struct work_struct *work)
+void smc_tx_pending(struct smc_connection *conn)
 {
-       struct smc_connection *conn = container_of(to_delayed_work(work),
-                                                  struct smc_connection,
-                                                  tx_work);
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        int rc;
 
-       lock_sock(&smc->sk);
        if (smc->sk.sk_err)
-               goto out;
+               return;
 
        rc = smc_tx_sndbuf_nonempty(conn);
        if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
            !atomic_read(&conn->bytes_to_rcv))
                conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+}
+
+/* Wakeup sndbuf consumers from process context
+ * since there is more data to transmit
+ */
+void smc_tx_work(struct work_struct *work)
+{
+       struct smc_connection *conn = container_of(to_delayed_work(work),
+                                                  struct smc_connection,
+                                                  tx_work);
+       struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
 
-out:
+       lock_sock(&smc->sk);
+       smc_tx_pending(conn);
        release_sock(&smc->sk);
 }
 
index 07e6ad7..34b5784 100644 (file)
@@ -27,9 +27,12 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)
        return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
 }
 
+void smc_tx_pending(struct smc_connection *conn);
 void smc_tx_work(struct work_struct *work);
 void smc_tx_init(struct smc_sock *smc);
 int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
+int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset,
+                   size_t size, int flags);
 int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
 void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
 void smc_tx_consumer_update(struct smc_connection *conn, bool force);
index 5f42aa5..8eb7e85 100644 (file)
@@ -72,7 +72,8 @@ struct gss_auth {
        struct gss_api_mech *mech;
        enum rpc_gss_svc service;
        struct rpc_clnt *client;
-       struct net *net;
+       struct net      *net;
+       netns_tracker   ns_tracker;
        /*
         * There are two upcall pipes; dentry[1], named "gssd", is used
         * for the new text-based upcall; dentry[0] is named after the
@@ -1013,7 +1014,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
                        goto err_free;
        }
        gss_auth->client = clnt;
-       gss_auth->net = get_net(rpc_net_ns(clnt));
+       gss_auth->net = get_net_track(rpc_net_ns(clnt), &gss_auth->ns_tracker,
+                                     GFP_KERNEL);
        err = -EINVAL;
        gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
        if (!gss_auth->mech)
@@ -1068,7 +1070,7 @@ err_destroy_credcache:
 err_put_mech:
        gss_mech_put(gss_auth->mech);
 err_put_net:
-       put_net(gss_auth->net);
+       put_net_track(gss_auth->net, &gss_auth->ns_tracker);
 err_free:
        kfree(gss_auth->target_name);
        kfree(gss_auth);
@@ -1084,7 +1086,7 @@ gss_free(struct gss_auth *gss_auth)
        gss_pipe_free(gss_auth->gss_pipe[0]);
        gss_pipe_free(gss_auth->gss_pipe[1]);
        gss_mech_put(gss_auth->mech);
-       put_net(gss_auth->net);
+       put_net_track(gss_auth->net, &gss_auth->ns_tracker);
        kfree(gss_auth->target_name);
 
        kfree(gss_auth);
index b21ad79..db878e8 100644 (file)
@@ -162,7 +162,7 @@ static void svc_xprt_free(struct kref *kref)
        if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
                svcauth_unix_info_release(xprt);
        put_cred(xprt->xpt_cred);
-       put_net(xprt->xpt_net);
+       put_net_track(xprt->xpt_net, &xprt->ns_tracker);
        /* See comment on corresponding get in xs_setup_bc_tcp(): */
        if (xprt->xpt_bc_xprt)
                xprt_put(xprt->xpt_bc_xprt);
@@ -198,7 +198,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
        mutex_init(&xprt->xpt_mutex);
        spin_lock_init(&xprt->xpt_lock);
        set_bit(XPT_BUSY, &xprt->xpt_flags);
-       xprt->xpt_net = get_net(net);
+       xprt->xpt_net = get_net_track(net, &xprt->ns_tracker, GFP_ATOMIC);
        strcpy(xprt->xpt_remotebuf, "uninitialized");
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
index a02de2b..5af484d 100644 (file)
@@ -1835,7 +1835,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc);
 
 void xprt_free(struct rpc_xprt *xprt)
 {
-       put_net(xprt->xprt_net);
+       put_net_track(xprt->xprt_net, &xprt->ns_tracker);
        xprt_free_all_slots(xprt);
        xprt_free_id(xprt);
        rpc_sysfs_xprt_destroy(xprt);
@@ -2027,7 +2027,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 
        xprt_init_xid(xprt);
 
-       xprt->xprt_net = get_net(net);
+       xprt->xprt_net = get_net_track(net, &xprt->ns_tracker, GFP_KERNEL);
 }
 
 /**
index 64ae4c4..c5eec16 100644 (file)
@@ -226,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
        m->hdr[w] |= htonl(val);
 }
 
-static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b)
-{
-       u32 temp = msg->hdr[a];
-
-       msg->hdr[a] = msg->hdr[b];
-       msg->hdr[b] = temp;
-}
-
 /*
  * Word 0
  */
@@ -480,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m)
        msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1);
 }
 
-static inline void msg_reset_reroute_cnt(struct tipc_msg *m)
-{
-       msg_set_bits(m, 1, 21, 0xf, 0);
-}
-
 static inline u32 msg_lookup_scope(struct tipc_msg *m)
 {
        return msg_bits(m, 1, 19, 0x3);
@@ -800,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n)
        msg_set_word(m, 2, n);
 }
 
-static inline u32 msg_bcgap_after(struct tipc_msg *m)
-{
-       return msg_bits(m, 2, 16, 0xffff);
-}
-
 static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n)
 {
        msg_set_bits(m, 2, 16, 0xffff, n);
@@ -868,11 +850,6 @@ static inline void msg_set_next_sent(struct tipc_msg *m, u16 n)
        msg_set_bits(m, 4, 0, 0xffff, n);
 }
 
-static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
-{
-       msg_set_bits(m, 4, 0, 0xffff, n);
-}
-
 static inline u32 msg_bc_netid(struct tipc_msg *m)
 {
        return msg_word(m, 4);
index c195698..3e0d628 100644 (file)
@@ -3240,49 +3240,58 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
        return sk;
 }
 
-static struct sock *unix_next_socket(struct seq_file *seq,
-                                    struct sock *sk,
-                                    loff_t *pos)
+static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
 {
        unsigned long bucket = get_bucket(*pos);
+       struct sock *sk;
 
-       while (sk > (struct sock *)SEQ_START_TOKEN) {
-               sk = sk_next(sk);
-               if (!sk)
-                       goto next_bucket;
-               if (sock_net(sk) == seq_file_net(seq))
-                       return sk;
-       }
-
-       do {
+       while (bucket < ARRAY_SIZE(unix_socket_table)) {
                spin_lock(&unix_table_locks[bucket]);
+
                sk = unix_from_bucket(seq, pos);
                if (sk)
                        return sk;
 
-next_bucket:
-               spin_unlock(&unix_table_locks[bucket++]);
-               *pos = set_bucket_offset(bucket, 1);
-       } while (bucket < ARRAY_SIZE(unix_socket_table));
+               spin_unlock(&unix_table_locks[bucket]);
+
+               *pos = set_bucket_offset(++bucket, 1);
+       }
 
        return NULL;
 }
 
+static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
+                                 loff_t *pos)
+{
+       unsigned long bucket = get_bucket(*pos);
+
+       for (sk = sk_next(sk); sk; sk = sk_next(sk))
+               if (sock_net(sk) == seq_file_net(seq))
+                       return sk;
+
+       spin_unlock(&unix_table_locks[bucket]);
+
+       *pos = set_bucket_offset(++bucket, 1);
+
+       return unix_get_first(seq, pos);
+}
+
 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
 {
        if (!*pos)
                return SEQ_START_TOKEN;
 
-       if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
-               return NULL;
-
-       return unix_next_socket(seq, NULL, pos);
+       return unix_get_first(seq, pos);
 }
 
 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        ++*pos;
-       return unix_next_socket(seq, v, pos);
+
+       if (v == SEQ_START_TOKEN)
+               return unix_get_first(seq, pos);
+
+       return unix_get_next(seq, v, pos);
 }
 
 static void unix_seq_stop(struct seq_file *seq, void *v)
@@ -3347,6 +3356,15 @@ static const struct seq_operations unix_seq_ops = {
 };
 
 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_unix_iter_state {
+       struct seq_net_private p;
+       unsigned int cur_sk;
+       unsigned int end_sk;
+       unsigned int max_sk;
+       struct sock **batch;
+       bool st_bucket_done;
+};
+
 struct bpf_iter__unix {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct unix_sock *, unix_sk);
@@ -3365,24 +3383,156 @@ static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
        return bpf_iter_run_prog(prog, &ctx);
 }
 
+static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
+
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
+               if (sock_net(sk) != seq_file_net(seq))
+                       continue;
+
+               if (iter->end_sk < iter->max_sk) {
+                       sock_hold(sk);
+                       iter->batch[iter->end_sk++] = sk;
+               }
+
+               expected++;
+       }
+
+       spin_unlock(&unix_table_locks[start_sk->sk_hash]);
+
+       return expected;
+}
+
+static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
+{
+       while (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
+                                      unsigned int new_batch_sz)
+{
+       struct sock **new_batch;
+
+       new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+                            GFP_USER | __GFP_NOWARN);
+       if (!new_batch)
+               return -ENOMEM;
+
+       bpf_iter_unix_put_batch(iter);
+       kvfree(iter->batch);
+       iter->batch = new_batch;
+       iter->max_sk = new_batch_sz;
+
+       return 0;
+}
+
+static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
+                                       loff_t *pos)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       unsigned int expected;
+       bool resized = false;
+       struct sock *sk;
+
+       if (iter->st_bucket_done)
+               *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
+
+again:
+       /* Get a new batch */
+       iter->cur_sk = 0;
+       iter->end_sk = 0;
+
+       sk = unix_get_first(seq, pos);
+       if (!sk)
+               return NULL; /* Done */
+
+       expected = bpf_iter_unix_hold_batch(seq, sk);
+
+       if (iter->end_sk == expected) {
+               iter->st_bucket_done = true;
+               return sk;
+       }
+
+       if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
+               resized = true;
+               goto again;
+       }
+
+       return sk;
+}
+
+static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       if (!*pos)
+               return SEQ_START_TOKEN;
+
+       /* bpf iter does not support lseek, so it always
+        * continue from where it was stop()-ped.
+        */
+       return bpf_iter_unix_batch(seq, pos);
+}
+
+static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct bpf_unix_iter_state *iter = seq->private;
+       struct sock *sk;
+
+       /* Whenever seq_next() is called, the iter->cur_sk is
+        * done with seq_show(), so advance to the next sk in
+        * the batch.
+        */
+       if (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+
+       ++*pos;
+
+       if (iter->cur_sk < iter->end_sk)
+               sk = iter->batch[iter->cur_sk];
+       else
+               sk = bpf_iter_unix_batch(seq, pos);
+
+       return sk;
+}
+
 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
 {
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
        uid_t uid;
+       bool slow;
+       int ret;
 
        if (v == SEQ_START_TOKEN)
                return 0;
 
+       slow = lock_sock_fast(sk);
+
+       if (unlikely(sk_unhashed(sk))) {
+               ret = SEQ_SKIP;
+               goto unlock;
+       }
+
        uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
-       return unix_prog_seq_show(prog, &meta, v, uid);
+       ret = unix_prog_seq_show(prog, &meta, v, uid);
+unlock:
+       unlock_sock_fast(sk, slow);
+       return ret;
 }
 
 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
 {
+       struct bpf_unix_iter_state *iter = seq->private;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
 
@@ -3393,12 +3543,13 @@ static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
                        (void)unix_prog_seq_show(prog, &meta, v, 0);
        }
 
-       unix_seq_stop(seq, v);
+       if (iter->cur_sk < iter->end_sk)
+               bpf_iter_unix_put_batch(iter);
 }
 
 static const struct seq_operations bpf_iter_unix_seq_ops = {
-       .start  = unix_seq_start,
-       .next   = unix_seq_next,
+       .start  = bpf_iter_unix_seq_start,
+       .next   = bpf_iter_unix_seq_next,
        .stop   = bpf_iter_unix_seq_stop,
        .show   = bpf_iter_unix_seq_show,
 };
@@ -3447,13 +3598,55 @@ static struct pernet_operations unix_net_ops = {
 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
                     struct unix_sock *unix_sk, uid_t uid)
 
+#define INIT_BATCH_SZ 16
+
+static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+       struct bpf_unix_iter_state *iter = priv_data;
+       int err;
+
+       err = bpf_iter_init_seq_net(priv_data, aux);
+       if (err)
+               return err;
+
+       err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
+       if (err) {
+               bpf_iter_fini_seq_net(priv_data);
+               return err;
+       }
+
+       return 0;
+}
+
+static void bpf_iter_fini_unix(void *priv_data)
+{
+       struct bpf_unix_iter_state *iter = priv_data;
+
+       bpf_iter_fini_seq_net(priv_data);
+       kvfree(iter->batch);
+}
+
 static const struct bpf_iter_seq_info unix_seq_info = {
        .seq_ops                = &bpf_iter_unix_seq_ops,
-       .init_seq_private       = bpf_iter_init_seq_net,
-       .fini_seq_private       = bpf_iter_fini_seq_net,
-       .seq_priv_size          = sizeof(struct seq_net_private),
+       .init_seq_private       = bpf_iter_init_unix,
+       .fini_seq_private       = bpf_iter_fini_unix,
+       .seq_priv_size          = sizeof(struct bpf_unix_iter_state),
 };
 
+static const struct bpf_func_proto *
+bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
+                            const struct bpf_prog *prog)
+{
+       switch (func_id) {
+       case BPF_FUNC_setsockopt:
+               return &bpf_sk_setsockopt_proto;
+       case BPF_FUNC_getsockopt:
+               return &bpf_sk_getsockopt_proto;
+       default:
+               return NULL;
+       }
+}
+
 static struct bpf_iter_reg unix_reg_info = {
        .target                 = "unix",
        .ctx_arg_info_size      = 1,
@@ -3461,6 +3654,7 @@ static struct bpf_iter_reg unix_reg_info = {
                { offsetof(struct bpf_iter__unix, unix_sk),
                  PTR_TO_BTF_ID_OR_NULL },
        },
+       .get_func_proto         = bpf_iter_unix_get_func_proto,
        .seq_info               = &unix_seq_info,
 };
 
index 8675fa5..3ec8ad9 100644 (file)
@@ -26,12 +26,12 @@ static void int_exit(int sig)
 {
        __u32 curr_prog_id = 0;
 
-       if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
+       if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+               printf("bpf_xdp_query_id failed\n");
                exit(1);
        }
        if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+               bpf_xdp_detach(ifindex, xdp_flags, NULL);
        else if (!curr_prog_id)
                printf("couldn't find a prog id on a given interface\n");
        else
@@ -143,7 +143,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
index a70b094..6c61d5f 100644 (file)
@@ -34,12 +34,12 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(1);
                }
                if (prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                else if (!curr_prog_id)
                        printf("couldn't find a prog id on a given iface\n");
                else
@@ -173,7 +173,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
index 4ad8967..79ccd98 100644 (file)
@@ -33,7 +33,7 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
 {
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
+       err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
        if (err < 0) {
                printf("ERROR: failed to attach program to %s\n", name);
                return err;
@@ -51,7 +51,7 @@ static int do_detach(int idx, const char *name)
 {
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+       err = bpf_xdp_detach(idx, xdp_flags, NULL);
        if (err < 0)
                printf("ERROR: failed to detach program from %s\n", name);
 
index cfaf7e5..2d565ba 100644 (file)
@@ -43,13 +43,13 @@ static void int_exit(int sig)
        int i = 0;
 
        for (i = 0; i < total_ifindex; i++) {
-               if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
-                       printf("bpf_get_link_xdp_id on iface %d failed\n",
+               if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) {
+                       printf("bpf_xdp_query_id on iface %d failed\n",
                               ifindex_list[i]);
                        exit(1);
                }
                if (prog_id_list[i] == prog_id)
-                       bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+                       bpf_xdp_detach(ifindex_list[i], flags, NULL);
                else if (!prog_id)
                        printf("couldn't find a prog id on iface %d\n",
                               ifindex_list[i]);
@@ -716,12 +716,12 @@ int main(int ac, char **argv)
        }
        prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
        for (i = 0; i < total_ifindex; i++) {
-               if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
+               if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) {
                        printf("link set xdp fd failed\n");
                        int recovery_index = i;
 
                        for (i = 0; i < recovery_index; i++)
-                               bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
+                               bpf_xdp_detach(ifindex_list[i], flags, NULL);
 
                        return 1;
                }
index 74a2926..fb2532d 100644 (file)
@@ -62,15 +62,15 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(EXIT_FAIL);
                }
                if (prog_id == curr_prog_id) {
                        fprintf(stderr,
                                "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
                                ifindex, ifname);
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                } else if (!curr_prog_id) {
                        printf("couldn't find a prog id on a given iface\n");
                } else {
@@ -209,7 +209,7 @@ static struct datarec *alloc_record_per_cpu(void)
 
 static struct record *alloc_record_per_rxq(void)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        struct record *array;
 
        array = calloc(nr_rxqs, sizeof(struct record));
@@ -222,7 +222,7 @@ static struct record *alloc_record_per_rxq(void)
 
 static struct stats_record *alloc_stats_record(void)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        struct stats_record *rec;
        int i;
 
@@ -241,7 +241,7 @@ static struct stats_record *alloc_stats_record(void)
 
 static void free_stats_record(struct stats_record *r)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        int i;
 
        for (i = 0; i < nr_rxqs; i++)
@@ -289,7 +289,7 @@ static void stats_collect(struct stats_record *rec)
        map_collect_percpu(fd, 0, &rec->stats);
 
        fd = bpf_map__fd(rx_queue_index_map);
-       max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       max_rxqs = bpf_map__max_entries(rx_queue_index_map);
        for (i = 0; i < max_rxqs; i++)
                map_collect_percpu(fd, i, &rec->rxq[i]);
 }
@@ -335,7 +335,7 @@ static void stats_print(struct stats_record *stats_rec,
                        struct stats_record *stats_prev,
                        int action, __u32 cfg_opt)
 {
-       unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
+       unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map);
        unsigned int nr_cpus = bpf_num_possible_cpus();
        double pps = 0, err = 0;
        struct record *rec, *prev;
@@ -582,7 +582,7 @@ int main(int argc, char **argv)
        signal(SIGINT, int_exit);
        signal(SIGTERM, int_exit);
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                fprintf(stderr, "link set xdp fd failed\n");
                return EXIT_FAIL_XDP;
        }
index 587eacb..0a2b3e9 100644 (file)
@@ -30,7 +30,7 @@ static int do_attach(int idx, int fd, const char *name)
        __u32 info_len = sizeof(info);
        int err;
 
-       err = bpf_set_link_xdp_fd(idx, fd, xdp_flags);
+       err = bpf_xdp_attach(idx, fd, xdp_flags, NULL);
        if (err < 0) {
                printf("ERROR: failed to attach program to %s\n", name);
                return err;
@@ -51,13 +51,13 @@ static int do_detach(int idx, const char *name)
        __u32 curr_prog_id = 0;
        int err = 0;
 
-       err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
+       err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id);
        if (err) {
-               printf("bpf_get_link_xdp_id failed\n");
+               printf("bpf_xdp_query_id failed\n");
                return err;
        }
        if (prog_id == curr_prog_id) {
-               err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
+               err = bpf_xdp_detach(idx, xdp_flags, NULL);
                if (err < 0)
                        printf("ERROR: failed to detach prog from %s\n", name);
        } else if (!curr_prog_id) {
index 8740838..ae70a79 100644 (file)
@@ -1265,7 +1265,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
        int ret;
 
        if (prog_id) {
-               ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags);
+               ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id);
                if (ret < 0)
                        return -errno;
 
@@ -1278,7 +1278,7 @@ static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
                }
        }
 
-       return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+       return bpf_xdp_detach(ifindex, xdp_flags, NULL);
 }
 
 int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
@@ -1295,8 +1295,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
 
        xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
        xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
-       ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog),
-                                 xdp_flags);
+       ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL);
        if (ret < 0) {
                ret = -errno;
                fprintf(stderr,
@@ -1308,7 +1307,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
                return ret;
        }
 
-       ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags);
+       ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
        if (ret < 0) {
                ret = -errno;
                fprintf(stderr,
index 1d4f305..7370c03 100644 (file)
@@ -32,12 +32,12 @@ static void int_exit(int sig)
        __u32 curr_prog_id = 0;
 
        if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
+               if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+                       printf("bpf_xdp_query_id failed\n");
                        exit(1);
                }
                if (prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+                       bpf_xdp_detach(ifindex, xdp_flags, NULL);
                else if (!curr_prog_id)
                        printf("couldn't find a prog id on a given iface\n");
                else
@@ -288,7 +288,7 @@ int main(int argc, char **argv)
                }
        }
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+       if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
                printf("link set xdp fd failed\n");
                return 1;
        }
@@ -302,7 +302,7 @@ int main(int argc, char **argv)
 
        poll_stats(kill_after_s);
 
-       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+       bpf_xdp_detach(ifindex, xdp_flags, NULL);
 
        return 0;
 }
index cc44087..28b5f2a 100644 (file)
@@ -173,7 +173,7 @@ main(int argc, char **argv)
        unlink(SOCKET_NAME);
 
        /* Unset fd for given ifindex */
-       err = bpf_set_link_xdp_fd(ifindex, -1, 0);
+       err = bpf_xdp_detach(ifindex, 0, NULL);
        if (err) {
                fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex);
                return err;
index aa50864..19288a2 100644 (file)
@@ -571,13 +571,13 @@ static void remove_xdp_program(void)
 {
        u32 curr_prog_id = 0;
 
-       if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
+       if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) {
+               printf("bpf_xdp_query_id failed\n");
                exit(EXIT_FAILURE);
        }
 
        if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
+               bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL);
        else if (!curr_prog_id)
                printf("couldn't find a prog id on a given interface\n");
        else
@@ -1027,7 +1027,7 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
        if (ret)
                exit_with_error(-ret);
 
-       ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
+       ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id);
        if (ret)
                exit_with_error(-ret);
 
@@ -1760,7 +1760,7 @@ static void load_xdp_program(char **argv, struct bpf_object **obj)
                exit(EXIT_FAILURE);
        }
 
-       if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
+       if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) {
                fprintf(stderr, "ERROR: link set xdp fd failed\n");
                exit(EXIT_FAILURE);
        }
index 52e7c4f..2220509 100644 (file)
@@ -974,8 +974,8 @@ static void remove_xdp_program(void)
        int i;
 
        for (i = 0 ; i < n_ports; i++)
-               bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1,
-                                   port_params[i].xsk_cfg.xdp_flags);
+               bpf_xdp_detach(if_nametoindex(port_params[i].iface),
+                              port_params[i].xsk_cfg.xdp_flags, NULL);
 }
 
 int main(int argc, char **argv)
index a6403dd..0966252 100755 (executable)
@@ -87,21 +87,25 @@ class HeaderParser(object):
         self.line = ''
         self.helpers = []
         self.commands = []
+        self.desc_unique_helpers = set()
+        self.define_unique_helpers = []
+        self.desc_syscalls = []
+        self.enum_syscalls = []
 
     def parse_element(self):
         proto    = self.parse_symbol()
-        desc     = self.parse_desc()
-        ret      = self.parse_ret()
+        desc     = self.parse_desc(proto)
+        ret      = self.parse_ret(proto)
         return APIElement(proto=proto, desc=desc, ret=ret)
 
     def parse_helper(self):
         proto    = self.parse_proto()
-        desc     = self.parse_desc()
-        ret      = self.parse_ret()
+        desc     = self.parse_desc(proto)
+        ret      = self.parse_ret(proto)
         return Helper(proto=proto, desc=desc, ret=ret)
 
     def parse_symbol(self):
-        p = re.compile(' \* ?(.+)$')
+        p = re.compile(' \* ?(BPF\w+)$')
         capture = p.match(self.line)
         if not capture:
             raise NoSyscallCommandFound
@@ -127,16 +131,15 @@ class HeaderParser(object):
         self.line = self.reader.readline()
         return capture.group(1)
 
-    def parse_desc(self):
+    def parse_desc(self, proto):
         p = re.compile(' \* ?(?:\t| {5,8})Description$')
         capture = p.match(self.line)
         if not capture:
-            # Helper can have empty description and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
+            raise Exception("No description section found for " + proto)
         # Description can be several lines, some of them possibly empty, and it
         # stops when another subsection title is met.
         desc = ''
+        desc_present = False
         while True:
             self.line = self.reader.readline()
             if self.line == ' *\n':
@@ -145,21 +148,24 @@ class HeaderParser(object):
                 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
+                    desc_present = True
                     desc += capture.group(1) + '\n'
                 else:
                     break
+
+        if not desc_present:
+            raise Exception("No description found for " + proto)
         return desc
 
-    def parse_ret(self):
+    def parse_ret(self, proto):
         p = re.compile(' \* ?(?:\t| {5,8})Return$')
         capture = p.match(self.line)
         if not capture:
-            # Helper can have empty retval and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
+            raise Exception("No return section found for " + proto)
         # Return value description can be several lines, some of them possibly
         # empty, and it stops when another subsection title is met.
         ret = ''
+        ret_present = False
         while True:
             self.line = self.reader.readline()
             if self.line == ' *\n':
@@ -168,44 +174,101 @@ class HeaderParser(object):
                 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
+                    ret_present = True
                     ret += capture.group(1) + '\n'
                 else:
                     break
+
+        if not ret_present:
+            raise Exception("No return found for " + proto)
         return ret
 
-    def seek_to(self, target, help_message):
+    def seek_to(self, target, help_message, discard_lines = 1):
         self.reader.seek(0)
         offset = self.reader.read().find(target)
         if offset == -1:
             raise Exception(help_message)
         self.reader.seek(offset)
         self.reader.readline()
-        self.reader.readline()
+        for _ in range(discard_lines):
+            self.reader.readline()
         self.line = self.reader.readline()
 
-    def parse_syscall(self):
+    def parse_desc_syscall(self):
         self.seek_to('* DOC: eBPF Syscall Commands',
                      'Could not find start of eBPF syscall descriptions list')
         while True:
             try:
                 command = self.parse_element()
                 self.commands.append(command)
+                self.desc_syscalls.append(command.proto)
+
             except NoSyscallCommandFound:
                 break
 
-    def parse_helpers(self):
+    def parse_enum_syscall(self):
+        self.seek_to('enum bpf_cmd {',
+                     'Could not find start of bpf_cmd enum', 0)
+        # Searches for either one or more BPF\w+ enums
+        bpf_p = re.compile('\s*(BPF\w+)+')
+        # Searches for an enum entry assigned to another entry,
+        # for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is
+        # not documented hence should be skipped in check to
+        # determine if the right number of syscalls are documented
+        assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
+        bpf_cmd_str = ''
+        while True:
+            capture = assign_p.match(self.line)
+            if capture:
+                # Skip line if an enum entry is assigned to another entry
+                self.line = self.reader.readline()
+                continue
+            capture = bpf_p.match(self.line)
+            if capture:
+                bpf_cmd_str += self.line
+            else:
+                break
+            self.line = self.reader.readline()
+        # Find the number of occurrences of BPF\w+
+        self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
+
+    def parse_desc_helpers(self):
         self.seek_to('* Start of BPF helper function descriptions:',
                      'Could not find start of eBPF helper descriptions list')
         while True:
             try:
                 helper = self.parse_helper()
                 self.helpers.append(helper)
+                proto = helper.proto_break_down()
+                self.desc_unique_helpers.add(proto['name'])
             except NoHelperFound:
                 break
 
+    def parse_define_helpers(self):
+        # Parse the number of FN(...) in #define __BPF_FUNC_MAPPER to compare
+        # later with the number of unique function names present in description.
+        # Note: seek_to(..) discards the first line below the target search text,
+        # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers.
+        self.seek_to('#define __BPF_FUNC_MAPPER(FN)',
+                     'Could not find start of eBPF helper definition list')
+        # Searches for either one or more FN(\w+) defines or a backslash for newline
+        p = re.compile('\s*(FN\(\w+\))+|\\\\')
+        fn_defines_str = ''
+        while True:
+            capture = p.match(self.line)
+            if capture:
+                fn_defines_str += self.line
+            else:
+                break
+            self.line = self.reader.readline()
+        # Find the number of occurrences of FN(\w+)
+        self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str)
+
     def run(self):
-        self.parse_syscall()
-        self.parse_helpers()
+        self.parse_desc_syscall()
+        self.parse_enum_syscall()
+        self.parse_desc_helpers()
+        self.parse_define_helpers()
         self.reader.close()
 
 ###############################################################################
@@ -235,6 +298,25 @@ class Printer(object):
             self.print_one(elem)
         self.print_footer()
 
+    def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance):
+        """
+        Checks the number of helpers/syscalls documented within the header file
+        description with those defined as part of enum/macro and raise an
+        Exception if they don't match.
+        """
+        nr_desc_unique_elem = len(desc_unique_elem)
+        nr_define_unique_elem = len(define_unique_elem)
+        if nr_desc_unique_elem != nr_define_unique_elem:
+            exception_msg = '''
+The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d)
+''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem)
+            if nr_desc_unique_elem < nr_define_unique_elem:
+                # Function description is parsed until no helper is found (which can be due to
+                # misformatting). Hence, only print the first missing/misformatted helper/enum.
+                exception_msg += '''
+The description for %s is not present or formatted correctly.
+''' % (define_unique_elem[nr_desc_unique_elem])
+            raise Exception(exception_msg)
 
 class PrinterRST(Printer):
     """
@@ -295,7 +377,6 @@ class PrinterRST(Printer):
 
         print('')
 
-
 class PrinterHelpersRST(PrinterRST):
     """
     A printer for dumping collected information about helpers as a ReStructured
@@ -305,6 +386,7 @@ class PrinterHelpersRST(PrinterRST):
     """
     def __init__(self, parser):
         self.elements = parser.helpers
+        self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
 
     def print_header(self):
         header = '''\
@@ -478,6 +560,7 @@ class PrinterSyscallRST(PrinterRST):
     """
     def __init__(self, parser):
         self.elements = parser.commands
+        self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd')
 
     def print_header(self):
         header = '''\
@@ -509,6 +592,7 @@ class PrinterHelpers(Printer):
     """
     def __init__(self, parser):
         self.elements = parser.helpers
+        self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER')
 
     type_fwds = [
             'struct bpf_fib_lookup',
index 842889f..a9f8c63 100644 (file)
@@ -838,7 +838,7 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access)
        int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
 
        if (rc)
-               return -EPERM;
+               return rc;
 
        #ifdef CONFIG_CGROUP_DEVICE
        return devcgroup_legacy_check_permission(type, major, minor, access);
index 5983312..a2c665b 100644 (file)
@@ -902,7 +902,7 @@ static int do_show(int argc, char **argv)
                                      equal_fn_for_key_as_id, NULL);
        btf_map_table = hashmap__new(hash_fn_for_key_as_id,
                                     equal_fn_for_key_as_id, NULL);
-       if (!btf_prog_table || !btf_map_table) {
+       if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) {
                hashmap__free(btf_prog_table);
                hashmap__free(btf_map_table);
                if (fd >= 0)
index 3571a28..effe136 100644 (file)
@@ -50,6 +50,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                         const char *attach_flags_str,
                         int level)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
        struct bpf_prog_info info = {};
        __u32 info_len = sizeof(info);
        int prog_fd;
@@ -63,6 +64,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                return -1;
        }
 
+       get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
        if (json_output) {
                jsonw_start_object(json_wtr);
                jsonw_uint_field(json_wtr, "id", info.id);
@@ -73,7 +75,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                        jsonw_uint_field(json_wtr, "attach_type", attach_type);
                jsonw_string_field(json_wtr, "attach_flags",
                                   attach_flags_str);
-               jsonw_string_field(json_wtr, "name", info.name);
+               jsonw_string_field(json_wtr, "name", prog_name);
                jsonw_end_object(json_wtr);
        } else {
                printf("%s%-8u ", level ? "    " : "", info.id);
@@ -81,7 +83,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
                        printf("%-15s", attach_type_name[attach_type]);
                else
                        printf("type %-10u", attach_type);
-               printf(" %-15s %-15s\n", attach_flags_str, info.name);
+               printf(" %-15s %-15s\n", attach_flags_str, prog_name);
        }
 
        close(prog_fd);
index fa8eb81..111dff8 100644 (file)
@@ -24,6 +24,7 @@
 #include <bpf/bpf.h>
 #include <bpf/hashmap.h>
 #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
+#include <bpf/btf.h>
 
 #include "main.h"
 
@@ -304,6 +305,49 @@ const char *get_fd_type_name(enum bpf_obj_type type)
        return names[type];
 }
 
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+                       char *name_buff, size_t buff_len)
+{
+       const char *prog_name = prog_info->name;
+       const struct btf_type *func_type;
+       const struct bpf_func_info finfo;
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       struct btf *prog_btf = NULL;
+
+       if (buff_len <= BPF_OBJ_NAME_LEN ||
+           strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1)
+               goto copy_name;
+
+       if (!prog_info->btf_id || prog_info->nr_func_info == 0)
+               goto copy_name;
+
+       info.nr_func_info = 1;
+       info.func_info_rec_size = prog_info->func_info_rec_size;
+       if (info.func_info_rec_size > sizeof(finfo))
+               info.func_info_rec_size = sizeof(finfo);
+       info.func_info = ptr_to_u64(&finfo);
+
+       if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
+               goto copy_name;
+
+       prog_btf = btf__load_from_kernel_by_id(info.btf_id);
+       if (!prog_btf)
+               goto copy_name;
+
+       func_type = btf__type_by_id(prog_btf, finfo.type_id);
+       if (!func_type || !btf_is_func(func_type))
+               goto copy_name;
+
+       prog_name = btf__name_by_offset(prog_btf, func_type->name_off);
+
+copy_name:
+       snprintf(name_buff, buff_len, "%s", prog_name);
+
+       if (prog_btf)
+               btf__free(prog_btf);
+}
+
 int get_fd_type(int fd)
 {
        char path[PATH_MAX];
index b4695df..43e3f87 100644 (file)
@@ -227,7 +227,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
                /* only generate definitions for memory-mapped internal maps */
                if (!bpf_map__is_internal(map))
                        continue;
-               if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+               if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
                if (!get_map_ident(map, map_ident, sizeof(map_ident)))
@@ -468,7 +468,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
                if (!get_map_ident(map, ident, sizeof(ident)))
                        continue;
                if (bpf_map__is_internal(map) &&
-                   (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   (bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        printf("\tmunmap(skel->%1$s, %2$zd);\n",
                               ident, bpf_map_mmap_sz(map));
                codegen("\
@@ -536,7 +536,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
                        continue;
 
                if (!bpf_map__is_internal(map) ||
-                   !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
                codegen("\
@@ -600,10 +600,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
                        continue;
 
                if (!bpf_map__is_internal(map) ||
-                   !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+                   !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
                        continue;
 
-               if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+               if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG)
                        mmap_flags = "PROT_READ";
                else
                        mmap_flags = "PROT_READ | PROT_WRITE";
@@ -927,7 +927,6 @@ static int do_skeleton(int argc, char **argv)
                        s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
                        if (!s)                                             \n\
                                goto err;                                   \n\
-                       obj->skeleton = s;                                  \n\
                                                                            \n\
                        s->sz = sizeof(*s);                                 \n\
                        s->name = \"%1$s\";                                 \n\
@@ -962,7 +961,7 @@ static int do_skeleton(int argc, char **argv)
                                i, bpf_map__name(map), i, ident);
                        /* memory-mapped internal maps */
                        if (bpf_map__is_internal(map) &&
-                           (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) {
+                           (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) {
                                printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
                                       i, ident);
                        }
@@ -1000,6 +999,7 @@ static int do_skeleton(int argc, char **argv)
                                                                            \n\
                        s->data = (void *)%2$s__elf_bytes(&s->data_sz);     \n\
                                                                            \n\
+                       obj->skeleton = s;                                  \n\
                        return 0;                                           \n\
                err:                                                        \n\
                        bpf_object__destroy_skeleton(s);                    \n\
index 2c258db..97dec81 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2020 Facebook */
 
 #include <errno.h>
+#include <linux/err.h>
 #include <net/if.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -306,7 +307,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                link_table = hashmap__new(hash_fn_for_key_as_id,
                                          equal_fn_for_key_as_id, NULL);
-               if (!link_table) {
+               if (IS_ERR(link_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
index 020e91a..9d01fa9 100644 (file)
@@ -478,7 +478,14 @@ int main(int argc, char **argv)
        }
 
        if (!legacy_libbpf) {
-               ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+               enum libbpf_strict_mode mode;
+
+               /* Allow legacy map definitions for skeleton generation.
+                * It will still be rejected if users use LIBBPF_STRICT_ALL
+                * mode for loading generated skeleton.
+                */
+               mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS;
+               ret = libbpf_set_strict_mode(mode);
                if (ret)
                        p_err("failed to enable libbpf strict mode: %d", ret);
        }
index 8d76d93..0c38405 100644 (file)
@@ -140,6 +140,10 @@ struct cmd {
 int cmd_select(const struct cmd *cmds, int argc, char **argv,
               int (*help)(int argc, char **argv));
 
+#define MAX_PROG_FULL_NAME 128
+void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
+                       char *name_buff, size_t buff_len);
+
 int get_fd_type(int fd);
 const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
index cc530a2..c66a3c9 100644 (file)
@@ -699,7 +699,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                map_table = hashmap__new(hash_fn_for_key_as_id,
                                         equal_fn_for_key_as_id, NULL);
-               if (!map_table) {
+               if (IS_ERR(map_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
index 6490537..526a332 100644 (file)
@@ -551,7 +551,7 @@ static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type,
        if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD)
                flags |= XDP_FLAGS_HW_MODE;
 
-       return bpf_set_link_xdp_fd(ifindex, progfd, flags);
+       return bpf_xdp_attach(ifindex, progfd, flags, NULL);
 }
 
 static int do_attach(int argc, char **argv)
index 56b598e..7c384d1 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (C) 2020 Facebook */
 #include <errno.h>
+#include <linux/err.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -101,7 +102,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
        libbpf_print_fn_t default_print;
 
        *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL);
-       if (!*map) {
+       if (IS_ERR(*map)) {
                p_err("failed to create hashmap for PID references");
                return -1;
        }
index 2a21d50..cf935c6 100644 (file)
@@ -424,8 +424,10 @@ out_free:
        free(value);
 }
 
-static void print_prog_header_json(struct bpf_prog_info *info)
+static void print_prog_header_json(struct bpf_prog_info *info, int fd)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
+
        jsonw_uint_field(json_wtr, "id", info->id);
        if (info->type < ARRAY_SIZE(prog_type_name))
                jsonw_string_field(json_wtr, "type",
@@ -433,8 +435,10 @@ static void print_prog_header_json(struct bpf_prog_info *info)
        else
                jsonw_uint_field(json_wtr, "type", info->type);
 
-       if (*info->name)
-               jsonw_string_field(json_wtr, "name", info->name);
+       if (*info->name) {
+               get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+               jsonw_string_field(json_wtr, "name", prog_name);
+       }
 
        jsonw_name(json_wtr, "tag");
        jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"",
@@ -455,7 +459,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
        char *memlock;
 
        jsonw_start_object(json_wtr);
-       print_prog_header_json(info);
+       print_prog_header_json(info, fd);
        print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
 
        if (info->load_time) {
@@ -507,16 +511,20 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
        jsonw_end_object(json_wtr);
 }
 
-static void print_prog_header_plain(struct bpf_prog_info *info)
+static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
 {
+       char prog_name[MAX_PROG_FULL_NAME];
+
        printf("%u: ", info->id);
        if (info->type < ARRAY_SIZE(prog_type_name))
                printf("%s  ", prog_type_name[info->type]);
        else
                printf("type %u  ", info->type);
 
-       if (*info->name)
-               printf("name %s  ", info->name);
+       if (*info->name) {
+               get_prog_full_name(info, fd, prog_name, sizeof(prog_name));
+               printf("name %s  ", prog_name);
+       }
 
        printf("tag ");
        fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
@@ -534,7 +542,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 {
        char *memlock;
 
-       print_prog_header_plain(info);
+       print_prog_header_plain(info, fd);
 
        if (info->load_time) {
                char buf[32];
@@ -641,7 +649,7 @@ static int do_show(int argc, char **argv)
        if (show_pinned) {
                prog_table = hashmap__new(hash_fn_for_key_as_id,
                                          equal_fn_for_key_as_id, NULL);
-               if (!prog_table) {
+               if (IS_ERR(prog_table)) {
                        p_err("failed to create hashmap for pinned paths");
                        return -1;
                }
@@ -972,10 +980,10 @@ static int do_dump(int argc, char **argv)
 
                if (json_output && nb_fds > 1) {
                        jsonw_start_object(json_wtr);   /* prog object */
-                       print_prog_header_json(&info);
+                       print_prog_header_json(&info, fds[i]);
                        jsonw_name(json_wtr, "insns");
                } else if (nb_fds > 1) {
-                       print_prog_header_plain(&info);
+                       print_prog_header_plain(&info, fds[i]);
                }
 
                err = prog_dump(&info, mode, filepath, opcodes, visual, linum);
index 2f693b0..e08a6ff 100644 (file)
@@ -480,7 +480,6 @@ static int do_unregister(int argc, char **argv)
 static int do_register(int argc, char **argv)
 {
        LIBBPF_OPTS(bpf_object_open_opts, open_opts);
-       const struct bpf_map_def *def;
        struct bpf_map_info info = {};
        __u32 info_len = sizeof(info);
        int nr_errs = 0, nr_maps = 0;
@@ -510,8 +509,7 @@ static int do_register(int argc, char **argv)
        }
 
        bpf_object__for_each_map(map, obj) {
-               def = bpf_map__def(map);
-               if (def->type != BPF_MAP_TYPE_STRUCT_OPS)
+               if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
                        continue;
 
                link = bpf_map__attach_struct_ops(map);
index 320a88a..19a3112 100644 (file)
@@ -24,6 +24,8 @@ LD       = $(HOSTLD)
 ARCH     = $(HOSTARCH)
 RM      ?= rm
 CROSS_COMPILE =
+CFLAGS  := $(KBUILD_HOSTCFLAGS)
+LDFLAGS := $(KBUILD_HOSTLDFLAGS)
 
 OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
 
@@ -51,10 +53,10 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd
 
 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT)
        $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT)    \
-                   DESTDIR=$(LIBBPF_DESTDIR) prefix=                          \
+                   DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \
                    $(abspath $@) install_headers
 
-CFLAGS := -g \
+CFLAGS += -g \
           -I$(srctree)/tools/include \
           -I$(srctree)/tools/include/uapi \
           -I$(LIBBPF_INCLUDE) \
index b0383d3..16a7574 100644 (file)
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
  *                     *ctx_out*, *data_in* and *data_out* must be NULL.
  *                     *repeat* must be zero.
  *
+ *             BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
  *             is set appropriately.
@@ -1111,6 +1113,11 @@ enum bpf_link_type {
  */
 #define BPF_F_SLEEPABLE                (1U << 4)
 
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully support xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS    (1U << 5)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
@@ -1775,6 +1782,8 @@ union bpf_attr {
  *             0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
+ *     Description
+ *             Get the current pid and tgid.
  *     Return
  *             A 64-bit integer containing the current tgid and pid, and
  *             created as such:
@@ -1782,6 +1791,8 @@ union bpf_attr {
  *             *current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
+ *     Description
+ *             Get the current uid and gid.
  *     Return
  *             A 64-bit integer containing the current GID and UID, and
  *             created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2256,6 +2267,8 @@ union bpf_attr {
  *             The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
+ *     Description
+ *             Get the current task.
  *     Return
  *             A pointer to the current task struct.
  *
@@ -2369,6 +2382,8 @@ union bpf_attr {
  *             indicate that the hash is outdated and to trigger a
  *             recalculation the next time the kernel tries to access this
  *             hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *     Return
+ *             void.
  *
  * long bpf_get_numa_node_id(void)
  *     Description
@@ -2466,6 +2481,8 @@ union bpf_attr {
  *             A 8-byte long unique number or 0 if *sk* is NULL.
  *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *     Description
+ *             Get the owner UID of the socked associated to *skb*.
  *     Return
  *             The owner UID of the socket associated to *skb*. If the socket
  *             is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3240,6 +3257,9 @@ union bpf_attr {
  *             The id is returned or 0 in case the id could not be retrieved.
  *
  * u64 bpf_get_current_cgroup_id(void)
+ *     Description
+ *             Get the current cgroup id based on the cgroup within which
+ *             the current task is running.
  *     Return
  *             A 64-bit integer containing the current cgroup id based
  *             on the cgroup within which the current task is running.
@@ -5018,6 +5038,44 @@ union bpf_attr {
  *
  *     Return
  *             The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *     Description
+ *             Get the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *     Description
+ *             Set the syscall's return value that will be returned to userspace.
+ *
+ *             This helper is currently supported by cgroup programs only.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *     Description
+ *             Get the total size of a given xdp buff (linear and paged area)
+ *     Return
+ *             The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             This helper is provided as an easy way to load data from a
+ *             xdp buffer. It can be used to load *len* bytes from *offset* from
+ *             the frame associated to *xdp_md*, into the buffer pointed by
+ *             *buf*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *     Description
+ *             Store *len* bytes from buffer *buf* into the frame
+ *             associated to *xdp_md*, at *offset*.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -5206,6 +5264,11 @@ union bpf_attr {
        FN(get_func_arg),               \
        FN(get_func_ret),               \
        FN(get_func_arg_cnt),           \
+       FN(get_retval),                 \
+       FN(set_retval),                 \
+       FN(xdp_get_buff_len),           \
+       FN(xdp_load_bytes),             \
+       FN(xdp_store_bytes),            \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
index 550b4cb..418b259 100644 (file)
@@ -754,10 +754,10 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
                .flags = flags,
        );
 
-       return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+       return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
 }
 
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+int bpf_prog_attach_opts(int prog_fd, int target_fd,
                          enum bpf_attach_type type,
                          const struct bpf_prog_attach_opts *opts)
 {
@@ -778,6 +778,11 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
        return libbpf_err_errno(ret);
 }
 
+__attribute__((alias("bpf_prog_attach_opts")))
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+                         enum bpf_attach_type type,
+                         const struct bpf_prog_attach_opts *opts);
+
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
index 14e0d97..c2e8327 100644 (file)
@@ -391,6 +391,10 @@ struct bpf_prog_attach_opts {
 
 LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
                               enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
+                                    enum bpf_attach_type type,
+                                    const struct bpf_prog_attach_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
 LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
                                     enum bpf_attach_type type,
                                     const struct bpf_prog_attach_opts *opts);
index 963b106..44df982 100644 (file)
@@ -133,7 +133,7 @@ struct bpf_map_def {
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
-};
+} __attribute__((deprecated("use BTF-defined maps in .maps section")));
 
 enum libbpf_pin_type {
        LIBBPF_PIN_NONE,
index 9aa19c8..1383e26 100644 (file)
@@ -1620,20 +1620,37 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 struct btf_pipe {
        const struct btf *src;
        struct btf *dst;
+       struct hashmap *str_off_map; /* map string offsets from src to dst */
 };
 
 static int btf_rewrite_str(__u32 *str_off, void *ctx)
 {
        struct btf_pipe *p = ctx;
-       int off;
+       void *mapped_off;
+       int off, err;
 
        if (!*str_off) /* nothing to do for empty strings */
                return 0;
 
+       if (p->str_off_map &&
+           hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
+               *str_off = (__u32)(long)mapped_off;
+               return 0;
+       }
+
        off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
        if (off < 0)
                return off;
 
+       /* Remember string mapping from src to dst.  It avoids
+        * performing expensive string comparisons.
+        */
+       if (p->str_off_map) {
+               err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
+               if (err)
+                       return err;
+       }
+
        *str_off = off;
        return 0;
 }
@@ -1680,6 +1697,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
        return 0;
 }
 
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
+
 int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 {
        struct btf_pipe p = { .src = src_btf, .dst = btf };
@@ -1713,6 +1733,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
        if (!off)
                return libbpf_err(-ENOMEM);
 
+       /* Map the string offsets from src_btf to the offsets from btf to improve performance */
+       p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+       if (IS_ERR(p.str_off_map))
+               return libbpf_err(-ENOMEM);
+
        /* bulk copy types data for all types from src_btf */
        memcpy(t, src_btf->types_data, data_sz);
 
@@ -1754,6 +1779,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
        btf->hdr->str_off += data_sz;
        btf->nr_types += cnt;
 
+       hashmap__free(p.str_off_map);
+
        /* return type ID of the first added BTF type */
        return btf->start_id + btf->nr_types - cnt;
 err_out:
@@ -1767,6 +1794,8 @@ err_out:
         * wasn't modified, so doesn't need restoring, see big comment above */
        btf->hdr->str_len = old_strs_len;
 
+       hashmap__free(p.str_off_map);
+
        return libbpf_err(err);
 }
 
index 061839f..51862fd 100644 (file)
@@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
                         const struct btf_dump_type_data_opts *opts);
 
 /*
- * A set of helpers for easier BTF types handling
+ * A set of helpers for easier BTF types handling.
+ *
+ * The inline functions below rely on constants from the kernel headers which
+ * may not be available for applications including this header file. To avoid
+ * compilation errors, we define all the constants here that were added after
+ * the initial introduction of the BTF_KIND* constants.
  */
+#ifndef BTF_KIND_FUNC
+#define BTF_KIND_FUNC          12      /* Function     */
+#define BTF_KIND_FUNC_PROTO    13      /* Function Proto       */
+#endif
+#ifndef BTF_KIND_VAR
+#define BTF_KIND_VAR           14      /* Variable     */
+#define BTF_KIND_DATASEC       15      /* Section      */
+#endif
+#ifndef BTF_KIND_FLOAT
+#define BTF_KIND_FLOAT         16      /* Floating point       */
+#endif
+/* The kernel header switched to enums, so these two were never #defined */
+#define BTF_KIND_DECL_TAG      17      /* Decl Tag */
+#define BTF_KIND_TYPE_TAG      18      /* Type Tag */
+
 static inline __u16 btf_kind(const struct btf_type *t)
 {
        return BTF_INFO_KIND(t->info);
index 3c20b12..aeb09c2 100644 (file)
@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map)
 
 void hashmap__free(struct hashmap *map)
 {
-       if (!map)
+       if (IS_ERR_OR_NULL(map))
                return;
 
        hashmap__clear(map);
@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key,
 
        return true;
 }
-
index 7f10dd5..a8c7503 100644 (file)
@@ -235,6 +235,8 @@ enum sec_def_flags {
        SEC_SLEEPABLE = 8,
        /* allow non-strict prefix matching */
        SEC_SLOPPY_PFX = 16,
+       /* BPF program support non-linear XDP buffer */
+       SEC_XDP_FRAGS = 32,
 };
 
 struct bpf_sec_def {
@@ -1937,6 +1939,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
        if (obj->efile.maps_shndx < 0)
                return 0;
 
+       if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
+               pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
+               return -EOPNOTSUPP;
+       }
+
        if (!symbols)
                return -EINVAL;
 
@@ -1999,6 +2006,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
+               pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
+
                if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
                        pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
                        return -ENOTSUP;
@@ -4190,6 +4199,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
                return 0;
 
        if (!bpf_map__is_internal(map)) {
+               pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
                ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
                                           def->value_size, &key_type_id,
                                           &value_type_id);
@@ -6562,6 +6572,9 @@ static int libbpf_preload_prog(struct bpf_program *prog,
        if (def & SEC_SLEEPABLE)
                opts->prog_flags |= BPF_F_SLEEPABLE;
 
+       if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
+               opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+
        if ((prog->type == BPF_PROG_TYPE_TRACING ||
             prog->type == BPF_PROG_TYPE_LSM ||
             prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -8600,8 +8613,11 @@ static const struct bpf_sec_def section_defs[] = {
        SEC_DEF("lsm.s/",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
        SEC_DEF("iter/",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
        SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
+       SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp_devmap/",          XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
+       SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
        SEC_DEF("xdp_cpumap/",          XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
+       SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
        SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
        SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
        SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
@@ -11795,6 +11811,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
 
 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
 {
+       if (!s)
+               return;
+
        if (s->progs)
                bpf_object__detach_skeleton(s);
        if (s->obj)
index 8b9bc5e..9467006 100644 (file)
@@ -706,7 +706,8 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
 LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 /* get map definition */
-LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead")
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
 /* get map name */
 LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
 /* get/set map type */
@@ -832,13 +833,42 @@ struct bpf_xdp_set_link_opts {
 };
 #define bpf_xdp_set_link_opts__last_field old_fd
 
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
 LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                                        const struct bpf_xdp_set_link_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead")
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead")
 LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
                                     size_t info_size, __u32 flags);
 
+struct bpf_xdp_attach_opts {
+       size_t sz;
+       int old_prog_fd;
+       size_t :0;
+};
+#define bpf_xdp_attach_opts__last_field old_prog_fd
+
+struct bpf_xdp_query_opts {
+       size_t sz;
+       __u32 prog_id;          /* output */
+       __u32 drv_prog_id;      /* output */
+       __u32 hw_prog_id;       /* output */
+       __u32 skb_prog_id;      /* output */
+       __u8 attach_mode;       /* output */
+       size_t :0;
+};
+#define bpf_xdp_query_opts__last_field attach_mode
+
+LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags,
+                             const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags,
+                             const struct bpf_xdp_attach_opts *opts);
+LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts);
+LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id);
+
 /* TC related API */
 enum bpf_tc_attach_point {
        BPF_TC_INGRESS = 1 << 0,
index 5297839..e10f082 100644 (file)
@@ -247,6 +247,7 @@ LIBBPF_0.0.8 {
                bpf_link_create;
                bpf_link_update;
                bpf_map__set_initial_value;
+               bpf_prog_attach_opts;
                bpf_program__attach_cgroup;
                bpf_program__attach_lsm;
                bpf_program__is_lsm;
@@ -427,6 +428,10 @@ LIBBPF_0.7.0 {
                bpf_program__log_level;
                bpf_program__set_log_buf;
                bpf_program__set_log_level;
+               bpf_xdp_attach;
+               bpf_xdp_detach;
+               bpf_xdp_query;
+               bpf_xdp_query_id;
                libbpf_probe_bpf_helper;
                libbpf_probe_bpf_map_type;
                libbpf_probe_bpf_prog_type;
index 79131f7..3c2b281 100644 (file)
@@ -73,6 +73,11 @@ enum libbpf_strict_mode {
         * operation.
         */
        LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+       /*
+        * Error out on any SEC("maps") map definition, which are deprecated
+        * in favor of BTF-defined map definitions in SEC(".maps").
+        */
+       LIBBPF_STRICT_MAP_DEFINITIONS = 0x20,
 
        __LIBBPF_STRICT_LAST,
 };
index 39f25e0..c39c37f 100644 (file)
@@ -217,6 +217,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
        return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
 }
 
+int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+       int old_prog_fd, err;
+
+       if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
+               return libbpf_err(-EINVAL);
+
+       old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+       if (old_prog_fd)
+               flags |= XDP_FLAGS_REPLACE;
+       else
+               old_prog_fd = -1;
+
+       err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags);
+       return libbpf_err(err);
+}
+
+int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+       return bpf_xdp_attach(ifindex, -1, flags, opts);
+}
+
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                             const struct bpf_xdp_set_link_opts *opts)
 {
@@ -303,69 +325,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
        return 0;
 }
 
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-                         size_t info_size, __u32 flags)
+int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 {
-       struct xdp_id_md xdp_id = {};
-       __u32 mask;
-       int ret;
        struct libbpf_nla_req req = {
                .nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
                .nh.nlmsg_type     = RTM_GETLINK,
                .nh.nlmsg_flags    = NLM_F_DUMP | NLM_F_REQUEST,
                .ifinfo.ifi_family = AF_PACKET,
        };
+       struct xdp_id_md xdp_id = {};
+       int err;
 
-       if (flags & ~XDP_FLAGS_MASK || !info_size)
+       if (!OPTS_VALID(opts, bpf_xdp_query_opts))
+               return libbpf_err(-EINVAL);
+
+       if (xdp_flags & ~XDP_FLAGS_MASK)
                return libbpf_err(-EINVAL);
 
        /* Check whether the single {HW,DRV,SKB} mode is set */
-       flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
-       mask = flags - 1;
-       if (flags && flags & mask)
+       xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
+       if (xdp_flags & (xdp_flags - 1))
                return libbpf_err(-EINVAL);
 
        xdp_id.ifindex = ifindex;
-       xdp_id.flags = flags;
+       xdp_id.flags = xdp_flags;
 
-       ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+       err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
                                       get_xdp_info, &xdp_id);
-       if (!ret) {
-               size_t sz = min(info_size, sizeof(xdp_id.info));
+       if (err)
+               return libbpf_err(err);
 
-               memcpy(info, &xdp_id.info, sz);
-               memset((void *) info + sz, 0, info_size - sz);
-       }
+       OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
+       OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
+       OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
+       OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
+       OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);
 
-       return libbpf_err(ret);
+       return 0;
 }
 
-static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+                         size_t info_size, __u32 flags)
 {
-       flags &= XDP_FLAGS_MODES;
+       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+       size_t sz;
+       int err;
+
+       if (!info_size)
+               return libbpf_err(-EINVAL);
 
-       if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
-               return info->prog_id;
-       if (flags & XDP_FLAGS_DRV_MODE)
-               return info->drv_prog_id;
-       if (flags & XDP_FLAGS_HW_MODE)
-               return info->hw_prog_id;
-       if (flags & XDP_FLAGS_SKB_MODE)
-               return info->skb_prog_id;
+       err = bpf_xdp_query(ifindex, flags, &opts);
+       if (err)
+               return libbpf_err(err);
+
+       /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts
+        * layout after sz field
+        */
+       sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
+       memcpy(info, &opts.prog_id, sz);
+       memset((void *)info + sz, 0, info_size - sz);
 
        return 0;
 }
 
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 {
-       struct xdp_link_info info;
+       LIBBPF_OPTS(bpf_xdp_query_opts, opts);
        int ret;
 
-       ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
-       if (!ret)
-               *prog_id = get_xdp_id(&info, flags);
+       ret = bpf_xdp_query(ifindex, flags, &opts);
+       if (ret)
+               return libbpf_err(ret);
+
+       flags &= XDP_FLAGS_MODES;
 
-       return libbpf_err(ret);
+       if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
+               *prog_id = opts.prog_id;
+       else if (flags & XDP_FLAGS_DRV_MODE)
+               *prog_id = opts.drv_prog_id;
+       else if (flags & XDP_FLAGS_HW_MODE)
+               *prog_id = opts.hw_prog_id;
+       else if (flags & XDP_FLAGS_SKB_MODE)
+               *prog_id = opts.skb_prog_id;
+       else
+               *prog_id = 0;
+
+       return 0;
+}
+
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+       return bpf_xdp_query_id(ifindex, flags, prog_id);
 }
 
 typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
index 7ecfaac..ef2832b 100644 (file)
@@ -1005,24 +1005,22 @@ __bpf_map__config_value(struct bpf_map *map,
 {
        struct bpf_map_op *op;
        const char *map_name = bpf_map__name(map);
-       const struct bpf_map_def *def = bpf_map__def(map);
 
-       if (IS_ERR(def)) {
-               pr_debug("Unable to get map definition from '%s'\n",
-                        map_name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       if (def->type != BPF_MAP_TYPE_ARRAY) {
+       if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
        }
-       if (def->key_size < sizeof(unsigned int)) {
+       if (bpf_map__key_size(map) < sizeof(unsigned int)) {
                pr_debug("Map %s has incorrect key size\n", map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
        }
-       switch (def->value_size) {
+       switch (bpf_map__value_size(map)) {
        case 1:
        case 2:
        case 4:
@@ -1064,7 +1062,6 @@ __bpf_map__config_event(struct bpf_map *map,
                        struct parse_events_term *term,
                        struct evlist *evlist)
 {
-       const struct bpf_map_def *def;
        struct bpf_map_op *op;
        const char *map_name = bpf_map__name(map);
        struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
@@ -1075,18 +1072,16 @@ __bpf_map__config_event(struct bpf_map *map,
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("Unable to get map definition from '%s'\n",
-                        map_name);
-               return PTR_ERR(def);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
+               return PTR_ERR(map);
        }
 
        /*
         * No need to check key_size and value_size:
         * kernel has already checked them.
         */
-       if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+       if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1135,7 +1130,6 @@ config_map_indices_range_check(struct parse_events_term *term,
                               const char *map_name)
 {
        struct parse_events_array *array = &term->array;
-       const struct bpf_map_def *def;
        unsigned int i;
 
        if (!array->nr_ranges)
@@ -1146,10 +1140,8 @@ config_map_indices_range_check(struct parse_events_term *term,
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("ERROR: Unable to get map definition from '%s'\n",
-                        map_name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
@@ -1158,7 +1150,7 @@ config_map_indices_range_check(struct parse_events_term *term,
                size_t length = array->ranges[i].length;
                unsigned int idx = start + length - 1;
 
-               if (idx >= def->max_entries) {
+               if (idx >= bpf_map__max_entries(map)) {
                        pr_debug("ERROR: index %d too large\n", idx);
                        return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
                }
@@ -1251,21 +1243,21 @@ out:
 }
 
 typedef int (*map_config_func_t)(const char *name, int map_fd,
-                                const struct bpf_map_def *pdef,
+                                const struct bpf_map *map,
                                 struct bpf_map_op *op,
                                 void *pkey, void *arg);
 
 static int
 foreach_key_array_all(map_config_func_t func,
                      void *arg, const char *name,
-                     int map_fd, const struct bpf_map_def *pdef,
+                     int map_fd, const struct bpf_map *map,
                      struct bpf_map_op *op)
 {
        unsigned int i;
        int err;
 
-       for (i = 0; i < pdef->max_entries; i++) {
-               err = func(name, map_fd, pdef, op, &i, arg);
+       for (i = 0; i < bpf_map__max_entries(map); i++) {
+               err = func(name, map_fd, map, op, &i, arg);
                if (err) {
                        pr_debug("ERROR: failed to insert value to %s[%u]\n",
                                 name, i);
@@ -1278,7 +1270,7 @@ foreach_key_array_all(map_config_func_t func,
 static int
 foreach_key_array_ranges(map_config_func_t func, void *arg,
                         const char *name, int map_fd,
-                        const struct bpf_map_def *pdef,
+                        const struct bpf_map *map,
                         struct bpf_map_op *op)
 {
        unsigned int i, j;
@@ -1291,7 +1283,7 @@ foreach_key_array_ranges(map_config_func_t func, void *arg,
                for (j = 0; j < length; j++) {
                        unsigned int idx = start + j;
 
-                       err = func(name, map_fd, pdef, op, &idx, arg);
+                       err = func(name, map_fd, map, op, &idx, arg);
                        if (err) {
                                pr_debug("ERROR: failed to insert value to %s[%u]\n",
                                         name, idx);
@@ -1307,9 +1299,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                           map_config_func_t func,
                           void *arg)
 {
-       int err, map_fd;
+       int err, map_fd, type;
        struct bpf_map_op *op;
-       const struct bpf_map_def *def;
        const char *name = bpf_map__name(map);
        struct bpf_map_priv *priv = bpf_map__priv(map);
 
@@ -1322,9 +1313,8 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                return 0;
        }
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def)) {
-               pr_debug("ERROR: failed to get definition from map %s\n", name);
+       if (!map) {
+               pr_debug("Map '%s' is invalid\n", name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
        map_fd = bpf_map__fd(map);
@@ -1333,19 +1323,19 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                return map_fd;
        }
 
+       type = bpf_map__type(map);
        list_for_each_entry(op, &priv->ops_list, list) {
-               switch (def->type) {
+               switch (type) {
                case BPF_MAP_TYPE_ARRAY:
                case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
                        switch (op->key_type) {
                        case BPF_MAP_KEY_ALL:
                                err = foreach_key_array_all(func, arg, name,
-                                                           map_fd, def, op);
+                                                           map_fd, map, op);
                                break;
                        case BPF_MAP_KEY_RANGES:
                                err = foreach_key_array_ranges(func, arg, name,
-                                                              map_fd, def,
-                                                              op);
+                                                              map_fd, map, op);
                                break;
                        default:
                                pr_debug("ERROR: keytype for map '%s' invalid\n",
@@ -1454,7 +1444,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
 
 static int
 apply_obj_config_map_for_key(const char *name, int map_fd,
-                            const struct bpf_map_def *pdef,
+                            const struct bpf_map *map,
                             struct bpf_map_op *op,
                             void *pkey, void *arg __maybe_unused)
 {
@@ -1463,7 +1453,7 @@ apply_obj_config_map_for_key(const char *name, int map_fd,
        switch (op->op_type) {
        case BPF_MAP_OP_SET_VALUE:
                err = apply_config_value_for_key(map_fd, pkey,
-                                                pdef->value_size,
+                                                bpf_map__value_size(map),
                                                 op->v.value);
                break;
        case BPF_MAP_OP_SET_EVSEL:
index eb853ca..c863ae0 100644 (file)
@@ -9,25 +9,25 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def)
+static bool bpf_map__is_per_cpu(enum bpf_map_type type)
 {
-       return def->type == BPF_MAP_TYPE_PERCPU_HASH ||
-              def->type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-              def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-              def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+       return type == BPF_MAP_TYPE_PERCPU_HASH ||
+              type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+              type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+              type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
 }
 
-static void *bpf_map_def__alloc_value(const struct bpf_map_def *def)
+static void *bpf_map__alloc_value(const struct bpf_map *map)
 {
-       if (bpf_map_def__is_per_cpu(def))
-               return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF));
+       if (bpf_map__is_per_cpu(bpf_map__type(map)))
+               return malloc(round_up(bpf_map__value_size(map), 8) *
+                             sysconf(_SC_NPROCESSORS_CONF));
 
-       return malloc(def->value_size);
+       return malloc(bpf_map__value_size(map));
 }
 
 int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
 {
-       const struct bpf_map_def *def = bpf_map__def(map);
        void *prev_key = NULL, *key, *value;
        int fd = bpf_map__fd(map), err;
        int printed = 0;
@@ -35,15 +35,15 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
        if (fd < 0)
                return fd;
 
-       if (IS_ERR(def))
-               return PTR_ERR(def);
+       if (!map)
+               return PTR_ERR(map);
 
        err = -ENOMEM;
-       key = malloc(def->key_size);
+       key = malloc(bpf_map__key_size(map));
        if (key == NULL)
                goto out;
 
-       value = bpf_map_def__alloc_value(def);
+       value = bpf_map__alloc_value(map);
        if (value == NULL)
                goto out_free_key;
 
index 42ffc24..945f92d 100644 (file)
@@ -21,7 +21,7 @@ endif
 
 BPF_GCC                ?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS     ?=
-CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS)             \
+CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS)     \
          -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)          \
          -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
 LDFLAGS += $(SAN_CFLAGS)
@@ -292,7 +292,7 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
 MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
 
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)                  \
+BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)          \
             -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)                   \
             -I$(abspath $(OUTPUT)/../usr/include)
 
index df3b292..bdbacf5 100644 (file)
@@ -109,26 +109,31 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
        .write = bpf_testmod_test_write,
 };
 
-BTF_SET_START(bpf_testmod_kfunc_ids)
+BTF_SET_START(bpf_testmod_check_kfunc_ids)
 BTF_ID(func, bpf_testmod_test_mod_kfunc)
-BTF_SET_END(bpf_testmod_kfunc_ids)
+BTF_SET_END(bpf_testmod_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
+static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
+       .owner     = THIS_MODULE,
+       .check_set = &bpf_testmod_check_kfunc_ids,
+};
+
+extern int bpf_fentry_test1(int a);
 
 static int bpf_testmod_init(void)
 {
        int ret;
 
-       ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
-       if (ret)
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+       if (ret < 0)
                return ret;
-       register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
-       return 0;
+       if (bpf_fentry_test1(0) < 0)
+               return -EINVAL;
+       return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
 static void bpf_testmod_exit(void)
 {
-       unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
        return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
index f628713..763db63 100644 (file)
@@ -48,3 +48,8 @@ CONFIG_IMA_READ_POLICY=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_FUNCTION_TRACER=y
 CONFIG_DYNAMIC_FTRACE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_USERFAULTFD=y
index d0f06e4..eac71fb 100644 (file)
@@ -1,13 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/capability.h>
 
+#include "test_progs.h"
+#include "bind_perm.skel.h"
+
 static int duration;
 
+static int create_netns(void)
+{
+       if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+               return -1;
+
+       return 0;
+}
+
 void try_bind(int family, int port, int expected_errno)
 {
        struct sockaddr_storage addr = {};
@@ -75,6 +86,9 @@ void test_bind_perm(void)
        struct bind_perm *skel;
        int cgroup_fd;
 
+       if (create_netns())
+               return;
+
        cgroup_fd = test__join_cgroup("/bind_perm");
        if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
                return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
new file mode 100644 (file)
index 0000000..ee725d4
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <test_progs.h>
+#include "bpf_iter_setsockopt_unix.skel.h"
+
+#define NR_CASES 5
+
+static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel)
+{
+       struct sockaddr_un addr = {
+               .sun_family = AF_UNIX,
+               .sun_path = "",
+       };
+       socklen_t len;
+       int fd, err;
+
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
+       if (!ASSERT_NEQ(fd, -1, "socket"))
+               return -1;
+
+       len = offsetof(struct sockaddr_un, sun_path);
+       err = bind(fd, (struct sockaddr *)&addr, len);
+       if (!ASSERT_OK(err, "bind"))
+               return -1;
+
+       len = sizeof(addr);
+       err = getsockname(fd, (struct sockaddr *)&addr, &len);
+       if (!ASSERT_OK(err, "getsockname"))
+               return -1;
+
+       memcpy(&skel->bss->sun_path, &addr.sun_path,
+              len - offsetof(struct sockaddr_un, sun_path));
+
+       return fd;
+}
+
+static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd)
+{
+       socklen_t optlen;
+       int i, err;
+
+       for (i = 0; i < NR_CASES; i++) {
+               if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1,
+                               "bpf_(get|set)sockopt"))
+                       return;
+
+               err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+                                &(skel->data->sndbuf_setsockopt[i]),
+                                sizeof(skel->data->sndbuf_setsockopt[i]));
+               if (!ASSERT_OK(err, "setsockopt"))
+                       return;
+
+               optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]);
+               err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+                                &(skel->bss->sndbuf_getsockopt_expected[i]),
+                                &optlen);
+               if (!ASSERT_OK(err, "getsockopt"))
+                       return;
+
+               if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i],
+                              skel->bss->sndbuf_getsockopt_expected[i],
+                              "bpf_(get|set)sockopt"))
+                       return;
+       }
+}
+
+void test_bpf_iter_setsockopt_unix(void)
+{
+       struct bpf_iter_setsockopt_unix *skel;
+       int err, unix_fd, iter_fd;
+       char buf;
+
+       skel = bpf_iter_setsockopt_unix__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open_and_load"))
+               return;
+
+       unix_fd = create_unix_socket(skel);
+       if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server"))
+               goto destroy;
+
+       skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
+       if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter"))
+               goto destroy;
+
+       iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf));
+       if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+               goto destroy;
+
+       while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+              errno == EAGAIN)
+               ;
+       if (!ASSERT_OK(err, "read iter error"))
+               goto destroy;
+
+       test_sndbuf(skel, unix_fd);
+destroy:
+       bpf_iter_setsockopt_unix__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
new file mode 100644 (file)
index 0000000..d43f548
--- /dev/null
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include <linux/module.h>
+#include <linux/userfaultfd.h>
+
+#include "ksym_race.skel.h"
+#include "bpf_mod_race.skel.h"
+#include "kfunc_call_race.skel.h"
+
+/* This test crafts a race between btf_try_get_module and do_init_module, and
+ * checks whether btf_try_get_module handles the invocation for a well-formed
+ * but uninitialized module correctly. Unless the module has completed its
+ * initcalls, the verifier should fail the program load and return ENXIO.
+ *
+ * userfaultfd is used to trigger a fault in an fmod_ret program, and make it
+ * sleep, then the BPF program is loaded and the return value from verifier is
+ * inspected. After this, the userfaultfd is closed so that the module loading
+ * thread makes forward progress, and fmod_ret injects an error so that the
+ * module load fails and it is freed.
+ *
+ * If the verifier succeeded in loading the supplied program, it will end up
+ * taking reference to freed module, and trigger a crash when the program fd
+ * is closed later. This is true for both kfuncs and ksyms. In both cases,
+ * the crash is triggered inside bpf_prog_free_deferred, when module reference
+ * is finally released.
+ */
+
+struct test_config {
+       const char *str_open;
+       void *(*bpf_open_and_load)();
+       void (*bpf_destroy)(void *);
+};
+
+enum test_state {
+       _TS_INVALID,
+       TS_MODULE_LOAD,
+       TS_MODULE_LOAD_FAIL,
+};
+
+static _Atomic enum test_state state = _TS_INVALID;
+
+static int sys_finit_module(int fd, const char *param_values, int flags)
+{
+       return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int sys_delete_module(const char *name, unsigned int flags)
+{
+       return syscall(__NR_delete_module, name, flags);
+}
+
+static int load_module(const char *mod)
+{
+       int ret, fd;
+
+       fd = open("bpf_testmod.ko", O_RDONLY);
+       if (fd < 0)
+               return fd;
+
+       ret = sys_finit_module(fd, "", 0);
+       close(fd);
+       if (ret < 0)
+               return ret;
+       return 0;
+}
+
+static void *load_module_thread(void *p)
+{
+
+       if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
+               atomic_store(&state, TS_MODULE_LOAD);
+       else
+               atomic_store(&state, TS_MODULE_LOAD_FAIL);
+       return p;
+}
+
+static int sys_userfaultfd(int flags)
+{
+       return syscall(__NR_userfaultfd, flags);
+}
+
+static int test_setup_uffd(void *fault_addr)
+{
+       struct uffdio_register uffd_register = {};
+       struct uffdio_api uffd_api = {};
+       int uffd;
+
+       uffd = sys_userfaultfd(O_CLOEXEC);
+       if (uffd < 0)
+               return -errno;
+
+       uffd_api.api = UFFD_API;
+       uffd_api.features = 0;
+       if (ioctl(uffd, UFFDIO_API, &uffd_api)) {
+               close(uffd);
+               return -1;
+       }
+
+       uffd_register.range.start = (unsigned long)fault_addr;
+       uffd_register.range.len = 4096;
+       uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+       if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
+               close(uffd);
+               return -1;
+       }
+       return uffd;
+}
+
+static void test_bpf_mod_race_config(const struct test_config *config)
+{
+       void *fault_addr, *skel_fail;
+       struct bpf_mod_race *skel;
+       struct uffd_msg uffd_msg;
+       pthread_t load_mod_thrd;
+       _Atomic int *blockingp;
+       int uffd, ret;
+
+       fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
+               return;
+
+       if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
+               goto end_mmap;
+
+       skel = bpf_mod_race__open();
+       if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open"))
+               goto end_module;
+
+       skel->rodata->bpf_mod_race_config.tgid = getpid();
+       skel->rodata->bpf_mod_race_config.inject_error = -4242;
+       skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
+       if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load"))
+               goto end_destroy;
+       blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
+
+       if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
+               goto end_destroy;
+
+       uffd = test_setup_uffd(fault_addr);
+       if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
+               goto end_destroy;
+
+       if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
+                      "load module thread"))
+               goto end_uffd;
+
+       /* Now, we either fail loading module, or block in bpf prog, spin to find out */
+       while (!atomic_load(&state) && !atomic_load(blockingp))
+               ;
+       if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
+               goto end_join;
+       if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
+               pthread_kill(load_mod_thrd, SIGKILL);
+               goto end_uffd;
+       }
+
+       /* We might have set bpf_blocking to 1, but may have not blocked in
+        * bpf_copy_from_user. Read userfaultfd descriptor to verify that.
+        */
+       if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
+                      "read uffd block event"))
+               goto end_join;
+       if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
+               goto end_join;
+
+       /* We know that load_mod_thrd is blocked in the fmod_ret program, the
+        * module state is still MODULE_STATE_COMING because mod->init hasn't
+        * returned. This is the time we try to load a program calling kfunc and
+        * check if we get ENXIO from verifier.
+        */
+       skel_fail = config->bpf_open_and_load();
+       ret = errno;
+       if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
+               /* Close uffd to unblock load_mod_thrd */
+               close(uffd);
+               uffd = -1;
+               while (atomic_load(blockingp) != 2)
+                       ;
+               ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+               config->bpf_destroy(skel_fail);
+               goto end_join;
+
+       }
+       ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
+       ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
+
+       close(uffd);
+       uffd = -1;
+end_join:
+       pthread_join(load_mod_thrd, NULL);
+       if (uffd < 0)
+               ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
+end_uffd:
+       if (uffd >= 0)
+               close(uffd);
+end_destroy:
+       bpf_mod_race__destroy(skel);
+       ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+end_module:
+       sys_delete_module("bpf_testmod", 0);
+       ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
+end_mmap:
+       munmap(fault_addr, 4096);
+       atomic_store(&state, _TS_INVALID);
+}
+
+static const struct test_config ksym_config = {
+       .str_open = "ksym_race__open_and_load",
+       .bpf_open_and_load = (void *)ksym_race__open_and_load,
+       .bpf_destroy = (void *)ksym_race__destroy,
+};
+
+static const struct test_config kfunc_config = {
+       .str_open = "kfunc_call_race__open_and_load",
+       .bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
+       .bpf_destroy = (void *)kfunc_call_race__destroy,
+};
+
+void serial_test_bpf_mod_race(void)
+{
+       if (test__start_subtest("ksym (used_btfs UAF)"))
+               test_bpf_mod_race_config(&ksym_config);
+       if (test__start_subtest("kfunc (kfunc_btf_tab UAF)"))
+               test_bpf_mod_race_config(&kfunc_config);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
new file mode 100644 (file)
index 0000000..e3166a8
--- /dev/null
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_bpf_nf.skel.h"
+
+enum {
+       TEST_XDP,
+       TEST_TC_BPF,
+};
+
+void test_bpf_nf_ct(int mode)
+{
+       struct test_bpf_nf *skel;
+       int prog_fd, err, retval;
+
+       skel = test_bpf_nf__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
+               return;
+
+       if (mode == TEST_XDP)
+               prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
+       else
+               prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test);
+
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+                               (__u32 *)&retval, NULL);
+       if (!ASSERT_OK(err, "bpf_prog_test_run"))
+               goto end;
+
+       ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
+       ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+       ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
+       ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+       ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
+       ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
+       ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
+       ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+end:
+       test_bpf_nf__destroy(skel);
+}
+
+void test_bpf_nf(void)
+{
+       if (test__start_subtest("xdp-ct"))
+               test_bpf_nf_ct(TEST_XDP);
+       if (test__start_subtest("tc-bpf-ct"))
+               test_bpf_nf_ct(TEST_TC_BPF);
+}
index 8ba53ac..14f9b61 100644 (file)
@@ -4560,6 +4560,8 @@ static void do_test_file(unsigned int test_num)
        has_btf_ext = btf_ext != NULL;
        btf_ext__free(btf_ext);
 
+       /* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
+       libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS);
        obj = bpf_object__open(test->file);
        err = libbpf_get_error(obj);
        if (CHECK(err, "obj: %d", err))
@@ -4684,6 +4686,8 @@ skip:
        fprintf(stderr, "OK");
 
 done:
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        btf__free(btf);
        free(func_info);
        bpf_object__close(obj);
index d3e8f72..38b3c47 100644 (file)
@@ -194,14 +194,14 @@ void serial_test_cgroup_attach_multi(void)
 
        attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
        attach_opts.replace_prog_fd = allow_prog[0];
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_override", "unexpected success\n"))
                goto err;
        CHECK_FAIL(errno != EINVAL);
 
        attach_opts.flags = BPF_F_REPLACE;
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_no_multi", "unexpected success\n"))
                goto err;
@@ -209,7 +209,7 @@ void serial_test_cgroup_attach_multi(void)
 
        attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
        attach_opts.replace_prog_fd = -1;
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_bad_fd", "unexpected success\n"))
                goto err;
@@ -217,7 +217,7 @@ void serial_test_cgroup_attach_multi(void)
 
        /* replacing a program that is not attached to cgroup should fail  */
        attach_opts.replace_prog_fd = allow_prog[3];
-       if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
                                         BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "fail_prog_replace_no_ent", "unexpected success\n"))
                goto err;
@@ -225,14 +225,14 @@ void serial_test_cgroup_attach_multi(void)
 
        /* replace 1st from the top program */
        attach_opts.replace_prog_fd = allow_prog[0];
-       if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
                                        BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "prog_replace", "errno=%d\n", errno))
                goto err;
 
        /* replace program with itself */
        attach_opts.replace_prog_fd = allow_prog[6];
-       if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+       if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
                                        BPF_CGROUP_INET_EGRESS, &attach_opts),
                  "prog_replace", "errno=%d\n", errno))
                goto err;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
new file mode 100644 (file)
index 0000000..0b47c3c
--- /dev/null
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cgroup_getset_retval_setsockopt.skel.h"
+#include "cgroup_getset_retval_getsockopt.skel.h"
+
+#define SOL_CUSTOM     0xdeadbeef
+
+static int zero;
+
+static void test_setsockopt_set(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, assert that
+        * we actually get that error when we run setsockopt()
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, and one that gets the
+        * previously set errno. Assert that we get the same errno back.
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that gets the previously set errno.
+        * Assert that, without anything setting one, we get 0.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                 &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that gets the previously set errno, and then
+        * one that sets the errno to EUNATCH. Assert that the get does not
+        * see EUNATCH set later, and does not prevent EUNATCH from being set.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+       bpf_link__destroy(link_set_eunatch);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
+        * and then one that gets the exported errno. Assert that both the
+        * syscall and the helper see the last set errno.
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_set_eisconn);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that returns a reject without setting errno
+        * (legacy reject), and one that gets the errno. Assert that for
+        * backward compatibility the syscall results in EPERM, and this
+        * is also visible to the helper.
+        */
+       link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_legacy_eperm);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_setsockopt *obj;
+       struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL;
+       struct bpf_link *link_get_retval = NULL;
+
+       obj = cgroup_getset_retval_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach setsockopt that sets EUNATCH, then one that returns a reject
+        * without setting errno, and then one that gets the exported errno.
+        * Assert that both the syscall and the helper's errno are unaffected
+        * by the second prog (i.e. a legacy reject does not override the
+        * errno to EPERM).
+        */
+       link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+               goto close_bpf_object;
+       link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+                                  &zero, sizeof(int)), "setsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eunatch);
+       bpf_link__destroy(link_legacy_eperm);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_getsockopt_get(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_get_retval = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach getsockopt that gets previously set errno. Assert that the
+        * error from kernel is in both ctx_retval_value and retval_value.
+        */
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                  &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_override(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_set_eisconn = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach getsockopt that sets retval to -EISCONN. Assert that this
+        * overrides the value from kernel.
+        */
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+
+       if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                  &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eisconn);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
+{
+       struct cgroup_getset_retval_getsockopt *obj;
+       struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL;
+       struct bpf_link *link_get_retval = NULL;
+       int buf;
+       socklen_t optlen = sizeof(buf);
+
+       obj = cgroup_getset_retval_getsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(obj, "skel-load"))
+               return;
+
+       /* Attach getsockopt that sets retval to -EISCONN, and one that clears
+        * the ctx retval. Assert that clearing the ctx retval is synced to the
+        * helper and clears any errors both from kernel and BPF.
+        */
+       link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+                                                     cgroup_fd);
+       if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+               goto close_bpf_object;
+       link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval,
+                                                      cgroup_fd);
+       if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval"))
+               goto close_bpf_object;
+       link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+                                                    cgroup_fd);
+       if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+               goto close_bpf_object;
+
+       if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0,
+                                 &buf, &optlen), "getsockopt"))
+               goto close_bpf_object;
+
+       if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+               goto close_bpf_object;
+       if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+               goto close_bpf_object;
+       if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value"))
+               goto close_bpf_object;
+
+close_bpf_object:
+       bpf_link__destroy(link_set_eisconn);
+       bpf_link__destroy(link_clear_retval);
+       bpf_link__destroy(link_get_retval);
+
+       cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+void test_cgroup_getset_retval(void)
+{
+       int cgroup_fd = -1;
+       int sock_fd = -1;
+
+       cgroup_fd = test__join_cgroup("/cgroup_getset_retval");
+       if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+               goto close_fd;
+
+       sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+       if (!ASSERT_GE(sock_fd, 0, "start-server"))
+               goto close_fd;
+
+       if (test__start_subtest("setsockopt-set"))
+               test_setsockopt_set(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-set_and_get"))
+               test_setsockopt_set_and_get(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-default_zero"))
+               test_setsockopt_default_zero(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-default_zero_and_set"))
+               test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-override"))
+               test_setsockopt_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-legacy_eperm"))
+               test_setsockopt_legacy_eperm(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("setsockopt-legacy_no_override"))
+               test_setsockopt_legacy_no_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-get"))
+               test_getsockopt_get(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-override"))
+               test_getsockopt_override(cgroup_fd, sock_fd);
+
+       if (test__start_subtest("getsockopt-retval_sync"))
+               test_getsockopt_retval_sync(cgroup_fd, sock_fd);
+
+close_fd:
+       close(cgroup_fd);
+}
index ac54e3f..dfafd62 100644 (file)
@@ -457,7 +457,7 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
        if (map_fd < 0)
                return -1;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
index 9da131b..917165e 100644 (file)
@@ -121,7 +121,7 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       buff = malloc(bpf_map__def(map)->value_size);
+       buff = malloc(bpf_map__value_size(map));
        if (buff)
                err = bpf_map_update_elem(map_fd, &zero, buff, 0);
        free(buff);
index 1db86ea..57331c6 100644 (file)
@@ -20,7 +20,7 @@ void test_global_data_init(void)
        if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
                goto out;
 
-       sz = bpf_map__def(map)->value_size;
+       sz = bpf_map__value_size(map);
        newval = malloc(sz);
        if (CHECK_FAIL(!newval))
                goto out;
index 7d7445c..b39a4f0 100644 (file)
@@ -27,6 +27,12 @@ static void test_main(void)
        ASSERT_OK(err, "bpf_prog_test_run(test2)");
        ASSERT_EQ(retval, 3, "test2-retval");
 
+       prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
+       err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+                               NULL, NULL, (__u32 *)&retval, NULL);
+       ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
+       ASSERT_EQ(retval, 0, "test_ref_btf_id-retval");
+
        kfunc_call_test_lskel__destroy(skel);
 }
 
index 85db0f4..b97a8f2 100644 (file)
@@ -8,6 +8,7 @@
 #include "test_sockmap_update.skel.h"
 #include "test_sockmap_invalid_update.skel.h"
 #include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
 #define TCP_REPAIR             19      /* TCP sock is under repair right now */
@@ -315,6 +316,63 @@ out:
        test_sockmap_skb_verdict_attach__destroy(skel);
 }
 
+static __u32 query_prog_id(int prog_fd)
+{
+       struct bpf_prog_info info = {};
+       __u32 info_len = sizeof(info);
+       int err;
+
+       err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
+           !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
+               return 0;
+
+       return info.id;
+}
+
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+       struct test_sockmap_progs_query *skel;
+       int err, map_fd, verdict_fd;
+       __u32 attach_flags = 0;
+       __u32 prog_ids[3] = {};
+       __u32 prog_cnt = 3;
+
+       skel = test_sockmap_progs_query__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+               return;
+
+       map_fd = bpf_map__fd(skel->maps.sock_map);
+
+       if (attach_type == BPF_SK_MSG_VERDICT)
+               verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
+       else
+               verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
+
+       err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+                            &attach_flags, prog_ids, &prog_cnt);
+       ASSERT_OK(err, "bpf_prog_query failed");
+       ASSERT_EQ(attach_flags,  0, "wrong attach_flags on query");
+       ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
+
+       err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
+       if (!ASSERT_OK(err, "bpf_prog_attach failed"))
+               goto out;
+
+       prog_cnt = 1;
+       err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+                            &attach_flags, prog_ids, &prog_cnt);
+       ASSERT_OK(err, "bpf_prog_query failed");
+       ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+       ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+       ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+                 "wrong prog_ids on query");
+
+       bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+out:
+       test_sockmap_progs_query__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
        if (test__start_subtest("sockmap create_update_free"))
@@ -341,4 +399,12 @@ void test_sockmap_basic(void)
                test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
                                                BPF_SK_SKB_VERDICT);
        }
+       if (test__start_subtest("sockmap msg_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+       if (test__start_subtest("sockmap stream_parser progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+       if (test__start_subtest("sockmap stream_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+       if (test__start_subtest("sockmap skb_verdict progs query"))
+               test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
 }
index 7e21bfa..2cf0c7a 100644 (file)
@@ -1413,14 +1413,12 @@ close_srv1:
 
 static void test_ops_cleanup(const struct bpf_map *map)
 {
-       const struct bpf_map_def *def;
        int err, mapfd;
        u32 key;
 
-       def = bpf_map__def(map);
        mapfd = bpf_map__fd(map);
 
-       for (key = 0; key < def->max_entries; key++) {
+       for (key = 0; key < bpf_map__max_entries(map); key++) {
                err = bpf_map_delete_elem(mapfd, &key);
                if (err && errno != EINVAL && errno != ENOENT)
                        FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
@@ -1443,13 +1441,13 @@ static const char *family_str(sa_family_t family)
 
 static const char *map_type_str(const struct bpf_map *map)
 {
-       const struct bpf_map_def *def;
+       int type;
 
-       def = bpf_map__def(map);
-       if (IS_ERR(def))
+       if (!map)
                return "invalid";
+       type = bpf_map__type(map);
 
-       switch (def->type) {
+       switch (type) {
        case BPF_MAP_TYPE_SOCKMAP:
                return "sockmap";
        case BPF_MAP_TYPE_SOCKHASH:
index 4b937e5..30a99d2 100644 (file)
@@ -173,11 +173,11 @@ static int getsetsockopt(void)
        }
 
        memset(&buf, 0, sizeof(buf));
-       buf.zc.address = 12345; /* rejected by BPF */
+       buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
        optlen = sizeof(buf.zc);
        errno = 0;
        err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
-       if (errno != EPERM) {
+       if (errno != EINVAL) {
                log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
                        err, errno);
                goto err;
index 5dc0f42..796f231 100644 (file)
@@ -37,7 +37,7 @@ static void test_tailcall_1(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -53,7 +53,7 @@ static void test_tailcall_1(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
                                        &duration, &retval, NULL);
                CHECK(err || retval != i, "tailcall",
@@ -69,7 +69,7 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -90,8 +90,8 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-               j = bpf_map__def(prog_array)->max_entries - 1 - i;
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+               j = bpf_map__max_entries(prog_array) - 1 - i;
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -107,8 +107,8 @@ static void test_tailcall_1(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-               j = bpf_map__def(prog_array)->max_entries - 1 - i;
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+               j = bpf_map__max_entries(prog_array) - 1 - i;
 
                err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
                                        &duration, &retval, NULL);
@@ -125,7 +125,7 @@ static void test_tailcall_1(void)
        CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
              err, errno, retval);
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_delete_elem(map_fd, &i);
                if (CHECK_FAIL(err >= 0 || errno != ENOENT))
                        goto out;
@@ -175,7 +175,7 @@ static void test_tailcall_2(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -353,7 +353,7 @@ static void test_tailcall_4(void)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -369,7 +369,7 @@ static void test_tailcall_4(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -380,7 +380,7 @@ static void test_tailcall_4(void)
                      "err %d errno %d retval %d\n", err, errno, retval);
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -441,7 +441,7 @@ static void test_tailcall_5(void)
        if (CHECK_FAIL(map_fd < 0))
                return;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -457,7 +457,7 @@ static void test_tailcall_5(void)
                        goto out;
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -468,7 +468,7 @@ static void test_tailcall_5(void)
                      "err %d errno %d retval %d\n", err, errno, retval);
        }
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
                if (CHECK_FAIL(err))
                        goto out;
@@ -520,7 +520,7 @@ static void test_tailcall_bpf2bpf_1(void)
                goto out;
 
        /* nop -> jmp */
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -681,7 +681,7 @@ static void test_tailcall_bpf2bpf_3(void)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -778,7 +778,7 @@ static void test_tailcall_bpf2bpf_4(bool noise)
        if (CHECK_FAIL(map_fd < 0))
                goto out;
 
-       for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+       for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
                snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
                prog = bpf_object__find_program_by_name(obj, prog_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
new file mode 100644 (file)
index 0000000..31c1886
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_xdp_update_frags(void)
+{
+       const char *file = "./test_xdp_update_frags.o";
+       __u32 duration, retval, size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, prog_fd;
+       __u32 *offset;
+       __u8 *buf;
+
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               goto out;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(128);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 128b"))
+               goto out;
+
+       memset(buf, 0, 128);
+       offset = (__u32 *)buf;
+       *offset = 16;
+       buf[*offset] = 0xaa;            /* marker at offset 16 (head) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 31 (head) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 128,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
+       ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
+
+       free(buf);
+
+       buf = malloc(9000);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+               goto out;
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 5000;
+       buf[*offset] = 0xaa;            /* marker at offset 5000 (frag0) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 5015 (frag0) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
+       ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 3510;
+       buf[*offset] = 0xaa;            /* marker at offset 3510 (head) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 3525 (frag0) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
+       ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
+
+       memset(buf, 0, 9000);
+       offset = (__u32 *)buf;
+       *offset = 7606;
+       buf[*offset] = 0xaa;            /* marker at offset 7606 (frag0) */
+       buf[*offset + 15] = 0xaa;       /* marker at offset 7621 (frag1) */
+
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
+       ASSERT_OK(err, "xdp_update_frag");
+       ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+       ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
+       ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
+
+       free(buf);
+out:
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags(void)
+{
+       if (test__start_subtest("xdp_adjust_frags"))
+               test_xdp_update_frags();
+}
index 3f5a17c..ccc9e63 100644 (file)
@@ -11,22 +11,21 @@ static void test_xdp_adjust_tail_shrink(void)
        char buf[128];
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
                return;
 
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
-
-       CHECK(err || retval != XDP_DROP,
-             "ipv4", "err %d errno %d retval %d size %d\n",
-             err, errno, retval, size);
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
        expect_sz = sizeof(pkt_v6) - 20;  /* Test shrink with 20 bytes */
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_TX || size != expect_sz,
-             "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-             err, errno, retval, size, expect_sz);
+       ASSERT_OK(err, "ipv6");
+       ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+       ASSERT_EQ(size, expect_sz, "ipv6 size");
+
        bpf_object__close(obj);
 }
 
@@ -39,21 +38,20 @@ static void test_xdp_adjust_tail_grow(void)
        int err, prog_fd;
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
                return;
 
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_DROP,
-             "ipv4", "err %d errno %d retval %d size %d\n",
-             err, errno, retval, size);
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
        expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
        err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
                                buf, &size, &retval, &duration);
-       CHECK(err || retval != XDP_TX || size != expect_sz,
-             "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-             err, errno, retval, size, expect_sz);
+       ASSERT_OK(err, "ipv6");
+       ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+       ASSERT_EQ(size, expect_sz, "ipv6 size");
 
        bpf_object__close(obj);
 }
@@ -76,7 +74,7 @@ static void test_xdp_adjust_tail_grow2(void)
        };
 
        err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
-       if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+       if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
                return;
 
        /* Test case-64 */
@@ -86,21 +84,17 @@ static void test_xdp_adjust_tail_grow2(void)
        /* Kernel side alloc packet memory area that is zero init */
        err = bpf_prog_test_run_xattr(&tattr);
 
-       CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
-                  || tattr.retval != XDP_TX
-                  || tattr.data_size_out != 192, /* Expected grow size */
-                  "case-64",
-                  "err %d errno %d retval %d size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out);
+       ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */
+       ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
+       ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */
 
        /* Extra checks for data contents */
-       CHECK_ATTR(tattr.data_size_out != 192
-                  || buf[0]   != 1 ||  buf[63]  != 1  /*  0-63  memset to 1 */
-                  || buf[64]  != 0 ||  buf[127] != 0  /* 64-127 memset to 0 */
-                  || buf[128] != 1 ||  buf[191] != 1, /*128-191 memset to 1 */
-                  "case-64-data",
-                  "err %d errno %d retval %d size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out);
+       ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /*  0-63  memset to 1 */
+       ASSERT_EQ(buf[63], 1, "case-64-data buf[63]");
+       ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */
+       ASSERT_EQ(buf[127], 0, "case-64-data buf[127]");
+       ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */
+       ASSERT_EQ(buf[191], 1, "case-64-data buf[191]");
 
        /* Test case-128 */
        memset(buf, 2, sizeof(buf));
@@ -109,24 +103,139 @@ static void test_xdp_adjust_tail_grow2(void)
        err = bpf_prog_test_run_xattr(&tattr);
 
        max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
-       CHECK_ATTR(err
-                  || tattr.retval != XDP_TX
-                  || tattr.data_size_out != max_grow,/* Expect max grow size */
-                  "case-128",
-                  "err %d errno %d retval %d size %d expect-size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out, max_grow);
+       ASSERT_OK(err, "case-128");
+       ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval");
+       ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */
 
        /* Extra checks for data content: Count grow size, will contain zeros */
        for (i = 0, cnt = 0; i < sizeof(buf); i++) {
                if (buf[i] == 0)
                        cnt++;
        }
-       CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
-                  || tattr.data_size_out != max_grow, /* Total grow size */
-                  "case-128-data",
-                  "err %d errno %d retval %d size %d grow-size %d\n",
-                  err, errno, tattr.retval, tattr.data_size_out, cnt);
+       ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */
+       ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */
+
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags_tail_shrink(void)
+{
+       const char *file = "./test_xdp_adjust_tail_shrink.o";
+       __u32 duration, retval, size, exp_size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, prog_fd;
+       __u8 *buf;
+
+       /* For the individual test cases, the first byte in the packet
+        * indicates which test will be run.
+        */
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               goto out;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(9000);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+               goto out;
+
+       memset(buf, 0, 9000);
+
+       /* Test case removing 10 bytes from last frag, NOT freeing it */
+       exp_size = 8990; /* 9000 - 10 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-10b");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval");
+       ASSERT_EQ(size, exp_size, "9Kb-10b size");
+
+       /* Test case removing one of two pages, assuming 4K pages */
+       buf[0] = 1;
+       exp_size = 4900; /* 9000 - 4100 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-4Kb");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval");
+       ASSERT_EQ(size, exp_size, "9Kb-4Kb size");
+
+       /* Test case removing two pages resulting in a linear xdp_buff */
+       buf[0] = 2;
+       exp_size = 800; /* 9000 - 8200 */
+       err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb-9Kb");
+       ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval");
+       ASSERT_EQ(size, exp_size, "9Kb-9Kb size");
+
+       free(buf);
+out:
+       bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags_tail_grow(void)
+{
+       const char *file = "./test_xdp_adjust_tail_grow.o";
+       __u32 duration, retval, size, exp_size;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int err, i, prog_fd;
+       __u8 *buf;
+
+       obj = bpf_object__open(file);
+       if (libbpf_get_error(obj))
+               return;
+
+       prog = bpf_object__next_program(obj, NULL);
+       if (bpf_object__load(obj))
+               return;
+
+       prog_fd = bpf_program__fd(prog);
+
+       buf = malloc(16384);
+       if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb"))
+               goto out;
+
+       /* Test case add 10 bytes to last frag */
+       memset(buf, 1, 16384);
+       size = 9000;
+       exp_size = size + 10;
+       err = bpf_prog_test_run(prog_fd, 1, buf, size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb+10b");
+       ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval");
+       ASSERT_EQ(size, exp_size, "9Kb+10b size");
+
+       for (i = 0; i < 9000; i++)
+               ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+
+       for (i = 9000; i < 9010; i++)
+               ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+
+       for (i = 9010; i < 16384; i++)
+               ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+
+       /* Test a too large grow */
+       memset(buf, 1, 16384);
+       size = 9001;
+       exp_size = size;
+       err = bpf_prog_test_run(prog_fd, 1, buf, size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "9Kb+10b too large");
+       ASSERT_EQ(retval, XDP_DROP, "9Kb+10b too large retval");
+       ASSERT_EQ(size, exp_size, "9Kb+10b too large size");
 
+       free(buf);
+out:
        bpf_object__close(obj);
 }
 
@@ -138,4 +247,8 @@ void test_xdp_adjust_tail(void)
                test_xdp_adjust_tail_grow();
        if (test__start_subtest("xdp_adjust_tail_grow2"))
                test_xdp_adjust_tail_grow2();
+       if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
+               test_xdp_adjust_frags_tail_shrink();
+       if (test__start_subtest("xdp_adjust_frags_tail_grow"))
+               test_xdp_adjust_frags_tail_grow();
 }
index c98a897..9c395ea 100644 (file)
@@ -10,40 +10,97 @@ struct meta {
        int pkt_len;
 };
 
+struct test_ctx_s {
+       bool passed;
+       int pkt_size;
+};
+
+struct test_ctx_s test_ctx;
+
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
-       int duration = 0;
        struct meta *meta = (struct meta *)data;
        struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+       unsigned char *raw_pkt = data + sizeof(*meta);
+       struct test_ctx_s *tst_ctx = ctx;
+
+       ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
+       ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
+       ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len");
+       ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
+                 "check_packet_content");
+
+       if (meta->pkt_len > sizeof(pkt_v4)) {
+               for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++)
+                       ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i,
+                                 "check_packet_content");
+       }
+
+       tst_ctx->passed = true;
+}
 
-       if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
-                 "check_size", "size %u < %zu\n",
-                 size, sizeof(pkt_v4) + sizeof(*meta)))
-               return;
+#define BUF_SZ 9000
 
-       if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
-                 "meta->ifindex = %d\n", meta->ifindex))
+static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+                                    struct test_xdp_bpf2bpf *ftrace_skel,
+                                    int pkt_size)
+{
+       __u32 duration = 0, retval, size;
+       __u8 *buf, *buf_in;
+       int err;
+
+       if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
+           !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
                return;
 
-       if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
-                 "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+       buf_in = malloc(BUF_SZ);
+       if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()"))
                return;
 
-       if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
-                 "check_packet_content", "content not the same\n"))
+       buf = malloc(BUF_SZ);
+       if (!ASSERT_OK_PTR(buf, "buf malloc()")) {
+               free(buf_in);
                return;
+       }
+
+       test_ctx.passed = false;
+       test_ctx.pkt_size = pkt_size;
+
+       memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+       if (pkt_size > sizeof(pkt_v4)) {
+               for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+                       buf_in[i + sizeof(pkt_v4)] = i;
+       }
+
+       /* Run test program */
+       err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
+                               buf, &size, &retval, &duration);
+
+       ASSERT_OK(err, "ipv4");
+       ASSERT_EQ(retval, XDP_PASS, "ipv4 retval");
+       ASSERT_EQ(size, pkt_size, "ipv4 size");
+
+       /* Make sure bpf_xdp_output() was triggered and it sent the expected
+        * data to the perf ring buffer.
+        */
+       err = perf_buffer__poll(pb, 100);
 
-       *(bool *)ctx = true;
+       ASSERT_GE(err, 0, "perf_buffer__poll");
+       ASSERT_TRUE(test_ctx.passed, "test passed");
+       /* Verify test results */
+       ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+                 "fentry result");
+       ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result");
+
+       free(buf);
+       free(buf_in);
 }
 
 void test_xdp_bpf2bpf(void)
 {
-       __u32 duration = 0, retval, size;
-       char buf[128];
        int err, pkt_fd, map_fd;
-       bool passed = false;
-       struct iphdr iph;
-       struct iptnl_info value4 = {.family = AF_INET};
+       int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+       struct iptnl_info value4 = {.family = AF_INET6};
        struct test_xdp *pkt_skel = NULL;
        struct test_xdp_bpf2bpf *ftrace_skel = NULL;
        struct vip key4 = {.protocol = 6, .family = AF_INET};
@@ -52,7 +109,7 @@ void test_xdp_bpf2bpf(void)
 
        /* Load XDP program to introspect */
        pkt_skel = test_xdp__open_and_load();
-       if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+       if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load"))
                return;
 
        pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
@@ -62,7 +119,7 @@ void test_xdp_bpf2bpf(void)
 
        /* Load trace program */
        ftrace_skel = test_xdp_bpf2bpf__open();
-       if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+       if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open"))
                goto out;
 
        /* Demonstrate the bpf_program__set_attach_target() API rather than
@@ -77,50 +134,24 @@ void test_xdp_bpf2bpf(void)
        bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
 
        err = test_xdp_bpf2bpf__load(ftrace_skel);
-       if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+       if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load"))
                goto out;
 
        err = test_xdp_bpf2bpf__attach(ftrace_skel);
-       if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+       if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach"))
                goto out;
 
        /* Set up perf buffer */
-       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1,
-                             on_sample, NULL, &passed, NULL);
+       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8,
+                             on_sample, NULL, &test_ctx, NULL);
        if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto out;
 
-       /* Run test program */
-       err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
-                               buf, &size, &retval, &duration);
-       memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
-       if (CHECK(err || retval != XDP_TX || size != 74 ||
-                 iph.protocol != IPPROTO_IPIP, "ipv4",
-                 "err %d errno %d retval %d size %d\n",
-                 err, errno, retval, size))
-               goto out;
-
-       /* Make sure bpf_xdp_output() was triggered and it sent the expected
-        * data to the perf ring buffer.
-        */
-       err = perf_buffer__poll(pb, 100);
-       if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
-               goto out;
-
-       CHECK_FAIL(!passed);
-
-       /* Verify test results */
-       if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
-                 "result", "fentry failed err %llu\n",
-                 ftrace_skel->bss->test_result_fentry))
-               goto out;
-
-       CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
-             "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+       for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+               run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+                                        pkt_sizes[i]);
 out:
-       if (pb)
-               perf_buffer__free(pb);
+       perf_buffer__free(pb);
        test_xdp__destroy(pkt_skel);
        test_xdp_bpf2bpf__destroy(ftrace_skel);
 }
index fd812bd..13aabb3 100644 (file)
@@ -3,11 +3,12 @@
 #include <linux/if_link.h>
 #include <test_progs.h>
 
+#include "test_xdp_with_cpumap_frags_helpers.skel.h"
 #include "test_xdp_with_cpumap_helpers.skel.h"
 
 #define IFINDEX_LO     1
 
-void serial_test_xdp_cpumap_attach(void)
+void test_xdp_with_cpumap_helpers(void)
 {
        struct test_xdp_with_cpumap_helpers *skel;
        struct bpf_prog_info info = {};
@@ -54,6 +55,67 @@ void serial_test_xdp_cpumap_attach(void)
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
        ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
 
+       /* Try to attach BPF_XDP program with frags to cpumap when we have
+        * already loaded a BPF_XDP program on the map
+        */
+       idx = 1;
+       val.qsize = 192;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry");
+
 out_close:
        test_xdp_with_cpumap_helpers__destroy(skel);
 }
+
+void test_xdp_with_cpumap_frags_helpers(void)
+{
+       struct test_xdp_with_cpumap_frags_helpers *skel;
+       struct bpf_prog_info info = {};
+       __u32 len = sizeof(info);
+       struct bpf_cpumap_val val = {
+               .qsize = 192,
+       };
+       int err, frags_prog_fd, map_fd;
+       __u32 idx = 0;
+
+       skel = test_xdp_with_cpumap_frags_helpers__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_frags_helpers__open_and_load"))
+               return;
+
+       frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+       map_fd = bpf_map__fd(skel->maps.cpu_map);
+       err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+               goto out_close;
+
+       val.bpf_prog.fd = frags_prog_fd;
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_OK(err, "Add program to cpumap entry");
+
+       err = bpf_map_lookup_elem(map_fd, &idx, &val);
+       ASSERT_OK(err, "Read cpumap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id,
+                 "Match program id to cpumap entry prog_id");
+
+       /* Try to attach BPF_XDP program to cpumap when we have
+        * already loaded a BPF_XDP program with frags on the map
+        */
+       idx = 1;
+       val.qsize = 192;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry");
+
+out_close:
+       test_xdp_with_cpumap_frags_helpers__destroy(skel);
+}
+
+void serial_test_xdp_cpumap_attach(void)
+{
+       if (test__start_subtest("CPUMAP with programs in entries"))
+               test_xdp_with_cpumap_helpers();
+
+       if (test__start_subtest("CPUMAP with frags programs in entries"))
+               test_xdp_with_cpumap_frags_helpers();
+}
index 3079d55..2a784cc 100644 (file)
@@ -4,6 +4,7 @@
 #include <test_progs.h>
 
 #include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_frags_helpers.skel.h"
 #include "test_xdp_with_devmap_helpers.skel.h"
 
 #define IFINDEX_LO 1
@@ -56,6 +57,15 @@ static void test_xdp_with_devmap_helpers(void)
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
        ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
 
+       /* Try to attach BPF_XDP program with frags to devmap when we have
+        * already loaded a BPF_XDP program on the map
+        */
+       idx = 1;
+       val.ifindex = 1;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry");
+
 out_close:
        test_xdp_with_devmap_helpers__destroy(skel);
 }
@@ -71,12 +81,57 @@ static void test_neg_xdp_devmap_helpers(void)
        }
 }
 
+void test_xdp_with_devmap_frags_helpers(void)
+{
+       struct test_xdp_with_devmap_frags_helpers *skel;
+       struct bpf_prog_info info = {};
+       struct bpf_devmap_val val = {
+               .ifindex = IFINDEX_LO,
+       };
+       __u32 len = sizeof(info);
+       int err, dm_fd_frags, map_fd;
+       __u32 idx = 0;
+
+       skel = test_xdp_with_devmap_frags_helpers__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_frags_helpers__open_and_load"))
+               return;
+
+       dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+       map_fd = bpf_map__fd(skel->maps.dm_ports);
+       err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len);
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+               goto out_close;
+
+       val.bpf_prog.fd = dm_fd_frags;
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_OK(err, "Add frags program to devmap entry");
+
+       err = bpf_map_lookup_elem(map_fd, &idx, &val);
+       ASSERT_OK(err, "Read devmap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id,
+                 "Match program id to devmap entry prog_id");
+
+       /* Try to attach BPF_XDP program to devmap when we have
+        * already loaded a BPF_XDP program with frags on the map
+        */
+       idx = 1;
+       val.ifindex = 1;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry");
+
+out_close:
+       test_xdp_with_devmap_frags_helpers__destroy(skel);
+}
 
 void serial_test_xdp_devmap_attach(void)
 {
        if (test__start_subtest("DEVMAP with programs in entries"))
                test_xdp_with_devmap_helpers();
 
+       if (test__start_subtest("DEVMAP with frags programs in entries"))
+               test_xdp_with_devmap_frags_helpers();
+
        if (test__start_subtest("Verifier check of DEVMAP programs"))
                test_neg_xdp_devmap_helpers();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
new file mode 100644 (file)
index 0000000..eafc877
--- /dev/null
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+
+#define AUTOBIND_LEN 6
+char sun_path[AUTOBIND_LEN];
+
+#define NR_CASES 5
+int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX};
+int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1};
+int sndbuf_getsockopt_expected[NR_CASES];
+
+static inline int cmpname(struct unix_sock *unix_sk)
+{
+       int i;
+
+       for (i = 0; i < AUTOBIND_LEN; i++) {
+               if (unix_sk->addr->name->sun_path[i] != sun_path[i])
+                       return -1;
+       }
+
+       return 0;
+}
+
+SEC("iter/unix")
+int change_sndbuf(struct bpf_iter__unix *ctx)
+{
+       struct unix_sock *unix_sk = ctx->unix_sk;
+       int i, err;
+
+       if (!unix_sk || !unix_sk->addr)
+               return 0;
+
+       if (unix_sk->addr->name->sun_path[0])
+               return 0;
+
+       if (cmpname(unix_sk))
+               return 0;
+
+       for (i = 0; i < NR_CASES; i++) {
+               err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+                                    &sndbuf_setsockopt[i],
+                                    sizeof(sndbuf_setsockopt[i]));
+               if (err)
+                       break;
+
+               err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+                                    &sndbuf_getsockopt[i],
+                                    sizeof(sndbuf_getsockopt[i]));
+               if (err)
+                       break;
+       }
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index c21e3f5..e6aefae 100644 (file)
@@ -63,7 +63,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
                        BPF_SEQ_PRINTF(seq, " @");
 
                        for (i = 1; i < len; i++) {
-                               /* unix_mkname() tests this upper bound. */
+                               /* unix_validate_addr() tests this upper bound. */
                                if (i >= sizeof(struct sockaddr_un))
                                        break;
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
new file mode 100644 (file)
index 0000000..82a5c6c
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile struct {
+       /* thread to activate trace programs for */
+       pid_t tgid;
+       /* return error from __init function */
+       int inject_error;
+       /* uffd monitored range start address */
+       void *fault_addr;
+} bpf_mod_race_config = { -1 };
+
+int bpf_blocking = 0;
+int res_try_get_module = -1;
+
+static __always_inline bool check_thread_id(void)
+{
+       struct task_struct *task = bpf_get_current_task_btf();
+
+       return task->tgid == bpf_mod_race_config.tgid;
+}
+
+/* The trace of execution is something like this:
+ *
+ * finit_module()
+ *   load_module()
+ *     prepare_coming_module()
+ *       notifier_call(MODULE_STATE_COMING)
+ *         btf_parse_module()
+ *         btf_alloc_id()              // Visible to userspace at this point
+ *         list_add(btf_mod->list, &btf_modules)
+ *     do_init_module()
+ *       freeinit = kmalloc()
+ *       ret = mod->init()
+ *         bpf_prog_widen_race()
+ *           bpf_copy_from_user()
+ *             ...<sleep>...
+ *       if (ret < 0)
+ *         ...
+ *         free_module()
+ * return ret
+ *
+ * At this point, module loading thread is blocked, we now load the program:
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module_live == false
+ *     return -ENXIO
+ *
+ * Without the fix (try_get_module_live in btf_try_get_module):
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module == true
+ *     <store module reference in btf_kfunc_tab or used_btf array>
+ *   ...
+ * return fd
+ *
+ * Now, if we inject an error in the blocked program, our module will be freed
+ * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
+ * Later, when bpf program is freed, it will try to module_put already freed
+ * module. This is why try_get_module_live returns false if mod->state is not
+ * MODULE_STATE_LIVE.
+ */
+
+SEC("fmod_ret.s/bpf_fentry_test1")
+int BPF_PROG(widen_race, int a, int ret)
+{
+       char dst;
+
+       if (!check_thread_id())
+               return 0;
+       /* Indicate that we will attempt to block */
+       bpf_blocking = 1;
+       bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
+       return bpf_mod_race_config.inject_error;
+}
+
+SEC("fexit/do_init_module")
+int BPF_PROG(fexit_init_module, struct module *mod, int ret)
+{
+       if (!check_thread_id())
+               return 0;
+       /* Indicate that we finished blocking */
+       bpf_blocking = 2;
+       return 0;
+}
+
+SEC("fexit/btf_try_get_module")
+int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
+{
+       res_try_get_module = !!mod;
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index e0f4260..1c1289b 100644 (file)
@@ -5,6 +5,8 @@
 #define AF_INET                        2
 #define AF_INET6               10
 
+#define SOL_SOCKET             1
+#define SO_SNDBUF              7
 #define __SO_ACCEPTCON         (1 << 16)
 
 #define SOL_TCP                        6
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
new file mode 100644 (file)
index 0000000..b2a409e
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__u32 ctx_retval_value = 0;
+
+SEC("cgroup/getsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+       retval_value = bpf_get_retval();
+       ctx_retval_value = ctx->retval;
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup/getsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EISCONN))
+               assertion_error = 1;
+
+       return 1;
+}
+
+SEC("cgroup/getsockopt")
+int clear_retval(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       ctx->retval = 0;
+
+       return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
new file mode 100644 (file)
index 0000000..d6e5903
--- /dev/null
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+
+SEC("cgroup/setsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+       retval_value = bpf_get_retval();
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 1;
+}
+
+SEC("cgroup/setsockopt")
+int set_eunatch(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EUNATCH))
+               assertion_error = 1;
+
+       return 0;
+}
+
+SEC("cgroup/setsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       if (bpf_set_retval(-EISCONN))
+               assertion_error = 1;
+
+       return 0;
+}
+
+SEC("cgroup/setsockopt")
+int legacy_eperm(struct bpf_sockopt *ctx)
+{
+       __sync_fetch_and_add(&invocations, 1);
+
+       return 0;
+}
index 68a5a9d..7e94412 100644 (file)
@@ -7,12 +7,12 @@
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") sock_map = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __type(key, int);
+       __type(value, int);
+       __uint(max_entries, 2);
+} sock_map SEC(".maps");
 
 SEC("freplace/cls_redirect")
 int freplace_cls_redirect_test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
new file mode 100644 (file)
index 0000000..4e8fed7
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+SEC("tc")
+int kfunc_call_fail(struct __sk_buff *ctx)
+{
+       bpf_testmod_test_mod_kfunc(0);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 8a8cf59..5aecbb9 100644 (file)
@@ -1,13 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
+#include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
 
 extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
 extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
                                  __u32 c, __u64 d) __ksym;
 
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
 SEC("tc")
 int kfunc_call_test2(struct __sk_buff *skb)
 {
@@ -44,4 +51,45 @@ int kfunc_call_test1(struct __sk_buff *skb)
        return ret;
 }
 
+SEC("tc")
+int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
+{
+       struct prog_test_ref_kfunc *pt;
+       unsigned long s = 0;
+       int ret = 0;
+
+       pt = bpf_kfunc_call_test_acquire(&s);
+       if (pt) {
+               if (pt->a != 42 || pt->b != 108)
+                       ret = -1;
+               bpf_kfunc_call_test_release(pt);
+       }
+       return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_pass(struct __sk_buff *skb)
+{
+       struct prog_test_pass1 p1 = {};
+       struct prog_test_pass2 p2 = {};
+       short a = 0;
+       __u64 b = 0;
+       long c = 0;
+       char d = 0;
+       int e = 0;
+
+       bpf_kfunc_call_test_pass_ctx(skb);
+       bpf_kfunc_call_test_pass1(&p1);
+       bpf_kfunc_call_test_pass2(&p2);
+
+       bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a));
+       bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b));
+       bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c));
+       bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d));
+       bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e));
+       bpf_kfunc_call_test_mem_len_fail2(&b, -1);
+
+       return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c
new file mode 100644 (file)
index 0000000..def97f2
--- /dev/null
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_testmod_ksym_percpu __ksym;
+
+SEC("tc")
+int ksym_fail(struct __sk_buff *ctx)
+{
+       return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+}
+
+char _license[] SEC("license") = "GPL";
index 1612a32..495990d 100644 (file)
@@ -2,19 +2,19 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") htab = {
-       .type = BPF_MAP_TYPE_HASH,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(long),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, __u32);
+       __type(value, long);
+       __uint(max_entries, 2);
+} htab SEC(".maps");
 
-struct bpf_map_def SEC("maps") array = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(long),
-       .max_entries = 2,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, long);
+       __uint(max_entries, 2);
+} array SEC(".maps");
 
 /* Sample program which should always load for testing control paths. */
 SEC(".text") int func()
index 95d5b94..c9abfe3 100644 (file)
@@ -7,8 +7,6 @@ int bpf_prog1(struct __sk_buff *skb)
 {
        void *data_end = (void *)(long) skb->data_end;
        void *data = (void *)(long) skb->data;
-       __u32 lport = skb->local_port;
-       __u32 rport = skb->remote_port;
        __u8 *d = data;
        int err;
 
index 79c8139..d0298dc 100644 (file)
@@ -73,17 +73,17 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 */
 
                if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                if (((struct tcp_zerocopy_receive *)optval)->address != 0)
-                       return 0; /* EPERM, unexpected data */
+                       return 0; /* unexpected data */
 
                return 1;
        }
 
        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                if (optval + 1 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                ctx->retval = 0; /* Reset system call return value to zero */
 
@@ -96,24 +96,24 @@ int _getsockopt(struct bpf_sockopt *ctx)
                 * bytes of data.
                 */
                if (optval_end - optval != page_size)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                return 1;
        }
 
        if (ctx->level != SOL_CUSTOM)
-               return 0; /* EPERM, deny everything except custom level */
+               return 0; /* deny everything except custom level */
 
        if (optval + 1 > optval_end)
-               return 0; /* EPERM, bounds check */
+               return 0; /* bounds check */
 
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
-               return 0; /* EPERM, couldn't get sk storage */
+               return 0; /* couldn't get sk storage */
 
        if (!ctx->retval)
-               return 0; /* EPERM, kernel should not have handled
+               return 0; /* kernel should not have handled
                           * SOL_CUSTOM, something is wrong!
                           */
        ctx->retval = 0; /* Reset system call return value to zero */
@@ -152,7 +152,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
                /* Overwrite SO_SNDBUF value */
 
                if (optval + sizeof(__u32) > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                *(__u32 *)optval = 0x55AA;
                ctx->optlen = 4;
@@ -164,7 +164,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
                /* Always use cubic */
 
                if (optval + 5 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                memcpy(optval, "cubic", 5);
                ctx->optlen = 5;
@@ -175,10 +175,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                /* Original optlen is larger than PAGE_SIZE. */
                if (ctx->optlen != page_size * 2)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                if (optval + 1 > optval_end)
-                       return 0; /* EPERM, bounds check */
+                       return 0; /* bounds check */
 
                /* Make sure we can trim the buffer. */
                optval[0] = 0;
@@ -189,21 +189,21 @@ int _setsockopt(struct bpf_sockopt *ctx)
                 * bytes of data.
                 */
                if (optval_end - optval != page_size)
-                       return 0; /* EPERM, unexpected data size */
+                       return 0; /* unexpected data size */
 
                return 1;
        }
 
        if (ctx->level != SOL_CUSTOM)
-               return 0; /* EPERM, deny everything except custom level */
+               return 0; /* deny everything except custom level */
 
        if (optval + 1 > optval_end)
-               return 0; /* EPERM, bounds check */
+               return 0; /* bounds check */
 
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
-               return 0; /* EPERM, couldn't get sk storage */
+               return 0; /* couldn't get sk storage */
 
        storage->val = optval[0];
        ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
new file mode 100644 (file)
index 0000000..f00a973
--- /dev/null
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define EAFNOSUPPORT 97
+#define EPROTO 71
+#define ENONET 64
+#define EINVAL 22
+#define ENOENT 2
+
+int test_einval_bpf_tuple = 0;
+int test_einval_reserved = 0;
+int test_einval_netns_id = 0;
+int test_einval_len_opts = 0;
+int test_eproto_l4proto = 0;
+int test_enonet_netns_id = 0;
+int test_enoent_lookup = 0;
+int test_eafnosupport = 0;
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+       s32 netns_id;
+       s32 error;
+       u8 l4proto;
+       u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+                                 struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+                                 struct bpf_ct_opts___local *, u32) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+
+static __always_inline void
+nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
+                                  struct bpf_ct_opts___local *, u32),
+          void *ctx)
+{
+       struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+       struct bpf_sock_tuple bpf_tuple;
+       struct nf_conn *ct;
+
+       __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+       ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_bpf_tuple = opts_def.error;
+
+       opts_def.reserved[0] = 1;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.reserved[0] = 0;
+       opts_def.l4proto = IPPROTO_TCP;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_reserved = opts_def.error;
+
+       opts_def.netns_id = -2;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.netns_id = -1;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_netns_id = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1);
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_einval_len_opts = opts_def.error;
+
+       opts_def.l4proto = IPPROTO_ICMP;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.l4proto = IPPROTO_TCP;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_eproto_l4proto = opts_def.error;
+
+       opts_def.netns_id = 0xf00f;
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       opts_def.netns_id = -1;
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_enonet_netns_id = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_enoent_lookup = opts_def.error;
+
+       ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def));
+       if (ct)
+               bpf_ct_release(ct);
+       else
+               test_eafnosupport = opts_def.error;
+}
+
+SEC("xdp")
+int nf_xdp_ct_test(struct xdp_md *ctx)
+{
+       nf_ct_test((void *)bpf_xdp_ct_lookup, ctx);
+       return 0;
+}
+
+SEC("tc")
+int nf_skb_ct_test(struct __sk_buff *ctx)
+{
+       nf_ct_test((void *)bpf_skb_ct_lookup, ctx);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 160ead6..07c94df 100644 (file)
@@ -9,12 +9,15 @@ struct ipv_counts {
        unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 struct bpf_map_def SEC("maps") btf_map = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(int),
        .value_size = sizeof(struct ipv_counts),
        .max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
 
index 1884a5b..762671a 100644 (file)
@@ -9,6 +9,8 @@ struct ipv_counts {
        unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 /* just to validate we can handle maps in multiple sections */
 struct bpf_map_def SEC("maps") btf_map_legacy = {
        .type = BPF_MAP_TYPE_ARRAY,
@@ -16,6 +18,7 @@ struct bpf_map_def SEC("maps") btf_map_legacy = {
        .value_size = sizeof(long long),
        .max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
 
index 15e0f99..1dabb88 100644 (file)
@@ -8,12 +8,12 @@ struct ipv_counts {
        unsigned int v6;
 };
 
-struct bpf_map_def SEC("maps") btf_map = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(struct ipv_counts),
-       .max_entries = 4,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct ipv_counts));
+       __uint(max_entries, 4);
+} btf_map SEC(".maps");
 
 __attribute__((noinline))
 int test_long_fname_2(void)
index c304cd5..37aacc6 100644 (file)
 
 #define NUM_CGROUP_LEVELS      4
 
-struct bpf_map_def SEC("maps") cgroup_ids = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(__u64),
-       .max_entries = NUM_CGROUP_LEVELS,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, __u64);
+       __uint(max_entries, NUM_CGROUP_LEVELS);
+} cgroup_ids SEC(".maps");
 
 static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
new file mode 100644 (file)
index 0000000..9d58d61
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+       return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+       return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
index bf28814..950a70b 100644 (file)
 #define THROTTLE_RATE_BPS (5 * 1000 * 1000)
 
 /* flow_key => last_tstamp timestamp used */
-struct bpf_map_def SEC("maps") flow_map = {
-       .type = BPF_MAP_TYPE_HASH,
-       .key_size = sizeof(uint32_t),
-       .value_size = sizeof(uint64_t),
-       .max_entries = 1,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, uint32_t);
+       __type(value, uint64_t);
+       __uint(max_entries, 1);
+} flow_map SEC(".maps");
 
 static inline int throttle_flow(struct __sk_buff *skb)
 {
index cd747cd..6edebce 100644 (file)
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-struct bpf_map_def SEC("maps") results = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(__u32),
-       .value_size = sizeof(__u32),
-       .max_entries = 3,
-};
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, __u32);
+       __uint(max_entries, 3);
+} results SEC(".maps");
 
 static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
                                           void *iph, __u32 ip_size,
index 199c61b..53b64c9 100644 (file)
@@ -7,11 +7,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 {
        void *data_end = (void *)(long)xdp->data_end;
        void *data = (void *)(long)xdp->data;
-       unsigned int data_len;
+       int data_len = bpf_xdp_get_buff_len(xdp);
        int offset = 0;
 
        /* Data length determine test case */
-       data_len = data_end - data;
 
        if (data_len == 54) { /* sizeof(pkt_v4) */
                offset = 4096; /* test too large offset */
@@ -20,7 +19,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
        } else if (data_len == 64) {
                offset = 128;
        } else if (data_len == 128) {
-               offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+               /* Max tail grow 3520 */
+               offset = 4096 - 256 - 320 - data_len;
+       } else if (data_len == 9000) {
+               offset = 10;
+       } else if (data_len == 9001) {
+               offset = 4096;
        } else {
                return XDP_ABORTED; /* No matching test */
        }
index b744825..ca68c03 100644 (file)
 SEC("xdp")
 int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
 {
-       void *data_end = (void *)(long)xdp->data_end;
-       void *data = (void *)(long)xdp->data;
+       __u8 *data_end = (void *)(long)xdp->data_end;
+       __u8 *data = (void *)(long)xdp->data;
        int offset = 0;
 
-       if (data_end - data == 54) /* sizeof(pkt_v4) */
+       switch (bpf_xdp_get_buff_len(xdp)) {
+       case 54:
+               /* sizeof(pkt_v4) */
                offset = 256; /* shrink too much */
-       else
+               break;
+       case 9000:
+               /* non-linear buff test cases */
+               if (data + 1 > data_end)
+                       return XDP_DROP;
+
+               switch (data[0]) {
+               case 0:
+                       offset = 10;
+                       break;
+               case 1:
+                       offset = 4100;
+                       break;
+               case 2:
+                       offset = 8200;
+                       break;
+               default:
+                       return XDP_DROP;
+               }
+               break;
+       default:
                offset = 20;
+               break;
+       }
        if (bpf_xdp_adjust_tail(xdp, 0 - offset))
                return XDP_DROP;
        return XDP_TX;
index 58cf434..3379d30 100644 (file)
@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
        void *data = (void *)(long)xdp->data;
 
        meta.ifindex = xdp->rxq->dev->ifindex;
-       meta.pkt_len = data_end - data;
+       meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
        bpf_xdp_output(xdp, &perf_buf_map,
                       ((__u64) meta.pkt_len << 32) |
                       BPF_F_CURRENT_CPU,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
new file mode 100644 (file)
index 0000000..2a3496d
--- /dev/null
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp.frags")
+int xdp_adjust_frags(struct xdp_md *xdp)
+{
+       __u8 *data_end = (void *)(long)xdp->data_end;
+       __u8 *data = (void *)(long)xdp->data;
+       __u8 val[16] = {};
+       __u32 offset;
+       int err;
+
+       if (data + sizeof(__u32) > data_end)
+               return XDP_DROP;
+
+       offset = *(__u32 *)data;
+       err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val));
+       if (err < 0)
+               return XDP_DROP;
+
+       if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
+               return XDP_DROP;
+
+       val[0] = 0xbb; /* update the marker */
+       val[15] = 0xbb;
+       err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val));
+       if (err < 0)
+               return XDP_DROP;
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
new file mode 100644 (file)
index 0000000..62fb7cd
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO     1
+
+struct {
+       __uint(type, BPF_MAP_TYPE_CPUMAP);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_cpumap_val));
+       __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
index 5320250..48007f1 100644 (file)
@@ -33,4 +33,10 @@ int xdp_dummy_cm(struct xdp_md *ctx)
        return XDP_PASS;
 }
 
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
new file mode 100644 (file)
index 0000000..e1caf51
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap/map_prog")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
index 1e6b9c3..8ae11fa 100644 (file)
@@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx)
 
        return XDP_PASS;
 }
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
index 76cd903..29bbaa5 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/if_ether.h>
 #include <linux/btf.h>
 
+#include <bpf/btf.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
@@ -66,6 +67,11 @@ static bool unpriv_disabled = false;
 static int skips;
 static bool verbose = false;
 
+struct kfunc_btf_id_pair {
+       const char *kfunc;
+       int insn_idx;
+};
+
 struct bpf_test {
        const char *descr;
        struct bpf_insn insns[MAX_INSNS];
@@ -92,6 +98,7 @@ struct bpf_test {
        int fixup_map_reuseport_array[MAX_FIXUPS];
        int fixup_map_ringbuf[MAX_FIXUPS];
        int fixup_map_timer[MAX_FIXUPS];
+       struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
        /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
         * Can be a tab-separated sequence of expected strings. An empty string
         * means no log verification.
@@ -744,6 +751,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
        int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
        int *fixup_map_ringbuf = test->fixup_map_ringbuf;
        int *fixup_map_timer = test->fixup_map_timer;
+       struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
 
        if (test->fill_helper) {
                test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -936,6 +944,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
                        fixup_map_timer++;
                } while (*fixup_map_timer);
        }
+
+       /* Patch in kfunc BTF IDs */
+       if (fixup_kfunc_btf_id->kfunc) {
+               struct btf *btf;
+               int btf_id;
+
+               do {
+                       btf_id = 0;
+                       btf = btf__load_vmlinux_btf();
+                       if (btf) {
+                               btf_id = btf__find_by_name_kind(btf,
+                                                               fixup_kfunc_btf_id->kfunc,
+                                                               BTF_KIND_FUNC);
+                               btf_id = btf_id < 0 ? 0 : btf_id;
+                       }
+                       btf__free(btf);
+                       prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+                       fixup_kfunc_btf_id++;
+               } while (fixup_kfunc_btf_id->kfunc);
+       }
 }
 
 struct libcap {
index d7b74eb..829be2b 100644 (file)
        .result  = ACCEPT,
 },
 {
+       "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail1", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail2", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: ptr_to_mem to struct with FAM",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_fail3", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: reg->type != PTR_TO_CTX",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 expected pointer to ctx, but got PTR",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_pass_ctx", 2 },
+       },
+},
+{
+       "calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "arg#0 pointer type UNKNOWN  must point to scalar",
+       .fixup_kfunc_btf_id = {
+               { "bpf_kfunc_call_test_mem_len_fail1", 2 },
+       },
+},
+{
        "calls: basic sanity",
        .insns = {
        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
index 0a5d23d..ffa5502 100644 (file)
@@ -906,7 +906,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
                        return true;
                case STAT_TEST_RX_FULL:
                        xsk_stat = stats.rx_ring_full;
-                       expected_stat -= RX_FULL_RXQSIZE;
+                       if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+                               expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
+                       else
+                               expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
                        break;
                case STAT_TEST_RX_FILL_EMPTY:
                        xsk_stat = stats.rx_fill_ring_empty_descs;
index 43ea840..3b04899 100755 (executable)
@@ -96,7 +96,7 @@ fib_rule6_del()
 
 fib_rule6_del_by_pref()
 {
-       pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+       pref=$($IP -6 rule show $1 table $RTABLE | cut -d ":" -f 1)
        $IP -6 rule del pref $pref
 }
 
@@ -104,17 +104,21 @@ fib_rule6_test_match_n_redirect()
 {
        local match="$1"
        local getmatch="$2"
+       local description="$3"
 
        $IP -6 rule add $match table $RTABLE
        $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
-       log_test $? 0 "rule6 check: $1"
+       log_test $? 0 "rule6 check: $description"
 
        fib_rule6_del_by_pref "$match"
-       log_test $? 0 "rule6 del by pref: $match"
+       log_test $? 0 "rule6 del by pref: $description"
 }
 
 fib_rule6_test()
 {
+       local getmatch
+       local match
+
        # setup the fib rule redirect route
        $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
 
@@ -165,7 +169,7 @@ fib_rule4_del()
 
 fib_rule4_del_by_pref()
 {
-       pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+       pref=$($IP rule show $1 table $RTABLE | cut -d ":" -f 1)
        $IP rule del pref $pref
 }
 
@@ -173,17 +177,21 @@ fib_rule4_test_match_n_redirect()
 {
        local match="$1"
        local getmatch="$2"
+       local description="$3"
 
        $IP rule add $match table $RTABLE
        $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
-       log_test $? 0 "rule4 check: $1"
+       log_test $? 0 "rule4 check: $description"
 
        fib_rule4_del_by_pref "$match"
-       log_test $? 0 "rule4 del by pref: $match"
+       log_test $? 0 "rule4 del by pref: $description"
 }
 
 fib_rule4_test()
 {
+       local getmatch
+       local match
+
        # setup the fib rule redirect route
        $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
 
@@ -192,11 +200,11 @@ fib_rule4_test()
 
        # need enable forwarding and disable rp_filter temporarily as all the
        # addresses are in the same subnet and egress device == ingress device.
-       ip netns exec testns sysctl -w net.ipv4.ip_forward=1
-       ip netns exec testns sysctl -w net.ipv4.conf.$DEV.rp_filter=0
+       ip netns exec testns sysctl -qw net.ipv4.ip_forward=1
+       ip netns exec testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
        match="from $SRC_IP iif $DEV"
        fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
-       ip netns exec testns sysctl -w net.ipv4.ip_forward=0
+       ip netns exec testns sysctl -qw net.ipv4.ip_forward=0
 
        match="tos 0x10"
        fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
index b90dff8..64bd00f 100755 (executable)
@@ -28,8 +28,9 @@ h2_destroy()
 
 switch_create()
 {
-       # 10 Seconds ageing time.
-       ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+       ip link add dev br0 type bridge \
+               vlan_filtering 1 \
+               ageing_time $LOW_AGEING_TIME \
                mcast_snooping 0
 
        ip link set dev $swp1 master br0
index c15c6c8..1c8a260 100755 (executable)
@@ -27,8 +27,9 @@ h2_destroy()
 
 switch_create()
 {
-       # 10 Seconds ageing time.
-       ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+       ip link add dev br0 type bridge \
+               ageing_time $LOW_AGEING_TIME \
+               mcast_snooping 0
 
        ip link set dev $swp1 master br0
        ip link set dev $swp2 master br0
index b0980a2..4a54650 100644 (file)
@@ -41,6 +41,8 @@ NETIF_CREATE=yes
 # Timeout (in seconds) before ping exits regardless of how many packets have
 # been sent or received
 PING_TIMEOUT=5
+# Minimum ageing_time (in centiseconds) supported by hardware
+LOW_AGEING_TIME=1000
 # Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
 # filter by software/hardware
 TC_FLAG=skip_hw
index 7da783d..e7e434a 100644 (file)
@@ -24,6 +24,7 @@ PING_COUNT=${PING_COUNT:=10}
 PING_TIMEOUT=${PING_TIMEOUT:=5}
 WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
 INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
+LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
 REQUIRE_JQ=${REQUIRE_JQ:=yes}
 REQUIRE_MZ=${REQUIRE_MZ:=yes}
 
index b8bdbec..bd106c7 100755 (executable)
@@ -289,7 +289,7 @@ do_transfer()
        addr_nr_ns1="$7"
        addr_nr_ns2="$8"
        speed="$9"
-       bkup="${10}"
+       sflags="${10}"
 
        port=$((10000+$TEST_COUNT))
        TEST_COUNT=$((TEST_COUNT+1))
@@ -461,14 +461,13 @@ do_transfer()
                fi
        fi
 
-       if [ ! -z $bkup ]; then
+       if [ ! -z $sflags ]; then
                sleep 1
                for netns in "$ns1" "$ns2"; do
                        dump=(`ip netns exec $netns ./pm_nl_ctl dump`)
                        if [ ${#dump[@]} -gt 0 ]; then
                                addr=${dump[${#dump[@]} - 1]}
-                               backup="ip netns exec $netns ./pm_nl_ctl set $addr flags $bkup"
-                               $backup
+                               ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags
                        fi
                done
        fi
@@ -545,7 +544,7 @@ run_tests()
        addr_nr_ns1="${5:-0}"
        addr_nr_ns2="${6:-0}"
        speed="${7:-fast}"
-       bkup="${8:-""}"
+       sflags="${8:-""}"
        lret=0
        oldin=""
 
@@ -574,7 +573,7 @@ run_tests()
        fi
 
        do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
-               ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
+               ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags}
        lret=$?
 }
 
@@ -1888,6 +1887,44 @@ fullmesh_tests()
        run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
        chk_join_nr "fullmesh test 1x2, limited" 4 4 4
        chk_add_nr 1 1
+
+       # set fullmesh flag
+       reset
+       ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
+       ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+       run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
+       chk_join_nr "set fullmesh flag test" 2 2 2
+       chk_rm_nr 0 1
+
+       # set nofullmesh flag
+       reset
+       ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow,fullmesh
+       ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+       run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
+       chk_join_nr "set nofullmesh flag test" 2 2 2
+       chk_rm_nr 0 1
+
+       # set backup,fullmesh flags
+       reset
+       ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
+       ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+       run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
+       chk_join_nr "set backup,fullmesh flags test" 2 2 2
+       chk_prio_nr 0 1
+       chk_rm_nr 0 1
+
+       # set nobackup,nofullmesh flags
+       reset
+       ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+       ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,backup,fullmesh
+       run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
+       chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2
+       chk_prio_nr 0 1
+       chk_rm_nr 0 1
 }
 
 all_tests()
index 3547845..152b84e 100644 (file)
@@ -28,7 +28,7 @@ static void syntax(char *argv[])
        fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
        fprintf(stderr, "\tdel <id> [<ip>]\n");
        fprintf(stderr, "\tget <id>\n");
-       fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
+       fprintf(stderr, "\tset <ip> [flags backup|nobackup|fullmesh|nofullmesh]\n");
        fprintf(stderr, "\tflush\n");
        fprintf(stderr, "\tdump\n");
        fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
@@ -704,12 +704,14 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
                        if (++arg >= argc)
                                error(1, 0, " missing flags value");
 
-                       /* do not support flag list yet */
                        for (str = argv[arg]; (tok = strtok(str, ","));
                             str = NULL) {
                                if (!strcmp(tok, "backup"))
                                        flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
-                               else if (strcmp(tok, "nobackup"))
+                               else if (!strcmp(tok, "fullmesh"))
+                                       flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
+                               else if (strcmp(tok, "nobackup") &&
+                                        strcmp(tok, "nofullmesh"))
                                        error(1, errno,
                                              "unknown flag %s", argv[arg]);
                        }
index aee631c..044bc0e 100644 (file)
@@ -325,8 +325,8 @@ int main(int argc, char **argv)
        struct ifreq device;
        struct ifreq hwtstamp;
        struct hwtstamp_config hwconfig, hwconfig_requested;
-       struct so_timestamping so_timestamping_get = { 0, -1 };
-       struct so_timestamping so_timestamping = { 0, -1 };
+       struct so_timestamping so_timestamping_get = { 0, 0 };
+       struct so_timestamping so_timestamping = { 0, 0 };
        struct sockaddr_in addr;
        struct ip_mreq imr;
        struct in_addr iaddr;