OSDN Git Service

net/sched: act_police: add support for packet-per-second policing
author: Baowen Zheng <baowen.zheng@corigine.com>
Fri, 12 Mar 2021 14:08:31 +0000 (15:08 +0100)
committer: David S. Miller <davem@davemloft.net>
Sat, 13 Mar 2021 22:18:09 +0000 (14:18 -0800)
Allow a policer action to enforce a rate-limit based on packets-per-second,
configurable using a packet-per-second rate and burst parameters.

e.g.
tc filter add dev tap1 parent ffff: u32 match \
        u32 0 0 police pkts_rate 3000 pkts_burst 1000

Testing was unable to uncover a performance impact of this change on
existing features.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/sch_generic.h
include/net/tc_act/tc_police.h
include/uapi/linux/pkt_cls.h
net/sched/act_police.c
net/sched/sch_generic.c

index 2d6eb60..f7a6e14 100644 (file)
@@ -1242,6 +1242,20 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
        res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
 }
 
+struct psched_pktrate {
+       u64     rate_pkts_ps; /* packets per second */
+       u32     mult;  /* multiplier applied by psched_pkt2t_ns() */
+       u8      shift; /* right shift applied after multiplying by mult */
+};
+
+static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r,
+                                 unsigned int pkt_num)
+{
+       return ((u64)pkt_num * r->mult) >> r->shift; /* multiply-and-shift, no divide */
+}
+
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
+
 /* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
  * The fast path only needs to access filter list and to update stats
  */
index ae117f7..7264951 100644 (file)
@@ -10,10 +10,13 @@ struct tcf_police_params {
        s64                     tcfp_burst;
        u32                     tcfp_mtu;
        s64                     tcfp_mtu_ptoks;
+       s64                     tcfp_pkt_burst;
        struct psched_ratecfg   rate;
        bool                    rate_present;
        struct psched_ratecfg   peak;
        bool                    peak_present;
+       struct psched_pktrate   ppsrate;
+       bool                    pps_present;
        struct rcu_head rcu;
 };
 
@@ -24,6 +27,7 @@ struct tcf_police {
        spinlock_t              tcfp_lock ____cacheline_aligned_in_smp;
        s64                     tcfp_toks;
        s64                     tcfp_ptoks;
+       s64                     tcfp_pkttoks;
        s64                     tcfp_t_c;
 };
 
@@ -99,14 +103,50 @@ static inline u32 tcf_police_burst(const struct tc_action *act)
 
 static inline u64 tcf_police_rate_pkt_ps(const struct tc_action *act)
 {
-       /* Not implemented */
-       return 0;
+       struct tcf_police *police = to_police(act);
+       struct tcf_police_params *params; /* fetched with lockdep checking tcf_lock */
+
+       params = rcu_dereference_protected(police->params,
+                                          lockdep_is_held(&police->tcf_lock));
+       return params->ppsrate.rate_pkts_ps; /* stored rate, packets per second */
 }
 
 static inline u32 tcf_police_burst_pkt(const struct tc_action *act)
 {
-       /* Not implemented */
-       return 0;
+       struct tcf_police *police = to_police(act);
+       struct tcf_police_params *params;
+       u32 burst;
+
+       params = rcu_dereference_protected(police->params,
+                                          lockdep_is_held(&police->tcf_lock));
+
+       /*
+        * Scale the stored burst (tcfp_pkt_burst, kept in nanoseconds) by
+        * the packet rate and divide by NSEC_PER_SEC. The derivation of
+        * that expression follows:
+        *
+        *  "rate" pkts     "burst" nanoseconds
+        *  ------------ *  -------------------
+        *    1 second          2^6 ticks
+        *
+        * ------------------------------------
+        *        NSEC_PER_SEC nanoseconds
+        *        ------------------------
+        *              2^6 ticks
+        *
+        *    "rate" pkts    "burst" nanoseconds            2^6 ticks
+        *  = ------------ * ------------------- * ------------------------
+        *      1 second          2^6 ticks        NSEC_PER_SEC nanoseconds
+        *
+        *   "rate" * "burst"
+        * = ---------------- pkts/nanosecond
+        *    NSEC_PER_SEC^2
+        *
+        *
+        *   "rate" * "burst"
+        * = ---------------- pkts/second
+        *     NSEC_PER_SEC
+        *
+        * NOTE(review): the product below is formed in 64 bits (s64 * u64)
+        * and the quotient is narrowed to u32, so very large rate/burst
+        * values could wrap or truncate - confirm the inputs are bounded.
+        */
+       burst = div_u64(params->tcfp_pkt_burst * params->ppsrate.rate_pkts_ps,
+                       NSEC_PER_SEC);
+
+       return burst;
 }
 
 static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act)
index 7ea59cf..025c40f 100644 (file)
@@ -190,6 +190,8 @@ enum {
        TCA_POLICE_PAD,
        TCA_POLICE_RATE64,
        TCA_POLICE_PEAKRATE64,
+       TCA_POLICE_PKTRATE64,
+       TCA_POLICE_PKTBURST64,
        __TCA_POLICE_MAX
 #define TCA_POLICE_RESULT TCA_POLICE_RESULT
 };
index 8d8452b..0fab8de 100644 (file)
@@ -42,6 +42,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
        [TCA_POLICE_RESULT]     = { .type = NLA_U32 },
        [TCA_POLICE_RATE64]     = { .type = NLA_U64 },
        [TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 },
+       [TCA_POLICE_PKTRATE64]  = { .type = NLA_U64, .min = 1 },
+       [TCA_POLICE_PKTBURST64] = { .type = NLA_U64, .min = 1 },
 };
 
 static int tcf_police_init(struct net *net, struct nlattr *nla,
@@ -61,6 +63,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
        bool exists = false;
        u32 index;
        u64 rate64, prate64;
+       u64 pps, ppsburst;
 
        if (nla == NULL)
                return -EINVAL;
@@ -142,6 +145,21 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
                }
        }
 
+       if ((tb[TCA_POLICE_PKTRATE64] && !tb[TCA_POLICE_PKTBURST64]) ||
+           (!tb[TCA_POLICE_PKTRATE64] && tb[TCA_POLICE_PKTBURST64])) {
+               NL_SET_ERR_MSG(extack,
+                              "Both or neither packet-per-second burst and rate must be provided");
+               err = -EINVAL;
+               goto failure;
+       }
+
+       if (tb[TCA_POLICE_PKTRATE64] && R_tab) {
+               NL_SET_ERR_MSG(extack,
+                              "packet-per-second and byte-per-second rate limits not allowed in same action");
+               err = -EINVAL;
+               goto failure;
+       }
+
        new = kzalloc(sizeof(*new), GFP_KERNEL);
        if (unlikely(!new)) {
                err = -ENOMEM;
@@ -183,6 +201,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
        if (tb[TCA_POLICE_AVRATE])
                new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
 
+       if (tb[TCA_POLICE_PKTRATE64]) {
+               pps = nla_get_u64(tb[TCA_POLICE_PKTRATE64]);
+               ppsburst = nla_get_u64(tb[TCA_POLICE_PKTBURST64]);
+               new->pps_present = true;
+               new->tcfp_pkt_burst = PSCHED_TICKS2NS(ppsburst);
+               psched_ppscfg_precompute(&new->ppsrate, pps);
+       }
+
        spin_lock_bh(&police->tcf_lock);
        spin_lock_bh(&police->tcfp_lock);
        police->tcfp_t_c = ktime_get_ns();
@@ -217,8 +243,8 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
                          struct tcf_result *res)
 {
        struct tcf_police *police = to_police(a);
+       s64 now, toks, ppstoks = 0, ptoks = 0;
        struct tcf_police_params *p;
-       s64 now, toks, ptoks = 0;
        int ret;
 
        tcf_lastuse_update(&police->tcf_tm);
@@ -236,7 +262,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
        }
 
        if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
-               if (!p->rate_present) {
+               if (!p->rate_present && !p->pps_present) {
                        ret = p->tcfp_result;
                        goto end;
                }
@@ -251,14 +277,23 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
                        ptoks -= (s64)psched_l2t_ns(&p->peak,
                                                    qdisc_pkt_len(skb));
                }
-               toks += police->tcfp_toks;
-               if (toks > p->tcfp_burst)
-                       toks = p->tcfp_burst;
-               toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
-               if ((toks|ptoks) >= 0) {
+               if (p->rate_present) {
+                       toks += police->tcfp_toks;
+                       if (toks > p->tcfp_burst)
+                               toks = p->tcfp_burst;
+                       toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
+               } else if (p->pps_present) {
+                       ppstoks = min_t(s64, now - police->tcfp_t_c, p->tcfp_pkt_burst);
+                       ppstoks += police->tcfp_pkttoks;
+                       if (ppstoks > p->tcfp_pkt_burst)
+                               ppstoks = p->tcfp_pkt_burst;
+                       ppstoks -= (s64)psched_pkt2t_ns(&p->ppsrate, 1);
+               }
+               if ((toks | ptoks | ppstoks) >= 0) {
                        police->tcfp_t_c = now;
                        police->tcfp_toks = toks;
                        police->tcfp_ptoks = ptoks;
+                       police->tcfp_pkttoks = ppstoks;
                        spin_unlock_bh(&police->tcfp_lock);
                        ret = p->tcfp_result;
                        goto inc_drops;
@@ -331,6 +366,16 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
                                      TCA_POLICE_PAD))
                        goto nla_put_failure;
        }
+       if (p->pps_present) {
+               if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64,
+                                     police->params->ppsrate.rate_pkts_ps,
+                                     TCA_POLICE_PAD))
+                       goto nla_put_failure;
+               if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64,
+                                     PSCHED_NS2TICKS(p->tcfp_pkt_burst),
+                                     TCA_POLICE_PAD))
+                       goto nla_put_failure;
+       }
        if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
                goto nla_put_failure;
        if (p->tcfp_result &&
index 49eae93..44991ea 100644 (file)
@@ -1325,6 +1325,48 @@ void dev_shutdown(struct net_device *dev)
        WARN_ON(timer_pending(&dev->watchdog_timer));
 }
 
+/**
+ * psched_ratecfg_precompute__() - Pre-compute values for reciprocal division
+ * @rate:   Rate to compute reciprocal division values of
+ * @mult:   Multiplier for reciprocal division
+ * @shift:  Shift for reciprocal division
+ *
+ * The multiplier and shift for reciprocal division by rate are stored
+ * in mult and shift.
+ *
+ * If @rate is zero, @mult is left at 1 and @shift at 0.
+ *
+ * The deal here is to replace a divide by a reciprocal one
+ * in fast path (a reciprocal divide is a multiply and a shift)
+ *
+ * Normal formula would be :
+ *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
+ *
+ * We compute mult/shift to use instead :
+ *  time_in_ns = (len * mult) >> shift;
+ *
+ * We try to get the highest possible mult value for accuracy,
+ * but have to make sure no overflows will ever happen: the scaling
+ * factor is doubled (and shift incremented) until either bit 31 of
+ * mult or bit 63 of the factor becomes set.
+ *
+ * reciprocal_value() is not used here because it doesn't handle
+ * 64-bit values.
+ */
+static void psched_ratecfg_precompute__(u64 rate, u32 *mult, u8 *shift)
+{
+       u64 factor = NSEC_PER_SEC;
+
+       *mult = 1;
+       *shift = 0;
+
+       if (rate <= 0)
+               return;
+
+       for (;;) {
+               *mult = div64_u64(factor, rate);
+               if (*mult & (1U << 31) || factor & (1ULL << 63))
+                       break;
+               factor <<= 1;
+               (*shift)++;
+       }
+}
+
 void psched_ratecfg_precompute(struct psched_ratecfg *r,
                               const struct tc_ratespec *conf,
                               u64 rate64)
@@ -1333,34 +1375,17 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
        r->overhead = conf->overhead;
        r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
        r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
-       r->mult = 1;
-       /*
-        * The deal here is to replace a divide by a reciprocal one
-        * in fast path (a reciprocal divide is a multiply and a shift)
-        *
-        * Normal formula would be :
-        *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
-        *
-        * We compute mult/shift to use instead :
-        *  time_in_ns = (len * mult) >> shift;
-        *
-        * We try to get the highest possible mult value for accuracy,
-        * but have to make sure no overflows will ever happen.
-        */
-       if (r->rate_bytes_ps > 0) {
-               u64 factor = NSEC_PER_SEC;
-
-               for (;;) {
-                       r->mult = div64_u64(factor, r->rate_bytes_ps);
-                       if (r->mult & (1U << 31) || factor & (1ULL << 63))
-                               break;
-                       factor <<= 1;
-                       r->shift++;
-               }
-       }
+       psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
 }
 EXPORT_SYMBOL(psched_ratecfg_precompute);
 
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
+{
+       r->rate_pkts_ps = pktrate64; /* keep the rate as given */
+       psched_ratecfg_precompute__(r->rate_pkts_ps, &r->mult, &r->shift); /* fills mult/shift */
+}
+EXPORT_SYMBOL(psched_ppscfg_precompute);
+
 static void mini_qdisc_rcu_func(struct rcu_head *head)
 {
 }