2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
18 #include <linux/config.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/sched.h>
22 #include <linux/string.h>
24 #include <linux/socket.h>
25 #include <linux/sockios.h>
27 #include <linux/errno.h>
28 #include <linux/interrupt.h>
29 #include <linux/netdevice.h>
30 #include <linux/skbuff.h>
31 #include <linux/rtnetlink.h>
32 #include <linux/init.h>
33 #include <linux/proc_fs.h>
34 #include <linux/kmod.h>
35 #include <linux/list.h>
38 #include <net/pkt_sched.h>
40 #include <asm/processor.h>
41 #include <asm/uaccess.h>
42 #include <asm/system.h>
43 #include <asm/bitops.h>
45 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
46 struct Qdisc *old, struct Qdisc *new);
47 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
48 struct Qdisc *q, unsigned long cl, int event);
55 This file consists of two interrelated parts:
57 1. queueing disciplines manager frontend.
58 2. traffic classes manager frontend.
60 Generally, queueing discipline ("qdisc") is a black box,
61 which is able to enqueue packets and to dequeue them (when
62 device is ready to send something) in order and at times
63 determined by algorithm hidden in it.
65 qdisc's are divided into two categories:
66 - "queues", which have no internal structure visible from outside.
67 - "schedulers", which split all the packets to "traffic classes",
68 using "packet classifiers" (look at cls_api.c)
70 In turn, classes may have child qdiscs (as a rule, queues)
71 attached to them etc. etc. etc.
73 The goal of the routines in this file is to translate
74 information supplied by user in the form of handles
75 to more intelligible for kernel form, to make some sanity
76 checks and part of work, which is common to all qdiscs
77 and to provide rtnetlink notifications.
79 All real intelligent work is done inside qdisc modules.
83 Every discipline has two major routines: enqueue and dequeue.
87 dequeue usually returns a skb to send. It is allowed to return NULL,
88 but it does not mean that queue is empty, it just means that
89 discipline does not want to send anything this time.
90 Queue is really empty if q->q.qlen == 0.
91 For complicated disciplines with multiple queues q->q is not
92 real packet queue, but however q->q.qlen must be valid.
96 enqueue returns 0, if packet was enqueued successfully.
97 If packet (this one or another one) was dropped, it returns
99 NET_XMIT_DROP - this packet dropped
100 Expected action: do not backoff, but wait until queue will clear.
101 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
102 Expected action: backoff or ignore
103 NET_XMIT_POLICED - dropped by police.
104 Expected action: backoff or error to real-time apps.
110 requeues once dequeued packet. It is used for non-standard or
111 just buggy devices, which can defer output even if dev->tbusy=0.
115 returns qdisc to initial state: purge all buffers, clear all
116 timers, counters (except for statistics) etc.
120 initializes newly created qdisc.
124 destroys resources allocated by init and during lifetime of qdisc.
128 changes qdisc parameters.
131 /* Protects list of registered TC modules. It is pure SMP lock. */
132 static rwlock_t qdisc_mod_lock = RW_LOCK_UNLOCKED;
135 /************************************************
136 * Queueing disciplines manipulation. *
137 ************************************************/
140 /* The list of all installed queueing disciplines. */
142 static struct Qdisc_ops *qdisc_base = NULL;
144 /* Register/unregister queueing discipline */
/* Register a qdisc discipline with the global list; fails if another
 * discipline with the same id is already registered.
 * NOTE(review): error-return and list-append lines appear elided in
 * this excerpt — confirm against the full file. */
146 int register_qdisc(struct Qdisc_ops *qops)
148 struct Qdisc_ops *q, **qp;
150 write_lock(&qdisc_mod_lock);
/* Walk the singly linked registration list looking for a duplicate id. */
151 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) {
152 if (strcmp(qops->id, q->id) == 0) {
153 write_unlock(&qdisc_mod_lock);
/* Fill in no-op defaults for any callbacks the module left NULL. */
158 if (qops->enqueue == NULL)
159 qops->enqueue = noop_qdisc_ops.enqueue;
160 if (qops->requeue == NULL)
161 qops->requeue = noop_qdisc_ops.requeue;
162 if (qops->dequeue == NULL)
163 qops->dequeue = noop_qdisc_ops.dequeue;
167 write_unlock(&qdisc_mod_lock);
/* Unregister a previously registered qdisc discipline.
 * NOTE(review): the unlink and return statements appear elided in this
 * excerpt. */
171 int unregister_qdisc(struct Qdisc_ops *qops)
173 struct Qdisc_ops *q, **qp;
176 write_lock(&qdisc_mod_lock);
/* Scan the registration list for the matching ops entry. */
177 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
185 write_unlock(&qdisc_mod_lock);
189 /* We know handle. Find qdisc among all qdisc's attached to device
190 (root qdisc, all its children, children of children etc.)
/* Linear scan of dev->qdisc_list for a matching handle; returns the
 * qdisc on match (return lines elided in this excerpt). */
193 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
197 list_for_each_entry(q, &dev->qdisc_list, list) {
198 if (q->handle == handle)
/* Resolve classid inside parent qdisc p to the leaf qdisc attached to
 * that class, via the class ops get()/leaf() pair.
 * NOTE(review): NULL checks and the matching put() appear elided in
 * this excerpt. */
204 struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
208 struct Qdisc_class_ops *cops = p->ops->cl_ops;
212 cl = cops->get(p, classid);
216 leaf = cops->leaf(p, cl);
221 /* Find queueing discipline by name */
223 struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
225 struct Qdisc_ops *q = NULL;
228 read_lock(&qdisc_mod_lock);
/* Compare the rtattr string against each registered discipline id;
 * returns the matching ops or NULL (break/return elided here). */
229 for (q = qdisc_base; q; q = q->next) {
230 if (rtattr_strcmp(kind, q->id) == 0)
233 read_unlock(&qdisc_mod_lock);
/* Global list of shared rate tables, refcounted via qdisc_get_rtab()
 * and qdisc_put_rtab(). */
238 static struct qdisc_rate_table *qdisc_rtab_list;
/* Look up (or build) a 1024-byte rate table matching ratespec r.
 * Matching tables are shared; the refcount increment and early-return
 * lines appear elided in this excerpt. */
240 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
242 struct qdisc_rate_table *rtab;
244 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
245 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
/* Reject malformed requests: the table payload must be exactly 1024
 * bytes and the ratespec sane. */
251 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
254 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
258 memcpy(rtab->data, RTA_DATA(tab), 1024);
/* Push the new table onto the front of the global list. */
259 rtab->next = qdisc_rtab_list;
260 qdisc_rtab_list = rtab;
/* Drop a reference on a rate table; when the count hits zero, unlink it
 * from qdisc_rtab_list and free it (unlink/kfree lines elided in this
 * excerpt). */
265 void qdisc_put_rtab(struct qdisc_rate_table *tab)
267 struct qdisc_rate_table *rtab, **rtabp;
/* Still referenced (or NULL): nothing to do. */
269 if (!tab || --tab->refcnt)
272 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
282 /* Allocate a unique handle from space managed by kernel */
284 u32 qdisc_alloc_handle(struct net_device *dev)
/* Handles are drawn from the 0x8000:0000 major space, stepping the
 * major number by one each try and wrapping before TC_H_ROOT. */
287 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
290 autohandle += TC_H_MAKE(0x10000U, 0);
291 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
292 autohandle = TC_H_MAKE(0x80000000U, 0);
/* Keep trying while the candidate is in use and retries remain. */
293 } while (qdisc_lookup(dev, autohandle) && --i > 0);
/* Return 0 when the retry budget was exhausted without a free handle. */
295 return i>0 ? autohandle : 0;
298 /* Attach toplevel qdisc to device dev */
300 static struct Qdisc *
301 dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
303 struct Qdisc *oqdisc;
/* If the device is up it must be quiesced before swapping qdiscs;
 * the deactivate call appears elided in this excerpt. */
305 if (dev->flags & IFF_UP)
308 write_lock(&qdisc_tree_lock);
309 spin_lock_bh(&dev->queue_lock);
/* Ingress qdiscs hang off dev->qdisc_ingress rather than the egress
 * (sleeping) slot. */
310 if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
311 oqdisc = dev->qdisc_ingress;
312 /* Prune old scheduler */
/* Only destroy the old qdisc when we hold the last reference. */
313 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
316 dev->qdisc_ingress = NULL;
318 dev->qdisc_ingress = qdisc;
/* Egress path: swap dev->qdisc_sleeping. */
323 oqdisc = dev->qdisc_sleeping;
325 /* Prune old scheduler */
326 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
329 /* ... and graft new one */
332 dev->qdisc_sleeping = qdisc;
/* Park the active pointer on noop until the device is reactivated. */
333 dev->qdisc = &noop_qdisc;
336 spin_unlock_bh(&dev->queue_lock);
337 write_unlock(&qdisc_tree_lock);
/* Reactivate the device if it was up (call elided in this excerpt). */
339 if (dev->flags & IFF_UP)
346 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
349 Old qdisc is not destroyed but returned in *old.
352 int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
353 struct Qdisc *new, struct Qdisc **old)
356 struct Qdisc *q = *old;
/* No parent: replace the device's root (or ingress) qdisc directly. */
359 if (parent == NULL) {
360 if (q && q->flags&TCQ_F_INGRESS) {
361 *old = dev_graft_qdisc(dev, q);
363 *old = dev_graft_qdisc(dev, new);
/* Otherwise delegate to the parent's class ops graft() hook. */
366 struct Qdisc_class_ops *cops = parent->ops->cl_ops;
371 unsigned long cl = cops->get(parent, classid);
373 err = cops->graft(parent, cl, new, old);
/* On success, record the new qdisc's parent classid.
 * NOTE(review): the err check guarding this appears elided here. */
375 new->parent = classid;
376 cops->put(parent, cl);
384 Allocate and initialize new qdisc.
386 Parameters are passed via opt.
389 static struct Qdisc *
390 qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
393 struct rtattr *kind = tca[TCA_KIND-1];
394 struct Qdisc *sch = NULL;
395 struct Qdisc_ops *ops;
398 ops = qdisc_lookup_ops(kind);
/* Discipline not loaded: try a module named "sch_<kind>" and look it
 * up again afterwards. */
400 if (ops==NULL && tca[TCA_KIND-1] != NULL) {
401 char module_name[4 + IFNAMSIZ + 1];
/* Bound check keeps "sch_" + kind inside module_name. */
403 if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
404 sprintf(module_name, "sch_%s", (char*)RTA_DATA(kind));
405 request_module (module_name);
406 ops = qdisc_lookup_ops(kind);
/* The private area for the discipline is allocated in the same chunk
 * as the Qdisc itself. */
415 size = sizeof(*sch) + ops->priv_size;
417 sch = kmalloc(size, GFP_KERNEL);
422 /* Grrr... Resolve race condition with module unload */
425 if (ops != qdisc_lookup_ops(kind))
428 memset(sch, 0, size);
430 INIT_LIST_HEAD(&sch->list);
431 skb_queue_head_init(&sch->q);
433 if (handle == TC_H_INGRESS)
434 sch->flags |= TCQ_F_INGRESS;
/* Cache fast-path callbacks directly on the Qdisc. */
437 sch->enqueue = ops->enqueue;
438 sch->dequeue = ops->dequeue;
440 atomic_set(&sch->refcnt, 1);
/* Stats are protected by the device queue lock. */
441 sch->stats.lock = &dev->queue_lock;
/* Caller passed no handle: pick a fresh one from the kernel pool. */
443 handle = qdisc_alloc_handle(dev);
449 if (handle == TC_H_INGRESS)
450 sch->handle =TC_H_MAKE(TC_H_INGRESS, 0);
452 sch->handle = handle;
/* Run the discipline's init hook; on success link the qdisc into the
 * per-device list under the tree lock. */
454 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
455 write_lock(&qdisc_tree_lock);
456 list_add_tail(&sch->list, &dev->qdisc_list);
457 write_unlock(&qdisc_tree_lock);
458 #ifdef CONFIG_NET_ESTIMATOR
460 qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
/* Apply new parameters (TCA_OPTIONS) to an existing qdisc via its
 * change() hook, then refresh the rate estimator if TCA_RATE is given.
 * NOTE(review): error returns appear elided in this excerpt. */
472 static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
474 if (tca[TCA_OPTIONS-1]) {
/* Disciplines without a change hook cannot be reconfigured in place. */
477 if (sch->ops->change == NULL)
479 err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
483 #ifdef CONFIG_NET_ESTIMATOR
484 if (tca[TCA_RATE-1]) {
/* Replace the old estimator with one built from the new rate attr. */
485 qdisc_kill_estimator(&sch->stats);
486 qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
/* Walker state used by check_loop()/check_loop_fn() to detect grafting
 * a qdisc beneath itself. */
492 struct check_loop_arg
494 struct qdisc_walker w;
499 static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
/* Return -ELOOP if qdisc p occurs anywhere in the (depth-bounded)
 * subtree rooted at q; 0 otherwise. */
501 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
503 struct check_loop_arg arg;
/* A classless qdisc has no children, hence no possible loop. */
505 if (q->ops->cl_ops == NULL)
508 arg.w.stop = arg.w.skip = arg.w.count = 0;
509 arg.w.fn = check_loop_fn;
512 q->ops->cl_ops->walk(q, &arg.w);
/* The walker sets w.stop when the forbidden qdisc was found. */
513 return arg.w.stop ? -ELOOP : 0;
/* Per-class callback for check_loop(): recurse into each class's leaf
 * qdisc, aborting when the forbidden ancestor is found or the depth
 * limit (7) is exceeded. */
517 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
520 struct Qdisc_class_ops *cops = q->ops->cl_ops;
521 struct check_loop_arg *arg = (struct check_loop_arg *)w;
523 leaf = cops->leaf(q, cl);
525 if (leaf == arg->p || arg->depth > 7)
527 return check_loop(leaf, arg->p, arg->depth + 1);
/* Handle RTM_DELQDISC / RTM_GETQDISC netlink requests: locate the
 * qdisc named by the message, then either ungraft+destroy it or just
 * notify the requester.  Several error returns and the destroy call
 * appear elided in this excerpt. */
536 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
538 struct tcmsg *tcm = NLMSG_DATA(n);
539 struct rtattr **tca = arg;
540 struct net_device *dev;
541 u32 clid = tcm->tcm_parent;
542 struct Qdisc *q = NULL;
543 struct Qdisc *p = NULL;
546 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
/* Resolve the parent reference to the target qdisc q. */
550 if (clid != TC_H_ROOT) {
551 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
552 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
554 q = qdisc_leaf(p, clid);
555 } else { /* ingress */
556 q = dev->qdisc_ingress;
/* No parent given: operate on the device's root qdisc. */
559 q = dev->qdisc_sleeping;
/* If a handle was supplied it must agree with what we found. */
564 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
/* Handle-only lookup path (no parent specified). */
567 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
/* Optional kind attribute must match the qdisc's discipline. */
571 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
574 if (n->nlmsg_type == RTM_DELQDISC) {
/* Ungraft: qdisc_graft with new == NULL detaches q. */
579 if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
582 qdisc_notify(skb, n, clid, q, NULL);
/* Destroy the detached qdisc under the queue lock (destroy call
 * elided in this excerpt). */
583 spin_lock_bh(&dev->queue_lock);
585 spin_unlock_bh(&dev->queue_lock);
/* RTM_GETQDISC: just report the qdisc back to the requester. */
588 qdisc_notify(skb, n, clid, NULL, q);
/* Handle RTM_NEWQDISC: create, replace, or change a qdisc according to
 * the netlink flags (CREATE/REPLACE/EXCL).  Many error returns and
 * goto labels appear elided in this excerpt. */
597 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
599 struct tcmsg *tcm = NLMSG_DATA(n);
600 struct rtattr **tca = arg;
601 struct net_device *dev;
602 u32 clid = tcm->tcm_parent;
603 struct Qdisc *q = NULL;
604 struct Qdisc *p = NULL;
607 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
/* Resolve the parent reference to the currently attached child q. */
611 if (clid != TC_H_ROOT) {
612 if (clid != TC_H_INGRESS) {
613 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
615 q = qdisc_leaf(p, clid);
616 } else { /*ingress */
617 q = dev->qdisc_ingress;
620 q = dev->qdisc_sleeping;
623 /* It may be default qdisc, ignore it */
624 if (q && q->handle == 0)
/* Existing child does not match the requested handle: decide between
 * lookup-by-handle and create/graft. */
627 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
628 if (tcm->tcm_handle) {
629 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
/* Qdisc handles have a zero minor; a nonzero minor is invalid. */
631 if (TC_H_MIN(tcm->tcm_handle))
633 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
635 if (n->nlmsg_flags&NLM_F_EXCL)
637 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
/* Refuse grafts that would create a cycle in the qdisc tree. */
640 (p && check_loop(q, p, 0)))
642 atomic_inc(&q->refcnt);
648 /* This magic test requires explanation.
650 * We know, that some child q is already
651 * attached to this parent and have choice:
652 * either to change it or to create/graft new one.
654 * 1. We are allowed to create/graft only
655 * if CREATE and REPLACE flags are set.
657 * 2. If EXCL is set, requestor wanted to say,
658 * that qdisc tcm_handle is not expected
659 * to exist, so that we choose create/graft too.
661 * 3. The last case is when no flags are set.
662 * Alas, it is sort of hole in API, we
663 * cannot decide what to do unambiguously.
664 * For now we select create/graft, if
665 * user gave KIND, which does not match existing.
667 if ((n->nlmsg_flags&NLM_F_CREATE) &&
668 (n->nlmsg_flags&NLM_F_REPLACE) &&
669 ((n->nlmsg_flags&NLM_F_EXCL) ||
671 rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
/* A handle is required to address an existing qdisc here. */
676 if (!tcm->tcm_handle)
678 q = qdisc_lookup(dev, tcm->tcm_handle)
681 /* Change qdisc parameters */
684 if (n->nlmsg_flags&NLM_F_EXCL)
686 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
688 err = qdisc_change(q, tca);
690 qdisc_notify(skb, n, clid, NULL, q);
/* Create path: requires NLM_F_CREATE. */
694 if (!(n->nlmsg_flags&NLM_F_CREATE))
/* Ingress qdiscs are keyed by the parent id, egress by the handle. */
696 if (clid == TC_H_INGRESS)
697 q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
699 q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
/* Graft the new qdisc in place and dispose of whatever it replaced. */
705 struct Qdisc *old_q = NULL;
706 err = qdisc_graft(dev, p, clid, q, &old_q);
/* Graft failed: destroy the freshly created qdisc (destroy call
 * elided in this excerpt). */
709 spin_lock_bh(&dev->queue_lock);
711 spin_unlock_bh(&dev->queue_lock);
715 qdisc_notify(skb, n, clid, old_q, q);
717 spin_lock_bh(&dev->queue_lock);
718 qdisc_destroy(old_q);
719 spin_unlock_bh(&dev->queue_lock);
/* Dump tc_stats into the skb under TCA_STATS, copying everything up to
 * (but not including) the trailing lock pointer member, with the stats
 * lock held for a consistent snapshot. */
725 int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st)
727 spin_lock_bh(st->lock);
728 RTA_PUT(skb, TCA_STATS, (char*)&st->lock - (char*)st, st);
729 spin_unlock_bh(st->lock);
/* rtattr_failure path (label elided in this excerpt): drop the lock
 * before reporting failure. */
733 spin_unlock_bh(st->lock);
/* Build one RTM_*QDISC netlink message describing qdisc q into skb.
 * Returns <0 (via the elided failure path) when the skb runs out of
 * room; on failure the skb is trimmed back to its starting length. */
738 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
739 u32 pid, u32 seq, unsigned flags, int event)
742 struct nlmsghdr *nlh;
/* Remember the message start so we can size (or trim) it later. */
743 unsigned char *b = skb->tail;
745 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
746 nlh->nlmsg_flags = flags;
747 tcm = NLMSG_DATA(nlh);
748 tcm->tcm_family = AF_UNSPEC;
749 tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
750 tcm->tcm_parent = clid;
751 tcm->tcm_handle = q->handle;
/* tcm_info carries the current refcount for this qdisc. */
752 tcm->tcm_info = atomic_read(&q->refcnt);
753 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
/* Let the discipline append its own options. */
754 if (q->ops->dump && q->ops->dump(q, skb) < 0)
756 q->stats.qlen = q->q.qlen;
757 if (qdisc_copy_stats(skb, &q->stats))
759 nlh->nlmsg_len = skb->tail - b;
/* Failure path: undo everything written since b. */
764 skb_trim(skb, b - skb->data);
/* Send an rtnetlink notification about a qdisc change: a DELQDISC
 * record for the old qdisc (if any) plus a NEWQDISC record for the new
 * one.  Error paths appear elided in this excerpt. */
768 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
769 u32 clid, struct Qdisc *old, struct Qdisc *new)
/* Echo to the original requester's pid when the request skb is known. */
772 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
774 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
778 if (old && old->handle) {
779 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
783 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
/* Multicast to the TC group, echoing back if NLM_F_ECHO was set. */
788 return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
/* Netlink dump callback: iterate every device and every qdisc on it,
 * emitting one NEWQDISC record per qdisc.  cb->args[] holds the resume
 * position between dump invocations (device index bookkeeping partly
 * elided in this excerpt). */
795 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
799 struct net_device *dev;
803 s_q_idx = q_idx = cb->args[1];
804 read_lock(&dev_base_lock);
805 for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
810 read_lock(&qdisc_tree_lock);
812 list_for_each_entry(q, &dev->qdisc_list, list) {
/* Skip entries already delivered in a previous dump pass. */
813 if (q_idx < s_q_idx) {
817 if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
818 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
/* skb full: stop here and resume from this point next time. */
819 read_unlock(&qdisc_tree_lock);
824 read_unlock(&qdisc_tree_lock);
828 read_unlock(&dev_base_lock);
838 /************************************************
839 * Traffic classes manipulation. *
840 ************************************************/
/* Handle RTM_{NEW,DEL,GET}TCLASS: resolve the owning qdisc from the
 * parent/handle pair, then dispatch to the discipline's class ops.
 * Numerous error returns appear elided in this excerpt. */
844 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
846 struct tcmsg *tcm = NLMSG_DATA(n);
847 struct rtattr **tca = arg;
848 struct net_device *dev;
849 struct Qdisc *q = NULL;
850 struct Qdisc_class_ops *cops;
851 unsigned long cl = 0;
852 unsigned long new_cl;
853 u32 pid = tcm->tcm_parent;
854 u32 clid = tcm->tcm_handle;
855 u32 qid = TC_H_MAJ(clid);
858 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
862 parent == TC_H_UNSPEC - unspecified parent.
863 parent == TC_H_ROOT - class is root, which has no parent.
864 parent == X:0 - parent is root class.
865 parent == X:Y - parent is a node in hierarchy.
866 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
868 handle == 0:0 - generate handle from kernel pool.
869 handle == 0:Y - class is X:Y, where X:0 is qdisc.
870 handle == X:Y - clear.
871 handle == X:0 - root class.
874 /* Step 1. Determine qdisc handle X:0 */
876 if (pid != TC_H_ROOT) {
877 u32 qid1 = TC_H_MAJ(pid);
880 /* If both majors are known, they must be identical. */
/* Neither major known: fall back to the root qdisc's handle. */
886 qid = dev->qdisc_sleeping->handle;
888 /* Now qid is genuine qdisc handle consistent
889 both with parent and child.
891 TC_H_MAJ(pid) still may be unspecified, complete it now.
894 pid = TC_H_MAKE(qid, pid);
/* pid == TC_H_ROOT: the class lives directly under the root qdisc. */
897 qid = dev->qdisc_sleeping->handle;
900 /* OK. Locate qdisc */
901 if ((q = qdisc_lookup(dev, qid)) == NULL)
904 /* And check that it supports classes */
905 cops = q->ops->cl_ops;
909 /* Now try to get class */
911 if (pid == TC_H_ROOT)
/* Complete the class id's major with the qdisc handle. */
914 clid = TC_H_MAKE(qid, clid);
917 cl = cops->get(q, clid);
/* Class not found: only NEWTCLASS with NLM_F_CREATE may proceed. */
921 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
924 switch (n->nlmsg_type) {
/* RTM_NEWTCLASS on an existing class: EXCL means refuse. */
927 if (n->nlmsg_flags&NLM_F_EXCL)
/* RTM_DELTCLASS: delegate deletion to the discipline. */
931 err = cops->delete(q, cl);
933 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
/* RTM_GETTCLASS: report the class back to the requester. */
936 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
/* Create or change the class through the discipline's change hook. */
945 err = cops->change(q, clid, pid, tca, &new_cl);
947 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
/* Build one RTM_*TCLASS netlink message for class cl of qdisc q.
 * The class ops dump() hook overwrites tcm_handle/tcm_parent with the
 * real class identifiers; on failure the skb is trimmed back. */
957 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
959 u32 pid, u32 seq, unsigned flags, int event)
962 struct nlmsghdr *nlh;
963 unsigned char *b = skb->tail;
965 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
966 nlh->nlmsg_flags = flags;
967 tcm = NLMSG_DATA(nlh);
968 tcm->tcm_family = AF_UNSPEC;
969 tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
/* Defaults; the discipline's dump callback fills in class specifics. */
970 tcm->tcm_parent = q->handle;
971 tcm->tcm_handle = q->handle;
973 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
974 if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
976 nlh->nlmsg_len = skb->tail - b;
/* Failure path: undo everything written since b. */
981 skb_trim(skb, b - skb->data);
/* Send an rtnetlink notification about a traffic-class event.
 * Allocation-failure handling appears elided in this excerpt. */
985 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
986 struct Qdisc *q, unsigned long cl, int event)
989 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
991 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
995 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
/* Multicast to the TC group, echoing back if NLM_F_ECHO was set. */
1000 return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
/* Walker state carrying the dump skb and netlink callback through the
 * per-class walk. */
1003 struct qdisc_dump_args
1005 struct qdisc_walker w;
1006 struct sk_buff *skb;
1007 struct netlink_callback *cb;
/* Per-class walker callback: emit one NEWTCLASS record for class cl. */
1010 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1012 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1014 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1015 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
/* Netlink dump callback for traffic classes: walk every qdisc on the
 * target device (optionally filtered by tcm_parent major) and dump its
 * classes via qdisc_class_dump().  cb->args[] carries resume state. */
1018 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1022 struct net_device *dev;
1024 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1025 struct qdisc_dump_args arg;
1027 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1029 if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
1035 read_lock(&qdisc_tree_lock);
1036 list_for_each_entry(q, &dev->qdisc_list, list) {
/* Skip qdiscs already dumped, classless qdiscs, and (when a parent
 * filter is present) qdiscs whose handle does not match it. */
1037 if (t < s_t || !q->ops->cl_ops ||
1039 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
/* Reset per-qdisc walk state when starting a new qdisc. */
1044 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1045 arg.w.fn = qdisc_class_dump;
1049 arg.w.skip = cb->args[1];
1051 q->ops->cl_ops->walk(q, &arg.w);
/* Remember how far we got for the next dump invocation. */
1052 cb->args[1] = arg.w.count;
1057 read_unlock(&qdisc_tree_lock);
/* Conversion factors between microseconds and psched clock ticks,
 * calibrated at boot (see psched_calibrate_clock / pktsched_init). */
1065 int psched_us_per_tick = 1;
1066 int psched_tick_per_us = 1;
1068 #ifdef CONFIG_PROC_FS
/* /proc/net/psched read handler: prints the clock calibration values
 * as four hex words (trailing values elided in this excerpt). */
1069 static int psched_read_proc(char *buffer, char **start, off_t offset,
1070 int length, int *eof, void *data)
1074 len = sprintf(buffer, "%08x %08x %08x %08x\n",
1075 psched_tick_per_us, psched_us_per_tick,
1085 *start = buffer + offset;
1092 #if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
/* Convert a whole-seconds delta to microseconds, clamped to bound;
 * the guard also avoids signed overflow in the multiplication. */
1093 int psched_tod_diff(int delta_sec, int bound)
1097 if (bound <= 1000000 || delta_sec > (0x7FFFFFFF/1000000)-1)
1099 delta = delta_sec * 1000000;
1106 psched_time_t psched_time_base;
1108 #if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1109 psched_tdiff_t psched_clock_per_hz;
1110 int psched_clock_scale;
1113 #ifdef PSCHED_WATCHER
1114 PSCHED_WATCHER psched_time_mark;
1116 static void psched_tick(unsigned long);
/* Old-style GNU designated initializer (pre-C99 "function:" syntax). */
1118 static struct timer_list psched_timer =
1119 { function: psched_tick };
/* Periodic watchdog keeping the psched clock from wrapping: with the
 * CPU clock source it merely samples the counter every second; with
 * the jiffies source it folds elapsed jiffies into psched_time_base
 * hourly. */
1121 static void psched_tick(unsigned long dummy)
1123 #if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1124 psched_time_t dummy_stamp;
1125 PSCHED_GET_TIME(dummy_stamp);
1126 /* It is OK up to 4GHz cpu */
1127 psched_timer.expires = jiffies + 1*HZ;
1129 unsigned long now = jiffies;
1130 psched_time_base += ((u64)(now-psched_time_mark))<<PSCHED_JSCALE;
1131 psched_time_mark = now;
1132 psched_timer.expires = now + 60*60*HZ;
1134 add_timer(&psched_timer);
1138 #if PSCHED_CLOCK_SOURCE == PSCHED_CPU
/* Boot-time calibration of the CPU clock source: measure how many CPU
 * clock ticks elapse against gettimeofday over ~HZ/10 jiffies, then
 * derive the tick/us conversion factors and the power-of-two scale. */
1139 int __init psched_calibrate_clock(void)
1141 psched_time_t stamp, stamp1;
1142 struct timeval tv, tv1;
1143 psched_tdiff_t delay;
1147 #ifdef PSCHED_WATCHER
/* Busy-wait for roughly a tenth of a second between the two samples. */
1150 stop = jiffies + HZ/10;
1151 PSCHED_GET_TIME(stamp);
1152 do_gettimeofday(&tv);
1153 while (time_before(jiffies, stop)) {
1157 PSCHED_GET_TIME(stamp1);
1158 do_gettimeofday(&tv1);
/* delay: elapsed CPU ticks; rdelay: elapsed real microseconds. */
1160 delay = PSCHED_TDIFF(stamp1, stamp);
1161 rdelay = tv1.tv_usec - tv.tv_usec;
1162 rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
1166 psched_tick_per_us = delay;
/* psched_clock_scale = floor(log2(delay)); us_per_tick = 2^scale. */
1167 while ((delay>>=1) != 0)
1168 psched_clock_scale++;
1169 psched_us_per_tick = 1<<psched_clock_scale;
1170 psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
/* Packet scheduler subsystem init: calibrate the clock source, wire the
 * TC message handlers into the rtnetlink dispatch table, register the
 * built-in disciplines, and create /proc/net/psched. */
1175 int __init pktsched_init(void)
1177 struct rtnetlink_link *link_p;
1179 #if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1180 if (psched_calibrate_clock() < 0)
1182 #elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
1183 psched_tick_per_us = HZ<<PSCHED_JSCALE;
1184 psched_us_per_tick = 1000000;
1185 #ifdef PSCHED_WATCHER
1190 link_p = rtnetlink_links[PF_UNSPEC];
1192 /* Setup rtnetlink links. It is made here to avoid
1193 exporting large number of public symbols.
1197 link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
1198 link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
1199 link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
1200 link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
1201 link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
1202 link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
1203 link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
1204 link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
/* Helper: declare and register the statically built-in discipline
 * "name" (expands to name##_qdisc_ops). */
1207 #define INIT_QDISC(name) { \
1208 extern struct Qdisc_ops name##_qdisc_ops; \
1209 register_qdisc(& name##_qdisc_ops); \
/* Register each discipline compiled into the kernel (most INIT_QDISC
 * invocations are elided in this excerpt). */
1215 #ifdef CONFIG_NET_SCH_CBQ
1218 #ifdef CONFIG_NET_SCH_HTB
1221 #ifdef CONFIG_NET_SCH_CSZ
1224 #ifdef CONFIG_NET_SCH_HPFQ
1227 #ifdef CONFIG_NET_SCH_HFSC
1230 #ifdef CONFIG_NET_SCH_RED
1233 #ifdef CONFIG_NET_SCH_GRED
1236 #ifdef CONFIG_NET_SCH_INGRESS
1237 INIT_QDISC(ingress);
1239 #ifdef CONFIG_NET_SCH_DSMARK
1242 #ifdef CONFIG_NET_SCH_SFQ
1245 #ifdef CONFIG_NET_SCH_TBF
1248 #ifdef CONFIG_NET_SCH_TEQL
1251 #ifdef CONFIG_NET_SCH_PRIO
1254 #ifdef CONFIG_NET_SCH_ATM
1257 #ifdef CONFIG_NET_CLS
1261 #ifdef CONFIG_PROC_FS
1262 create_proc_read_entry("net/psched", 0, 0, psched_read_proc, NULL);