Merge branch 'for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 2 Nov 2021 22:37:27 +0000 (15:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 2 Nov 2021 22:37:27 +0000 (15:37 -0700)
diff --combined Documentation/admin-guide/cgroup-v2.rst

index 81d37ac,6658598..2aeb7ae
--- 1/Documentation/admin-guide/cgroup-v2.rst
--- 2/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@@ -1016,8 -1016,6 +1016,8 @@@ All time durations are in microseconds
         - nr_periods
         - nr_throttled
         - throttled_usec
+ +      - nr_bursts
+ +      - burst_usec
   
     cpu.weight
         A read-write single value file which exists on non-root
@@@ -1049,12 -1047,6 +1049,12 @@@
         $PERIOD duration.  "max" for $MAX indicates no limit.  If only
         one number is written, $MAX is updated.
   
+ +  cpu.max.burst
+ +      A read-write single value file which exists on non-root
+ +      cgroups.  The default is "0".
+ +
+ +      The burst in the range [0, $MAX].
+ +
     cpu.pressure
         A read-write nested-keyed file.
   
@@@ -2318,6 -2310,16 +2318,16 @@@ Miscellaneous controller provides 3 int
           Limits can be set higher than the capacity value in the misc.capacity
           file.
   
+   misc.events
+       A read-only flat-keyed file which exists on non-root cgroups. The
+       following entries are defined. Unless specified otherwise, a value
+       change in this file generates a file modified event. All fields in
+       this file are hierarchical.
+ 
+         max
+               The number of times the cgroup's resource usage was
+               about to go over the max boundary.
+ 
   Migration and Ownership
   ~~~~~~~~~~~~~~~~~~~~~~~
   
diff --combined include/linux/bpf-cgroup.h

index 3536ab4,9aad4e3..11820a4
--- 1/include/linux/bpf-cgroup.h
--- 2/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@@ -157,26 -157,6 +157,6 @@@ struct cgroup_bpf 
   int cgroup_bpf_inherit(struct cgroup *cgrp);
   void cgroup_bpf_offline(struct cgroup *cgrp);
   
- int __cgroup_bpf_attach(struct cgroup *cgrp,
-                       struct bpf_prog *prog, struct bpf_prog *replace_prog,
-                       struct bpf_cgroup_link *link,
-                       enum bpf_attach_type type, u32 flags);
- int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-                       struct bpf_cgroup_link *link,
-                       enum bpf_attach_type type);
- int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
-                      union bpf_attr __user *uattr);
- 
- /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
- int cgroup_bpf_attach(struct cgroup *cgrp,
-                     struct bpf_prog *prog, struct bpf_prog *replace_prog,
-                     struct bpf_cgroup_link *link, enum bpf_attach_type type,
-                     u32 flags);
- int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-                     enum bpf_attach_type type);
- int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
-                    union bpf_attr __user *uattr);
- 
   int __cgroup_bpf_run_filter_skb(struct sock *sk,
                                 struct sk_buff *skb,
                                 enum cgroup_bpf_attach_type atype);
@@@ -517,7 -497,6 +497,7 @@@ static inline int bpf_percpu_cgroup_sto
   
   #define cgroup_bpf_enabled(atype) (0)
   #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
+ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; })
   #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
   #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
   #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
diff --combined kernel/cgroup/cgroup.c

index ea08f01,c73f634..919194d
--- 1/kernel/cgroup/cgroup.c
--- 2/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@@ -1740,6 -1740,7 +1740,7 @@@ int rebind_subsystems(struct cgroup_roo
         struct cgroup *dcgrp = &dst_root->cgrp;
         struct cgroup_subsys *ss;
         int ssid, i, ret;
+       u16 dfl_disable_ss_mask = 0;
   
         lockdep_assert_held(&cgroup_mutex);
   
@@@ -1756,8 -1757,28 +1757,28 @@@
                 /* can't move between two non-dummy roots either */
                 if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
                         return -EBUSY;
+ 
+               /*
+                * Collect ssid's that need to be disabled from default
+                * hierarchy.
+                */
+               if (ss->root == &cgrp_dfl_root)
+                       dfl_disable_ss_mask |= 1 << ssid;
+ 
         } while_each_subsys_mask();
   
+       if (dfl_disable_ss_mask) {
+               struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
+ 
+               /*
+                * Controllers from default hierarchy that need to be rebound
+                * are all disabled together in one go.
+                */
+               cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
+               WARN_ON(cgroup_apply_control(scgrp));
+               cgroup_finalize_control(scgrp, 0);
+       }
+ 
         do_each_subsys_mask(ss, ssid, ss_mask) {
                 struct cgroup_root *src_root = ss->root;
                 struct cgroup *scgrp = &src_root->cgrp;
@@@ -1766,10 -1787,12 +1787,12 @@@
   
                 WARN_ON(!css || cgroup_css(dcgrp, ss));
   
-               /* disable from the source */
-               src_root->subsys_mask &= ~(1 << ssid);
-               WARN_ON(cgroup_apply_control(scgrp));
-               cgroup_finalize_control(scgrp, 0);
+               if (src_root != &cgrp_dfl_root) {
+                       /* disable from the source */
+                       src_root->subsys_mask &= ~(1 << ssid);
+                       WARN_ON(cgroup_apply_control(scgrp));
+                       cgroup_finalize_control(scgrp, 0);
+               }
   
                 /* rebind */
                 RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
@@@ -2187,10 -2210,8 +2210,10 @@@ static void cgroup_kill_sb(struct super
          * And don't kill the default root.
          */
         if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
- -          !percpu_ref_is_dying(&root->cgrp.self.refcnt))
+ +          !percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
+ +              cgroup_bpf_offline(&root->cgrp);
                 percpu_ref_kill(&root->cgrp.self.refcnt);
+ +      }
         cgroup_put(&root->cgrp);
         kernfs_kill_sb(sb);
   }
@@@ -5911,17 -5932,20 +5934,20 @@@ struct cgroup *cgroup_get_from_id(u64 i
         struct kernfs_node *kn;
         struct cgroup *cgrp = NULL;
   
-       mutex_lock(&cgroup_mutex);
         kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
         if (!kn)
-               goto out_unlock;
+               goto out;
   
-       cgrp = kn->priv;
-       if (cgroup_is_dead(cgrp) || !cgroup_tryget(cgrp))
+       rcu_read_lock();
+ 
+       cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
+       if (cgrp && !cgroup_tryget(cgrp))
                 cgrp = NULL;
+ 
+       rcu_read_unlock();
+ 
         kernfs_put(kn);
- out_unlock:
-       mutex_unlock(&cgroup_mutex);
+ out:
         return cgrp;
   }
   EXPORT_SYMBOL_GPL(cgroup_get_from_id);
@@@ -6474,30 -6498,34 +6500,34 @@@ struct cgroup_subsys_state *css_from_id
    *
    * Find the cgroup at @path on the default hierarchy, increment its
    * reference count and return it.  Returns pointer to the found cgroup on
-  * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
-  * if @path points to a non-directory.
+  * success, ERR_PTR(-ENOENT) if @path doesn't exist or if the cgroup has already
+  * been released and ERR_PTR(-ENOTDIR) if @path points to a non-directory.
    */
   struct cgroup *cgroup_get_from_path(const char *path)
   {
         struct kernfs_node *kn;
-       struct cgroup *cgrp;
- 
-       mutex_lock(&cgroup_mutex);
+       struct cgroup *cgrp = ERR_PTR(-ENOENT);
   
         kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
-       if (kn) {
-               if (kernfs_type(kn) == KERNFS_DIR) {
-                       cgrp = kn->priv;
-                       cgroup_get_live(cgrp);
-               } else {
-                       cgrp = ERR_PTR(-ENOTDIR);
-               }
-               kernfs_put(kn);
-       } else {
-               cgrp = ERR_PTR(-ENOENT);
+       if (!kn)
+               goto out;
+ 
+       if (kernfs_type(kn) != KERNFS_DIR) {
+               cgrp = ERR_PTR(-ENOTDIR);
+               goto out_kernfs;
         }
   
-       mutex_unlock(&cgroup_mutex);
+       rcu_read_lock();
+ 
+       cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
+       if (!cgrp || !cgroup_tryget(cgrp))
+               cgrp = ERR_PTR(-ENOENT);
+ 
+       rcu_read_unlock();
+ 
+ out_kernfs:
+       kernfs_put(kn);
+ out:
         return cgrp;
   }
   EXPORT_SYMBOL_GPL(cgroup_get_from_path);
@@@ -6574,95 -6602,80 +6604,57 @@@ int cgroup_parse_float(const char *inpu
    */
   #ifdef CONFIG_SOCK_CGROUP_DATA
   
- -#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
- -
- -DEFINE_SPINLOCK(cgroup_sk_update_lock);
- -static bool cgroup_sk_alloc_disabled __read_mostly;
- -
- -void cgroup_sk_alloc_disable(void)
- -{
- -      if (cgroup_sk_alloc_disabled)
- -              return;
- -      pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
- -      cgroup_sk_alloc_disabled = true;
- -}
- -
- -#else
- -
- -#define cgroup_sk_alloc_disabled      false
- -
- -#endif
- -
   void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
   {
- -      if (cgroup_sk_alloc_disabled) {
- -              skcd->no_refcnt = 1;
- -              return;
- -      }
- -
- -      /* Don't associate the sock with unrelated interrupted task's cgroup. */
- -      if (in_interrupt())
- -              return;
+ +      struct cgroup *cgroup;
   
         rcu_read_lock();
+ +      /* Don't associate the sock with unrelated interrupted task's cgroup. */
+ +      if (in_interrupt()) {
+ +              cgroup = &cgrp_dfl_root.cgrp;
+ +              cgroup_get(cgroup);
+ +              goto out;
+ +      }
   
         while (true) {
                 struct css_set *cset;
   
                 cset = task_css_set(current);
                 if (likely(cgroup_tryget(cset->dfl_cgrp))) {
- -                      skcd->val = (unsigned long)cset->dfl_cgrp;
- -                      cgroup_bpf_get(cset->dfl_cgrp);
+ +                      cgroup = cset->dfl_cgrp;
                         break;
                 }
                 cpu_relax();
         }
- -
+ +out:
+ +      skcd->cgroup = cgroup;
+ +      cgroup_bpf_get(cgroup);
         rcu_read_unlock();
   }
   
   void cgroup_sk_clone(struct sock_cgroup_data *skcd)
   {
- -      if (skcd->val) {
- -              if (skcd->no_refcnt)
- -                      return;
- -              /*
- -               * We might be cloning a socket which is left in an empty
- -               * cgroup and the cgroup might have already been rmdir'd.
- -               * Don't use cgroup_get_live().
- -               */
- -              cgroup_get(sock_cgroup_ptr(skcd));
- -              cgroup_bpf_get(sock_cgroup_ptr(skcd));
- -      }
+ +      struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+ +
+ +      /*
+ +       * We might be cloning a socket which is left in an empty
+ +       * cgroup and the cgroup might have already been rmdir'd.
+ +       * Don't use cgroup_get_live().
+ +       */
+ +      cgroup_get(cgrp);
+ +      cgroup_bpf_get(cgrp);
   }
   
   void cgroup_sk_free(struct sock_cgroup_data *skcd)
   {
         struct cgroup *cgrp = sock_cgroup_ptr(skcd);
   
- -      if (skcd->no_refcnt)
- -              return;
         cgroup_bpf_put(cgrp);
         cgroup_put(cgrp);
   }
   
   #endif        /* CONFIG_SOCK_CGROUP_DATA */
   
- #ifdef CONFIG_CGROUP_BPF
- int cgroup_bpf_attach(struct cgroup *cgrp,
-                     struct bpf_prog *prog, struct bpf_prog *replace_prog,
-                     struct bpf_cgroup_link *link,
-                     enum bpf_attach_type type,
-                     u32 flags)
- {
-       int ret;
- 
-       mutex_lock(&cgroup_mutex);
-       ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
-       mutex_unlock(&cgroup_mutex);
-       return ret;
- }
- 
- int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-                     enum bpf_attach_type type)
- {
-       int ret;
- 
-       mutex_lock(&cgroup_mutex);
-       ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
-       mutex_unlock(&cgroup_mutex);
-       return ret;
- }
- 
- int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
-                    union bpf_attr __user *uattr)
- {
-       int ret;
- 
-       mutex_lock(&cgroup_mutex);
-       ret = __cgroup_bpf_query(cgrp, attr, uattr);
-       mutex_unlock(&cgroup_mutex);
-       return ret;
- }
- #endif /* CONFIG_CGROUP_BPF */
- 
   #ifdef CONFIG_SYSFS
   static ssize_t show_delegatable_files(struct cftype *files, char *buf,
                                       ssize_t size, const char *prefix)
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 2 Nov 2021 22:37:27 +0000 (15:37 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 2 Nov 2021 22:37:27 +0000 (15:37 -0700)
		1	2
Documentation/admin-guide/cgroup-v2.rst	patch |	diff1 |	diff2 |	blob | history
include/linux/bpf-cgroup.h	patch |	diff1 |	diff2 |	blob | history
kernel/cgroup/cgroup.c	patch |	diff1 |	diff2 |	blob | history